feat: block parsing

This commit is contained in:
PoiScript 2023-12-13 01:07:11 +08:00
parent b2123d1acd
commit 27de7ee68c
No known key found for this signature in database
GPG key ID: 22C2B1249D99985E
10 changed files with 208 additions and 168 deletions

View file

@ -1,112 +1,89 @@
use crate::{SyntaxKind, SyntaxNode};
use crate::SyntaxKind;
use super::{filter_token, SourceBlock, Token};
/// Looks up the value of a header argument on a block's begin line.
///
/// Scans the `TEXT` tokens of the node's `BLOCK_BEGIN` child for one equal to
/// `name` and returns the `TEXT` token that comes right after it.
///
/// Returns `None` when there is no `BLOCK_BEGIN` child or `name` never occurs,
/// and a default (empty) token when `name` is the last `TEXT` token.
fn argument(node: &SyntaxNode, name: &str) -> Option<Token> {
    let begin = node
        .children()
        .find(|child| child.kind() == SyntaxKind::BLOCK_BEGIN)?;

    let mut texts = begin
        .children_with_tokens()
        .filter_map(filter_token(SyntaxKind::TEXT));

    // Consume everything up to and including the argument name itself.
    texts.find(|token| token == name)?;

    // Whatever text token follows is the value; a bare name yields an empty token.
    Some(texts.next().unwrap_or_default())
}
use super::{filter_token, ExportBlock, SourceBlock, Token};
impl SourceBlock {
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.language(), "c");
/// assert_eq!(block.language().unwrap(), "c");
/// let block = Org::parse("#+begin_src javascript \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.language(), "javascript");
/// assert_eq!(block.language().unwrap(), "javascript");
///
/// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.language(), "");
/// assert!(block.language().is_none());
/// ````
pub fn language(&self) -> Token {
pub fn language(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.and_then(|n| {
n.children_with_tokens()
.filter_map(filter_token(SyntaxKind::TEXT))
.nth(2)
})
.unwrap_or_default()
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::SRC_BLOCK_LANGUAGE))
}
/// Returns the switches (such as `-n 20 -r`) given on the `#+begin_src` line,
/// or `None` when the block has no `SRC_BLOCK_SWITCHES` token.
///
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src emacs-lisp -n 20\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.switches().unwrap(), "-n 20");
/// let block = Org::parse("#+begin_src emacs-lisp -n 20 -r :tangle yes \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.switches().unwrap(), "-n 20 -r");
///
/// let block = Org::parse("#+begin_src emacs-lisp\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// let block = Org::parse("#+begin_src :tangle yes\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// ```
pub fn switches(&self) -> Option<Token> {
    // Without a begin line there is nothing to search.
    let begin = self
        .syntax
        .children()
        .find(|child| child.kind() == SyntaxKind::BLOCK_BEGIN)?;

    begin
        .children_with_tokens()
        .find_map(filter_token(SyntaxKind::SRC_BLOCK_SWITCHES))
}
/// Returns the text token that follows the `:tangle` header argument on the
/// block's begin line.
///
/// `None` when `:tangle` is absent; an empty token when it is present but
/// nothing follows it.
///
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src c :tangle yes\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.tangle().unwrap(), "yes");
/// let block = Org::parse("#+begin_src c :tangle\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.tangle().unwrap(), "");
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.tangle().is_none());
/// ````
pub fn tangle(&self) -> Option<Token> {
// Delegates the token scan to the shared `argument` helper.
argument(&self.syntax, ":tangle")
}
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
/// assert_eq!(block.parameters().unwrap(), ":tangle yes");
/// let block = Org::parse("#+begin_src c :tangle \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.parameters().unwrap(), ":tangle");
///
/// let block = Org::parse("#+begin_src c :mkdir yes\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.mkdir().unwrap(), "yes");
/// let block = Org::parse("#+begin_src c :mkdir\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.mkdir().unwrap(), "");
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.mkdir().is_none());
/// assert!(block.parameters().is_none());
/// ````
pub fn mkdir(&self) -> Option<Token> {
argument(&self.syntax, ":mkdir")
}
/// Returns the text token that follows the `:comments` header argument on the
/// block's begin line.
///
/// `None` when `:comments` is absent; an empty token when it is present but
/// nothing follows it.
///
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src c :comments both\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.comments().unwrap(), "both");
/// let block = Org::parse("#+begin_src c :comments\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.comments().unwrap(), "");
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.comments().is_none());
/// ````
pub fn comments(&self) -> Option<Token> {
// Delegates the token scan to the shared `argument` helper.
argument(&self.syntax, ":comments")
}
/// Returns the text token that follows the `:padline` header argument on the
/// block's begin line.
///
/// `None` when `:padline` is absent; an empty token when it is present but
/// nothing follows it.
///
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src c :padline yes\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.padline().unwrap(), "yes");
/// let block = Org::parse("#+begin_src c :padline\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.padline().unwrap(), "");
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.padline().is_none());
/// ````
pub fn padline(&self) -> Option<Token> {
// Delegates the token scan to the shared `argument` helper.
argument(&self.syntax, ":padline")
}
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src c :tangle-mode o444\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.tangle_mode().unwrap(), "o444");
/// let block = Org::parse("#+begin_src c :tangle-mode\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.tangle_mode().unwrap(), "");
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.tangle_mode().is_none());
/// ````
pub fn tangle_mode(&self) -> Option<Token> {
argument(&self.syntax, ":tangle-mode")
pub fn parameters(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::SRC_BLOCK_PARAMETERS))
}
}
impl ExportBlock {
    /// Returns the export backend named on the `#+begin_export` line, or `None`
    /// when the begin line carries no `EXPORT_BLOCK_TYPE` token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::ExportBlock};
    ///
    /// let block = Org::parse("#+begin_export html\n#+end_export").first_node::<ExportBlock>().unwrap();
    /// assert_eq!(block.ty().unwrap(), "html");
    ///
    /// let block = Org::parse("#+begin_export\n#+end_export").first_node::<ExportBlock>().unwrap();
    /// assert!(block.ty().is_none());
    /// ```
    pub fn ty(&self) -> Option<Token> {
        // Without a begin line there is nothing to search.
        let begin = self
            .syntax
            .children()
            .find(|child| child.kind() == SyntaxKind::BLOCK_BEGIN)?;

        begin
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::EXPORT_BLOCK_TYPE))
    }
}

View file

@ -27,6 +27,7 @@ const nodes = [
first_child: [
["section", "Section"],
["planning", "Planning"],
["properties", "PropertyDrawer"],
],
children: [["headlines", "Headline"]],
post_blank: true,

View file

@ -160,6 +160,9 @@ impl Headline {
/// Returns this headline's `Planning` node, as resolved by `support::child`
/// on the headline's syntax node.
// NOTE(review): presumably the first direct child that casts to `Planning` — confirm against `support::child`.
pub fn planning(&self) -> Option<Planning> {
support::child(&self.syntax)
}
/// Returns this headline's `PropertyDrawer` node, as resolved by
/// `support::child` on the headline's syntax node.
// NOTE(review): presumably the first direct child that casts to `PropertyDrawer` — confirm against `support::child`.
pub fn properties(&self) -> Option<PropertyDrawer> {
support::child(&self.syntax)
}
/// Returns an iterator-like `AstChildren` over the `Headline` nodes produced
/// by `support::children` for this headline's syntax node.
// NOTE(review): presumably only direct sub-headlines, not deeper descendants — confirm against `support::children`.
pub fn headlines(&self) -> AstChildren<Headline> {
support::children(&self.syntax)
}

39
src/ast/keyword.rs Normal file
View file

@ -0,0 +1,39 @@
use crate::SyntaxKind;
use super::{filter_token, Keyword, Token};
impl Keyword {
    /// Returns the keyword's name, i.e. the first `TEXT` token of the node.
    ///
    /// A keyword node without any `TEXT` token trips a debug assertion; in
    /// release builds an empty token is returned instead.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Keyword};
    ///
    /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::<Keyword>().unwrap();
    /// assert_eq!(keyword.key(), "KEY");
    /// ```
    pub fn key(&self) -> Token {
        let name = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT));

        match name {
            Some(token) => token,
            None => {
                debug_assert!(false, "keyword must contains TEXT");
                Token::default()
            }
        }
    }

    /// Returns the keyword's value, i.e. the second `TEXT` token of the node.
    ///
    /// The token is returned as stored, so whitespace after the colon is kept.
    /// A keyword without a value yields an empty token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Keyword};
    ///
    /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::<Keyword>().unwrap();
    /// assert_eq!(keyword.value(), " VALUE");
    /// let keyword = Org::parse("#+KEY:").first_node::<Keyword>().unwrap();
    /// assert_eq!(keyword.value(), "");
    /// ```
    pub fn value(&self) -> Token {
        let mut texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT));

        // Index 0 is the key; index 1, when present, is the value.
        texts.nth(1).unwrap_or_default()
    }
}

View file

@ -10,6 +10,7 @@ mod entity;
mod headline;
mod inline_call;
mod inline_src;
mod keyword;
mod link;
mod list;
mod macros;

View file

@ -199,6 +199,7 @@ pub trait Traverser {
SUBSCRIPT => walk!(Subscript),
KEYWORD => walk!(Keyword),
PROPERTY_DRAWER => walk!(PropertyDrawer),
NODE_PROPERTY => {}
BLOCK_CONTENT | LIST_ITEM_CONTENT => {
for child in node.children_with_tokens() {
self.element(child, ctx);

View file

@ -1,9 +1,10 @@
use jetscii::Substring;
use nom::{
bytes::complete::{tag, tag_no_case, take_while1},
character::complete::{space0, space1},
sequence::tuple,
IResult, InputTake,
branch::alt,
bytes::complete::{tag, tag_no_case, take_while, take_while1},
character::complete::{alpha1, space0, space1},
combinator::{cond, opt},
sequence::{separated_pair, tuple},
IResult, InputLength, InputTake,
};
use super::{
@ -52,22 +53,91 @@ fn block_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
}
fn block_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
let (input, (ws, start, name, (argument, ws_, nl))) = tuple((
space0,
tag_no_case("#+BEGIN_"),
take_while1(|c| c != ' ' && c != '\t' && c != '\r' && c != '\n'),
trim_line_end,
))(input)?;
let (input, (ws1, begin, name)) = tuple((space0, tag_no_case("#+BEGIN_"), alpha1))(input)?;
let mut b = NodeBuilder::new();
b.ws(ws);
b.text(start);
b.ws(ws1);
b.text(begin);
b.text(name);
b.children.extend(block_argument(argument)?.1);
b.ws(ws_);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
if name.s.eq_ignore_ascii_case("SRC") {
let (input, language) = opt(tuple((
space1,
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
)))(input)?;
let (input, switches) = opt(tuple((space1, source_block_switches)))(input)?;
let (input, ws1) = space0(input)?;
let (input, (parameters, ws2, nl)) = trim_line_end(input)?;
if let Some((ws, language)) = language {
b.ws(ws);
b.token(SRC_BLOCK_LANGUAGE, language);
}
if let Some((ws, switches)) = switches {
b.ws(ws);
b.token(SRC_BLOCK_SWITCHES, switches);
}
b.ws(ws1);
if !parameters.is_empty() {
b.token(SRC_BLOCK_PARAMETERS, parameters);
}
b.ws(ws2);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
} else if name.s.eq_ignore_ascii_case("EXPORT") {
let (input, ty) = opt(tuple((
space1,
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
)))(input)?;
let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?;
let (input, nl) = eol_or_eof(input)?;
if let Some((ws, ty)) = ty {
b.ws(ws);
b.token(EXPORT_BLOCK_TYPE, ty);
}
b.text(data);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
} else {
let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?;
let (input, nl) = eol_or_eof(input)?;
b.text(data);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
}
}
fn source_block_switches(input: Input) -> IResult<Input, Input, ()> {
let mut i = input;
while !i.is_empty() {
match tuple::<_, _, (), _>((
cond(i.input_len() != input.input_len(), space1),
alt((
separated_pair(
alt((tag("-l"), tag("-n"))),
space1,
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
),
tuple((tag("+"), alpha1)),
tuple((tag("-"), alpha1)),
)),
))(i)
{
Ok((i_, _)) => i = i_,
_ => break,
}
}
let len = input.input_len() - i.input_len();
if len == 0 {
Err(nom::Err::Error(()))
} else {
Ok(input.take_split(len))
}
}
fn block_end_node<'a>(input: Input<'a>, name: &str) -> IResult<Input<'a>, GreenElement, ()> {
@ -112,55 +182,6 @@ fn comma_quoted_text_nodes(input: Input) -> Vec<GreenElement> {
nodes
}
/// Splits the argument text of a block's begin line (what the caller passes in
/// after `#+BEGIN_<name>`) into whitespace and text children.
///
/// Every blank-separated word becomes its own text token. A word starting with
/// `:` that is followed by more input additionally takes a value: the text
/// running up to the next ` :` / `\t:` (trailing blanks excluded) or, failing
/// that, to the end of the input, emitted as one token.
///
/// Returns an error when the remaining input is not "blanks followed by a
/// non-blank word" (for example when only trailing whitespace is left).
fn block_argument(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
let mut b = NodeBuilder::new();
let mut i = input;
while !i.is_empty() {
// Each word must be introduced by at least one space or tab.
let (input, ws) = space1(i)?;
b.ws(ws);
let (input, name) = take_while1(|c| c != ' ' && c != '\t')(input)?;
b.text(name);
// A plain word, or a `:name` with nothing after it: there is no value to read.
if !name.s.starts_with(':') || input.is_empty() {
// Progress check: every pass of the loop must consume some input.
debug_assert!(
input.s.len() < i.s.len(),
"{} < {}",
input.s.len(),
i.s.len()
);
i = input;
continue;
}
let (input, ws) = space1(input)?;
b.ws(ws);
// The value ends where the next argument (a blank followed by `:`) begins.
// NOTE(review): a "\t:" match is only considered when no " :" exists anywhere,
// even if the "\t:" occurs earlier in the input — confirm this is intended.
if let Some(idx) = Substring::new(" :")
.find(input.s)
.or_else(|| Substring::new("\t:").find(input.s))
{
// Pull the end of the value back over any blanks that precede the separator.
// NOTE(review): `i + 1` is a byte offset and assumes the last non-blank
// character is one byte wide; verify behavior for multi-byte characters.
let idx = input.s[0..idx]
.rfind(|c| c != ' ' && c != '\t')
.map(|i| i + 1)
.unwrap_or(idx);
let (input, argument) = input.take_split(idx);
b.text(argument);
// Progress check: every pass of the loop must consume some input.
debug_assert!(
input.s.len() < i.s.len(),
"{} < {}",
input.s.len(),
i.s.len()
);
i = input;
} else {
// No further argument follows: the rest of the input is the value.
// `i` is not advanced on this path, so the returned remainder still points
// at the start of this last argument.
b.text(input);
break;
}
}
Ok((i, b.children))
}
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn block_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(block_node_base, input)
@ -255,19 +276,11 @@ alert('Hello World!');
TEXT@0..8 "#+BEGIN_"
TEXT@8..11 "SRC"
WHITESPACE@11..12 " "
TEXT@12..22 "javascript"
SRC_BLOCK_LANGUAGE@12..22 "javascript"
WHITESPACE@22..24 " "
TEXT@24..26 "-n"
WHITESPACE@26..27 " "
TEXT@27..29 "20"
WHITESPACE@29..30 " "
TEXT@30..32 "-r"
SRC_BLOCK_SWITCHES@24..32 "-n 20 -r"
WHITESPACE@32..34 " "
TEXT@34..38 ":var"
WHITESPACE@38..39 " "
TEXT@39..47 "n=0, l=2"
WHITESPACE@47..49 " "
TEXT@49..57 ":foo=bar"
SRC_BLOCK_PARAMETERS@34..57 ":var n=0, l=2 :foo=bar"
NEW_LINE@57..58 "\n"
BLOCK_CONTENT@58..81
TEXT@58..81 "alert('Hello World!');\n"

View file

@ -76,7 +76,7 @@ macro_rules! lossless_parser {
($parser:expr, $input:expr) => {{
let i_ = $input;
let (i, o) = $parser($input)?;
tracing::info!(consumed = o.to_string());
tracing::trace!(consumed = o.to_string());
debug_assert_eq!(
&i_.as_str()[0..(i_.s.len() - i.s.len())],
&o.to_string(),
@ -292,7 +292,9 @@ impl NodeBuilder {
}
pub fn text(&mut self, i: Input) {
self.children.push(i.text_token())
if !i.is_empty() {
self.children.push(i.text_token())
}
}
pub fn token(&mut self, kind: SyntaxKind, i: Input) {

View file

@ -110,7 +110,6 @@ fn parse() {
COLON@3..4 ":"
TEXT@4..8 "WORD"
R_BRACKET@8..9 "]"
TEXT@9..9 ""
"###
);

View file

@ -178,6 +178,10 @@ pub enum SyntaxKind {
BLOCK_BEGIN,
BLOCK_END,
BLOCK_CONTENT,
SRC_BLOCK_SWITCHES,
SRC_BLOCK_LANGUAGE,
SRC_BLOCK_PARAMETERS,
EXPORT_BLOCK_TYPE,
LATEX_ENVIRONMENT,
//