diff --git a/src/ast/block.rs b/src/ast/block.rs index f0c7b5e..7aa61fb 100644 --- a/src/ast/block.rs +++ b/src/ast/block.rs @@ -1,112 +1,89 @@ -use crate::{SyntaxKind, SyntaxNode}; +use crate::SyntaxKind; -use super::{filter_token, SourceBlock, Token}; - -fn argument(node: &SyntaxNode, name: &str) -> Option { - node.children() - .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) - .and_then(|n| { - let mut iter = n - .children_with_tokens() - .filter_map(filter_token(SyntaxKind::TEXT)) - .skip_while(|n| n != name); - - iter.next()?; - - Some(iter.next().unwrap_or_default()) - }) -} +use super::{filter_token, ExportBlock, SourceBlock, Token}; impl SourceBlock { /// ```rust /// use orgize::{Org, ast::SourceBlock}; /// /// let block = Org::parse("#+begin_src c\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.language(), "c"); + /// assert_eq!(block.language().unwrap(), "c"); /// let block = Org::parse("#+begin_src javascript \n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.language(), "javascript"); + /// assert_eq!(block.language().unwrap(), "javascript"); + /// /// let block = Org::parse("#+begin_src\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.language(), ""); + /// assert!(block.language().is_none()); /// ```` - pub fn language(&self) -> Token { + pub fn language(&self) -> Option { self.syntax .children() .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) - .and_then(|n| { - n.children_with_tokens() - .filter_map(filter_token(SyntaxKind::TEXT)) - .nth(2) - }) - .unwrap_or_default() + .into_iter() + .flat_map(|n| n.children_with_tokens()) + .find_map(filter_token(SyntaxKind::SRC_BLOCK_LANGUAGE)) + } + + /// ```rust + /// use orgize::{Org, ast::SourceBlock}; + /// + /// let block = Org::parse("#+begin_src emacs-lisp -n 20\n#+end_src").first_node::().unwrap(); + /// assert_eq!(block.switches().unwrap(), "-n 20"); + /// let block = Org::parse("#+begin_src emacs-lisp -n 20 -r :tangle yes \n#+end_src").first_node::().unwrap(); + /// assert_eq!(block.switches().unwrap(), "-n 20 -r"); + /// + /// let block = Org::parse("#+begin_src emacs-lisp\n#+end_src").first_node::().unwrap(); + /// assert!(block.switches().is_none()); + /// let block = Org::parse("#+begin_src\n#+end_src").first_node::().unwrap(); + /// assert!(block.switches().is_none()); + /// let block = Org::parse("#+begin_src :tangle yes\n#+end_src").first_node::().unwrap(); + /// assert!(block.switches().is_none()); + /// ```` + pub fn switches(&self) -> Option { + self.syntax + .children() + .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) + .into_iter() + .flat_map(|n| n.children_with_tokens()) + .find_map(filter_token(SyntaxKind::SRC_BLOCK_SWITCHES)) } /// ```rust /// use orgize::{Org, ast::SourceBlock}; /// /// let block = Org::parse("#+begin_src c :tangle yes\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.tangle().unwrap(), "yes"); - /// let block = Org::parse("#+begin_src c :tangle\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.tangle().unwrap(), ""); - /// let block = Org::parse("#+begin_src c\n#+end_src").first_node::().unwrap(); - /// assert!(block.tangle().is_none()); - /// ```` - pub fn tangle(&self) -> Option { - argument(&self.syntax, ":tangle") - } - - /// ```rust - /// use orgize::{Org, ast::SourceBlock}; + /// assert_eq!(block.parameters().unwrap(), ":tangle yes"); + /// let block = Org::parse("#+begin_src c :tangle \n#+end_src").first_node::().unwrap(); + /// assert_eq!(block.parameters().unwrap(), ":tangle"); /// - /// let block = Org::parse("#+begin_src c :mkdir yes\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.mkdir().unwrap(), "yes"); - /// let block = Org::parse("#+begin_src c :mkdir\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.mkdir().unwrap(), ""); /// let block = Org::parse("#+begin_src c\n#+end_src").first_node::().unwrap(); - /// assert!(block.mkdir().is_none()); + /// assert!(block.parameters().is_none()); /// ```` - pub fn mkdir(&self) -> Option { - argument(&self.syntax, ":mkdir") - } - - /// ```rust - /// use orgize::{Org, ast::SourceBlock}; - /// - /// let block = Org::parse("#+begin_src c :comments both\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.comments().unwrap(), "both"); - /// let block = Org::parse("#+begin_src c :comments\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.comments().unwrap(), ""); - /// let block = Org::parse("#+begin_src c\n#+end_src").first_node::().unwrap(); - /// assert!(block.comments().is_none()); - /// ```` - pub fn comments(&self) -> Option { - argument(&self.syntax, ":comments") - } - - /// ```rust - /// use orgize::{Org, ast::SourceBlock}; - /// - /// let block = Org::parse("#+begin_src c :padline yes\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.padline().unwrap(), "yes"); - /// let block = Org::parse("#+begin_src c :padline\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.padline().unwrap(), ""); - /// let block = Org::parse("#+begin_src c\n#+end_src").first_node::().unwrap(); - /// assert!(block.padline().is_none()); - /// ```` - pub fn padline(&self) -> Option { - argument(&self.syntax, ":padline") - } - - /// ```rust - /// use orgize::{Org, ast::SourceBlock}; - /// - /// let block = Org::parse("#+begin_src c :tangle-mode o444\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.tangle_mode().unwrap(), "o444"); - /// let block = Org::parse("#+begin_src c :tangle-mode\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.tangle_mode().unwrap(), ""); - /// let block = Org::parse("#+begin_src c\n#+end_src").first_node::().unwrap(); - /// assert!(block.tangle_mode().is_none()); - /// ```` - pub fn tangle_mode(&self) -> Option { - argument(&self.syntax, ":tangle-mode") + pub fn parameters(&self) -> Option { + self.syntax + .children() + .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) + .into_iter() + .flat_map(|n| n.children_with_tokens()) + .find_map(filter_token(SyntaxKind::SRC_BLOCK_PARAMETERS)) + } +} + +impl ExportBlock { + /// ```rust + /// use orgize::{Org, ast::ExportBlock}; + /// + /// let block = Org::parse("#+begin_export html\n#+end_export").first_node::().unwrap(); + /// assert_eq!(block.ty().unwrap(), "html"); + /// + /// let block = Org::parse("#+begin_export\n#+end_export").first_node::().unwrap(); + /// assert!(block.ty().is_none()); + /// ```` + pub fn ty(&self) -> Option { + self.syntax + .children() + .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) + .into_iter() + .flat_map(|n| n.children_with_tokens()) + .find_map(filter_token(SyntaxKind::EXPORT_BLOCK_TYPE)) } } diff --git a/src/ast/generate.js b/src/ast/generate.js index d083f8f..e286971 100644 --- a/src/ast/generate.js +++ b/src/ast/generate.js @@ -27,6 +27,7 @@ const nodes = [ first_child: [ ["section", "Section"], ["planning", "Planning"], + ["properties", "PropertyDrawer"], ], children: [["headlines", "Headline"]], post_blank: true, diff --git a/src/ast/generated.rs b/src/ast/generated.rs index ccdfa77..5bdc37d 100644 --- a/src/ast/generated.rs +++ b/src/ast/generated.rs @@ -160,6 +160,9 @@ impl Headline { pub fn planning(&self) -> Option { support::child(&self.syntax) } + pub fn properties(&self) -> Option { + support::child(&self.syntax) + } pub fn headlines(&self) -> AstChildren { support::children(&self.syntax) } diff --git a/src/ast/keyword.rs b/src/ast/keyword.rs new file mode 100644 index 0000000..86c7aba --- /dev/null +++ b/src/ast/keyword.rs @@ -0,0 +1,39 @@ +use crate::SyntaxKind; + +use super::{filter_token, Keyword, Token}; + +impl Keyword { + /// + /// ```rust + /// use orgize::{Org, ast::Keyword}; + /// + /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::().unwrap(); + /// assert_eq!(keyword.key(), "KEY"); + /// ``` + pub fn key(&self) -> Token { + self.syntax + .children_with_tokens() + .find_map(filter_token(SyntaxKind::TEXT)) + .unwrap_or_else(|| { + debug_assert!(false, "keyword must contains TEXT"); + Token::default() + }) + } + + /// + /// ```rust + /// use orgize::{Org, ast::Keyword}; + /// + /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::().unwrap(); + /// assert_eq!(keyword.value(), " VALUE"); + /// let keyword = Org::parse("#+KEY:").first_node::().unwrap(); + /// assert_eq!(keyword.value(), ""); + /// ``` + pub fn value(&self) -> Token { + self.syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)) + .nth(1) + .unwrap_or_default() + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 4be4ca4..ff56aa8 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -10,6 +10,7 @@ mod entity; mod headline; mod inline_call; mod inline_src; +mod keyword; mod link; mod list; mod macros; diff --git a/src/export/traverse.rs b/src/export/traverse.rs index 42e2c24..c47a6fe 100644 --- a/src/export/traverse.rs +++ b/src/export/traverse.rs @@ -199,6 +199,7 @@ pub trait Traverser { SUBSCRIPT => walk!(Subscript), KEYWORD => walk!(Keyword), PROPERTY_DRAWER => walk!(PropertyDrawer), + NODE_PROPERTY => {} BLOCK_CONTENT | LIST_ITEM_CONTENT => { for child in node.children_with_tokens() { self.element(child, ctx); diff --git a/src/syntax/block.rs b/src/syntax/block.rs index 8f5d797..cbeb264 100644 --- a/src/syntax/block.rs +++ b/src/syntax/block.rs @@ -1,9 +1,10 @@ -use jetscii::Substring; use nom::{ - bytes::complete::{tag, tag_no_case, take_while1}, - character::complete::{space0, space1}, - sequence::tuple, - IResult, InputTake, + branch::alt, + bytes::complete::{tag, tag_no_case, take_while, take_while1}, + character::complete::{alpha1, space0, space1}, + combinator::{cond, opt}, + sequence::{separated_pair, tuple}, + IResult, InputLength, InputTake, }; use super::{ @@ -52,22 +53,91 @@ fn block_node_base(input: Input) -> IResult { } fn block_begin_node(input: Input) -> IResult { - let (input, (ws, start, name, (argument, ws_, nl))) = tuple(( - space0, - tag_no_case("#+BEGIN_"), - take_while1(|c| c != ' ' && c != '\t' && c != '\r' && c != '\n'), - trim_line_end, - ))(input)?; + let (input, (ws1, begin, name)) = tuple((space0, tag_no_case("#+BEGIN_"), alpha1))(input)?; let mut b = NodeBuilder::new(); - b.ws(ws); - b.text(start); + b.ws(ws1); + b.text(begin); b.text(name); - b.children.extend(block_argument(argument)?.1); - b.ws(ws_); - b.nl(nl); - Ok((input, (b.finish(BLOCK_BEGIN), name.as_str()))) + if name.s.eq_ignore_ascii_case("SRC") { + let (input, language) = opt(tuple(( + space1, + take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'), + )))(input)?; + let (input, switches) = opt(tuple((space1, source_block_switches)))(input)?; + let (input, ws1) = space0(input)?; + let (input, (parameters, ws2, nl)) = trim_line_end(input)?; + + if let Some((ws, language)) = language { + b.ws(ws); + b.token(SRC_BLOCK_LANGUAGE, language); + } + if let Some((ws, switches)) = switches { + b.ws(ws); + b.token(SRC_BLOCK_SWITCHES, switches); + } + b.ws(ws1); + if !parameters.is_empty() { + b.token(SRC_BLOCK_PARAMETERS, parameters); + } + b.ws(ws2); + b.nl(nl); + Ok((input, (b.finish(BLOCK_BEGIN), name.as_str()))) + } else if name.s.eq_ignore_ascii_case("EXPORT") { + let (input, ty) = opt(tuple(( + space1, + take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'), + )))(input)?; + let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?; + let (input, nl) = eol_or_eof(input)?; + + if let Some((ws, ty)) = ty { + b.ws(ws); + b.token(EXPORT_BLOCK_TYPE, ty); + } + b.text(data); + b.nl(nl); + Ok((input, (b.finish(BLOCK_BEGIN), name.as_str()))) + } else { + let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?; + let (input, nl) = eol_or_eof(input)?; + + b.text(data); + b.nl(nl); + Ok((input, (b.finish(BLOCK_BEGIN), name.as_str()))) + } +} + +fn source_block_switches(input: Input) -> IResult { + let mut i = input; + + while !i.is_empty() { + match tuple::<_, _, (), _>(( + cond(i.input_len() != input.input_len(), space1), + alt(( + separated_pair( + alt((tag("-l"), tag("-n"))), + space1, + take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'), + ), + tuple((tag("+"), alpha1)), + tuple((tag("-"), alpha1)), + )), + ))(i) + { + Ok((i_, _)) => i = i_, + _ => break, + } + } + + let len = input.input_len() - i.input_len(); + + if len == 0 { + Err(nom::Err::Error(())) + } else { + Ok(input.take_split(len)) + } } fn block_end_node<'a>(input: Input<'a>, name: &str) -> IResult, GreenElement, ()> { @@ -112,55 +182,6 @@ fn comma_quoted_text_nodes(input: Input) -> Vec { nodes } -fn block_argument(input: Input) -> IResult, ()> { - let mut b = NodeBuilder::new(); - - let mut i = input; - - while !i.is_empty() { - let (input, ws) = space1(i)?; - b.ws(ws); - let (input, name) = take_while1(|c| c != ' ' && c != '\t')(input)?; - b.text(name); - if !name.s.starts_with(':') || input.is_empty() { - debug_assert!( - input.s.len() < i.s.len(), - "{} < {}", - input.s.len(), - i.s.len() - ); - i = input; - continue; - } - let (input, ws) = space1(input)?; - b.ws(ws); - - if let Some(idx) = Substring::new(" :") - .find(input.s) - .or_else(|| Substring::new("\t:").find(input.s)) - { - let idx = input.s[0..idx] - .rfind(|c| c != ' ' && c != '\t') - .map(|i| i + 1) - .unwrap_or(idx); - let (input, argument) = input.take_split(idx); - b.text(argument); - debug_assert!( - input.s.len() < i.s.len(), - "{} < {}", - input.s.len(), - i.s.len() - ); - i = input; - } else { - b.text(input); - break; - } - } - - Ok((i, b.children)) -} - #[tracing::instrument(level = "debug", skip(input), fields(input = input.s))] pub fn block_node(input: Input) -> IResult { crate::lossless_parser!(block_node_base, input) @@ -255,19 +276,11 @@ alert('Hello World!'); TEXT@0..8 "#+BEGIN_" TEXT@8..11 "SRC" WHITESPACE@11..12 " " - TEXT@12..22 "javascript" + SRC_BLOCK_LANGUAGE@12..22 "javascript" WHITESPACE@22..24 " " - TEXT@24..26 "-n" - WHITESPACE@26..27 " " - TEXT@27..29 "20" - WHITESPACE@29..30 " " - TEXT@30..32 "-r" + SRC_BLOCK_SWITCHES@24..32 "-n 20 -r" WHITESPACE@32..34 " " - TEXT@34..38 ":var" - WHITESPACE@38..39 " " - TEXT@39..47 "n=0, l=2" - WHITESPACE@47..49 " " - TEXT@49..57 ":foo=bar" + SRC_BLOCK_PARAMETERS@34..57 ":var n=0, l=2 :foo=bar" NEW_LINE@57..58 "\n" BLOCK_CONTENT@58..81 TEXT@58..81 "alert('Hello World!');\n" diff --git a/src/syntax/combinator.rs b/src/syntax/combinator.rs index 2047e96..dc86e51 100644 --- a/src/syntax/combinator.rs +++ b/src/syntax/combinator.rs @@ -76,7 +76,7 @@ macro_rules! lossless_parser { ($parser:expr, $input:expr) => {{ let i_ = $input; let (i, o) = $parser($input)?; - tracing::info!(consumed = o.to_string()); + tracing::trace!(consumed = o.to_string()); debug_assert_eq!( &i_.as_str()[0..(i_.s.len() - i.s.len())], &o.to_string(), @@ -292,7 +292,9 @@ impl NodeBuilder { } pub fn text(&mut self, i: Input) { - self.children.push(i.text_token()) + if !i.is_empty() { + self.children.push(i.text_token()) + } } pub fn token(&mut self, kind: SyntaxKind, i: Input) { diff --git a/src/syntax/fn_def.rs b/src/syntax/fn_def.rs index adcc69b..847eae5 100644 --- a/src/syntax/fn_def.rs +++ b/src/syntax/fn_def.rs @@ -110,7 +110,6 @@ fn parse() { COLON@3..4 ":" TEXT@4..8 "WORD" R_BRACKET@8..9 "]" - TEXT@9..9 "" "### ); diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 24dcf84..a2659b6 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -178,6 +178,10 @@ pub enum SyntaxKind { BLOCK_BEGIN, BLOCK_END, BLOCK_CONTENT, + SRC_BLOCK_SWITCHES, + SRC_BLOCK_LANGUAGE, + SRC_BLOCK_PARAMETERS, + EXPORT_BLOCK_TYPE, LATEX_ENVIRONMENT, //