diff --git a/src/ast/block.rs b/src/ast/block.rs index 7aa61fb..0138936 100644 --- a/src/ast/block.rs +++ b/src/ast/block.rs @@ -66,6 +66,26 @@ impl SourceBlock { .flat_map(|n| n.children_with_tokens()) .find_map(filter_token(SyntaxKind::SRC_BLOCK_PARAMETERS)) } + + /// Returns the unescaped source code as a string + /// + /// ```rust + /// use orgize::{Org, ast::SourceBlock}; + /// + /// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap(); + /// assert_eq!(block.value(), ""); + /// let block = Org::parse("#+begin_src\n,* foo \n,#+ bar\n#+end_src").first_node::<SourceBlock>().unwrap(); + /// assert_eq!(block.value(), "* foo \n#+ bar\n"); + /// ``` + pub fn value(&self) -> String { + self.syntax + .children() + .find(|e| e.kind() == SyntaxKind::BLOCK_CONTENT) + .into_iter() + .flat_map(|n| n.children_with_tokens()) + .filter_map(filter_token(SyntaxKind::TEXT)) + .fold(String::new(), |acc, value| acc + &value) + } } impl ExportBlock { diff --git a/src/ast/comment.rs b/src/ast/comment.rs new file mode 100644 index 0000000..a08bc39 --- /dev/null +++ b/src/ast/comment.rs @@ -0,0 +1,20 @@ +use crate::SyntaxKind; + +use super::{filter_token, Comment}; + +impl Comment { + /// Contents without the leading pound signs + /// + /// ```rust + /// use orgize::{ast::Comment, Org}; + /// + /// let comment = Org::parse("# A\n#\n# B\n# C").first_node::<Comment>().unwrap(); + /// assert_eq!(comment.value(), "A\n\nB\nC"); + /// ``` + pub fn value(&self) -> String { + self.syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)) + .fold(String::new(), |acc, text| acc + &text) + } +} diff --git a/src/ast/fixed_width.rs b/src/ast/fixed_width.rs new file mode 100644 index 0000000..b601738 --- /dev/null +++ b/src/ast/fixed_width.rs @@ -0,0 +1,20 @@ +use crate::SyntaxKind; + +use super::{filter_token, FixedWidth}; + +impl FixedWidth { + /// Contents without the leading colon prefix + /// + /// ```rust + /// use orgize::{ast::FixedWidth, Org}; + /// + /// let fixed = Org::parse(": A\n:\n: B\n: 
C").first_node::<FixedWidth>().unwrap(); + /// assert_eq!(fixed.value(), "A\n\nB\nC"); + /// ``` + pub fn value(&self) -> String { + self.syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)) + .fold(String::new(), |acc, text| acc + &text) + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ff56aa8..57ee108 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -5,8 +5,10 @@ mod generated; mod affiliated_keyword; mod block; mod clock; +mod comment; mod drawer; mod entity; +mod fixed_width; mod headline; mod inline_call; mod inline_src; diff --git a/src/syntax/comment.rs b/src/syntax/comment.rs index f77f6b7..b6b86c9 100644 --- a/src/syntax/comment.rs +++ b/src/syntax/comment.rs @@ -1,39 +1,53 @@ -use nom::{IResult, InputTake}; +use nom::{ + bytes::complete::{tag, take_while}, + character::complete::{space0, space1}, + combinator::{iterator, opt}, + sequence::tuple, + IResult, +}; use super::{ - combinator::{blank_lines, line_ends_iter, node, GreenElement}, + combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder}, input::Input, SyntaxKind, }; fn comment_node_base(input: Input) -> IResult { - let mut start = 0; - for i in line_ends_iter(input.as_str()) { - let mut iter = input.as_bytes()[start..] 
- .iter() - .skip_while(|&&b| b == b' ' || b == b'\t'); + let mut b = NodeBuilder::new(); - if matches!(iter.next(), Some(b'#')) - && matches!(iter.next(), None | Some(b'\n') | Some(b'\r') | Some(b' ')) - { - start = i; - } else { - break; + let mut iter = iterator( + input, + opt(tuple(( + space0, + tag("#"), + opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))), + eol_or_eof, + ))), + ); + + for (idx, option) in iter.enumerate() { + match option { + Some((ws, common, content, eol)) => { + b.ws(ws); + b.token(SyntaxKind::HASH, common); + if let Some((ws, text)) = content { + b.ws(ws); + b.text(text); + } + b.text(eol); + } + _ if idx == 0 => return Err(nom::Err::Error(())), + _ => break, } } - if start == 0 { - return Err(nom::Err::Error(())); - } + let (input, _) = iter.finish()?; - let (input, contents) = input.take_split(start); let (input, post_blank) = blank_lines(input)?; - let mut children = vec![]; - children.push(contents.text_token()); - children.extend(post_blank); + b.children.extend(post_blank); - Ok((input, node(SyntaxKind::COMMENT, children))) + Ok((input, b.finish(SyntaxKind::COMMENT))) } #[tracing::instrument(level = "debug", skip(input), fields(input = input.s))] @@ -65,7 +79,7 @@ fn parse() { t("#"), @r###" COMMENT@0..1 - TEXT@0..1 "#" + HASH@0..1 "#" "### ); @@ -73,7 +87,16 @@ fn parse() { t("#\n # a\n #\n\n"), @r###" COMMENT@0..12 - TEXT@0..11 "#\n # a\n #\n" + HASH@0..1 "#" + TEXT@1..2 "\n" + WHITESPACE@2..4 " " + HASH@4..5 "#" + WHITESPACE@5..6 " " + TEXT@6..7 "a" + TEXT@7..8 "\n" + WHITESPACE@8..9 " " + HASH@9..10 "#" + TEXT@10..11 "\n" BLANK_LINE@11..12 "\n" "### ); @@ -82,7 +105,8 @@ fn parse() { t("#\na\n #\n\n"), @r###" COMMENT@0..2 - TEXT@0..2 "#\n" + HASH@0..1 "#" + TEXT@1..2 "\n" "### ); } diff --git a/src/syntax/fixed_width.rs b/src/syntax/fixed_width.rs index 9e9cf2d..947791a 100644 --- a/src/syntax/fixed_width.rs +++ b/src/syntax/fixed_width.rs @@ -1,39 +1,57 @@ -use nom::{IResult, InputTake}; +use nom::{ + 
bytes::complete::{tag, take_while}, + character::complete::{space0, space1}, + combinator::{iterator, opt}, + sequence::tuple, + IResult, +}; use super::{ - combinator::{blank_lines, line_ends_iter, node, GreenElement}, + combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder}, input::Input, + keyword::affiliated_keyword_nodes, SyntaxKind, }; fn fixed_width_node_base(input: Input) -> IResult { - let mut start = 0; - for i in line_ends_iter(input.as_str()) { - let mut iter = input.as_bytes()[start..] - .iter() - .skip_while(|&&b| b == b' ' || b == b'\t'); + let mut b = NodeBuilder::new(); - if matches!(iter.next(), Some(b':')) - && matches!(iter.next(), None | Some(b'\n') | Some(b'\r') | Some(b' ')) - { - start = i; - } else { - break; + let (input, keywords) = affiliated_keyword_nodes(input)?; + b.children.extend(keywords); + + let mut iter = iterator( + input, + opt(tuple(( + space0, + tag(":"), + opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))), + eol_or_eof, + ))), + ); + + for (idx, option) in iter.enumerate() { + match option { + Some((ws, common, content, eol)) => { + b.ws(ws); + b.token(SyntaxKind::COMMA, common); + if let Some((ws, text)) = content { + b.ws(ws); + b.text(text); + } + b.text(eol); + } + _ if idx == 0 => return Err(nom::Err::Error(())), + _ => break, } } - if start == 0 { - return Err(nom::Err::Error(())); - } + let (input, _) = iter.finish()?; - let (input, contents) = input.take_split(start); let (input, post_blank) = blank_lines(input)?; - let mut children = vec![]; - children.push(contents.text_token()); - children.extend(post_blank); + b.children.extend(post_blank); - Ok((input, node(SyntaxKind::FIXED_WIDTH, children))) + Ok((input, b.finish(SyntaxKind::FIXED_WIDTH))) } #[tracing::instrument(level = "debug", skip(input), fields(input = input.s))] @@ -58,7 +76,20 @@ fn parse() { ).syntax, @r###" FIXED_WIDTH@0..19 - TEXT@0..14 ": A\n:\n: B\n: C\n" + COMMA@0..1 ":" + WHITESPACE@1..2 " " + TEXT@2..3 "A" + TEXT@3..4 "\n" 
+ COMMA@4..5 ":" + TEXT@5..6 "\n" + COMMA@6..7 ":" + WHITESPACE@7..8 " " + TEXT@8..9 "B" + TEXT@9..10 "\n" + COMMA@10..11 ":" + WHITESPACE@11..12 " " + TEXT@12..13 "C" + TEXT@13..14 "\n" BLANK_LINE@14..15 "\n" BLANK_LINE@15..19 " " "###