diff --git a/src/syntax/emphasis.rs b/src/syntax/emphasis.rs index 24c2c92..c2654ae 100644 --- a/src/syntax/emphasis.rs +++ b/src/syntax/emphasis.rs @@ -5,7 +5,7 @@ use nom::{combinator::map, AsBytes, IResult, Slice}; use super::{ combinator::{node, token, GreenElement}, input::Input, - object::object_nodes, + object::standard_object_nodes, SyntaxKind::*, }; @@ -13,7 +13,7 @@ use super::{ pub fn bold_node(input: Input) -> IResult { let mut parser = map(emphasis(b'*'), |contents| { let mut children = vec![token(STAR, "*")]; - children.extend(object_nodes(contents)); + children.extend(standard_object_nodes(contents)); children.push(token(STAR, "*")); node(BOLD, children) }); @@ -35,7 +35,7 @@ pub fn code_node(input: Input) -> IResult { pub fn strike_node(input: Input) -> IResult { let mut parser = map(emphasis(b'+'), |contents| { let mut children = vec![token(PLUS, "+")]; - children.extend(object_nodes(contents)); + children.extend(standard_object_nodes(contents)); children.push(token(PLUS, "+")); node(STRIKE, children) }); @@ -57,7 +57,7 @@ pub fn verbatim_node(input: Input) -> IResult { pub fn underline_node(input: Input) -> IResult { let mut parser = map(emphasis(b'_'), |contents| { let mut children = vec![token(UNDERSCORE, "_")]; - children.extend(object_nodes(contents)); + children.extend(standard_object_nodes(contents)); children.push(token(UNDERSCORE, "_")); node(UNDERLINE, children) }); @@ -68,7 +68,7 @@ pub fn underline_node(input: Input) -> IResult { pub fn italic_node(input: Input) -> IResult { let mut parser = map(emphasis(b'/'), |contents| { let mut children = vec![token(SLASH, "/")]; - children.extend(object_nodes(contents)); + children.extend(standard_object_nodes(contents)); children.push(token(SLASH, "/")); node(ITALIC, children) }); diff --git a/src/syntax/fn_ref.rs b/src/syntax/fn_ref.rs index 8ba1c69..e24d207 100644 --- a/src/syntax/fn_ref.rs +++ b/src/syntax/fn_ref.rs @@ -9,7 +9,7 @@ use nom::{ use super::{ combinator::{colon_token, l_bracket_token, node, r_bracket_token, GreenElement}, input::Input, - object::object_nodes, + object::standard_object_nodes, SyntaxKind::*, }; @@ -31,7 +31,7 @@ fn fn_ref_node_base(input: Input) -> IResult { let mut children = vec![l_bracket, fn_.text_token(), colon, label.text_token()]; if let Some((colon, definition)) = definition { children.push(colon); - children.extend(object_nodes(definition)); + children.extend(standard_object_nodes(definition)); } children.push(r_bracket); diff --git a/src/syntax/headline.rs b/src/syntax/headline.rs index 0d1915f..118a1b5 100644 --- a/src/syntax/headline.rs +++ b/src/syntax/headline.rs @@ -15,7 +15,7 @@ use super::{ drawer::property_drawer_node, element::element_nodes, input::Input, - object::object_nodes, + object::standard_object_nodes, planning::planning_node, SyntaxKind::*, }; @@ -54,7 +54,7 @@ fn headline_node_base(input: Input) -> IResult { let (title, tags) = opt(headline_tags_node)(title_and_tags)?; if !title.is_empty() { - b.push(node(HEADLINE_TITLE, object_nodes(title))); + b.push(node(HEADLINE_TITLE, standard_object_nodes(title))); } b.push_opt(tags); b.ws(ws_); diff --git a/src/syntax/latex_environment.rs b/src/syntax/latex_environment.rs index 2136585..b731e1b 100644 --- a/src/syntax/latex_environment.rs +++ b/src/syntax/latex_environment.rs @@ -75,7 +75,7 @@ fn parse() { let to_latex = to_ast::(latex_environment_node); insta::assert_debug_snapshot!( - to_latex(r#"\begin{NAME}\end{NAME}"#).syntax, + to_latex(r"\begin{NAME}\end{NAME}").syntax, @r###" LATEX_ENVIRONMENT@0..22 WHITESPACE@0..0 "" @@ -96,10 +96,10 @@ fn parse() { insta::assert_debug_snapshot!( to_latex( - r#"\begin{align*} + r"\begin{align*} 2x - 5y &= 8 \\ 3x + 9y &= -12 - \end{align*}"# + \end{align*}" ).syntax, @r###" LATEX_ENVIRONMENT@0..70 @@ -121,6 +121,6 @@ fn parse() { let c = ParseConfig::default(); - assert!(latex_environment_node((r#"\begin{equation}\end{align}"#, &c).into()).is_err()); - assert!(latex_environment_node((r#"\begin{_}\end{_}"#, &c).into()).is_err()); + assert!(latex_environment_node((r"\begin{equation}\end{align}", &c).into()).is_err()); + assert!(latex_environment_node((r"\begin{_}\end{_}", &c).into()).is_err()); } diff --git a/src/syntax/link.rs b/src/syntax/link.rs index 93c93b8..52c22d3 100644 --- a/src/syntax/link.rs +++ b/src/syntax/link.rs @@ -10,6 +10,7 @@ use super::{ l_bracket2_token, l_bracket_token, node, r_bracket2_token, r_bracket_token, GreenElement, }, input::Input, + object::link_description_object_nodes, SyntaxKind::*, }; @@ -30,7 +31,8 @@ pub fn link_node(input: Input) -> IResult { let mut children = vec![l_bracket2, path.token(LINK_PATH)]; if let Some((r_bracket, l_bracket, desc)) = desc { - children.extend([r_bracket, l_bracket, desc.text_token()]); + children.extend([r_bracket, l_bracket]); + children.extend(link_description_object_nodes(desc)); } children.push(r_bracket2); @@ -83,6 +85,24 @@ fn parse() { "### ); + let link = to_link("[[https://orgmode.org][*bold* description]]"); + insta::assert_debug_snapshot!( + link.syntax, + @r###" + LINK@0..43 + L_BRACKET2@0..2 "[[" + LINK_PATH@2..21 "https://orgmode.org" + R_BRACKET@21..22 "]" + L_BRACKET@22..23 "[" + BOLD@23..29 + STAR@23..24 "*" + TEXT@24..28 "bold" + STAR@28..29 "*" + TEXT@29..41 " description" + R_BRACKET2@41..43 "]]" + "### + ); + let config = &ParseConfig::default(); assert!(link_node(("[[#id][desc]", config).into()).is_err()); diff --git a/src/syntax/list.rs b/src/syntax/list.rs index 8979393..c12d473 100644 --- a/src/syntax/list.rs +++ b/src/syntax/list.rs @@ -16,7 +16,7 @@ use super::{ element::element_node, input::Input, keyword::affiliated_keyword_nodes, - object::object_nodes, + object::standard_object_nodes, paragraph::paragraph_nodes, SyntaxKind::*, }; @@ -188,7 +188,7 @@ fn list_item_tag(input: Input) -> IResult { let (input, ws) = space0(input)?; let (input, colon2) = colon2_token(input)?; - let mut children = object_nodes(tag); + let mut children = standard_object_nodes(tag); children.push(colon2); Ok((input, (node(LIST_ITEM_TAG, children), ws))) @@ -201,7 +201,10 @@ fn list_item_content_node(input: Input, indent: usize) -> IResult { ), } } + + fn link_description(input: Input) -> ObjectPositions { + ObjectPositions { + input, + pos: 0, + finder: jetscii::bytes!( + b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */ + b'\\', b'$', /* latex & entity */ + b'@', /* snippet */ + b'c', /* inline call */ + b's', /* inline source */ + b'{', /* macros */ + b'[', /* cookie */ + b'^', /* superscript */ + b'_' /* subscript */ + ), + } + } } impl<'a> Iterator for ObjectPositions<'a> { @@ -70,12 +88,11 @@ impl<'a> Iterator for ObjectPositions<'a> { return None; } - let bytes = &self.input.as_bytes()[self.pos..]; let previous = self.pos; - let i = self.finder.find(bytes)?; - self.pos += i + 1; + let i = self.finder.find(&self.input.as_bytes()[self.pos..])?; + let p = self.pos + i; - let p = self.pos - 1; + self.pos = p + 1; debug_assert!( previous < self.pos && self.pos <= self.input.s.len(), @@ -94,6 +111,31 @@ impl<'a> Iterator for ObjectPositions<'a> { } } +/// parse minimal sets of objects, including +/// - LaTeX fragments ('\\') +/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/') +/// - Entities ('\\') +/// - Superscripts and Subscripts +pub fn minimal_object_nodes(input: Input) -> Vec { + object_nodes( + ObjectPositions::minimal, + |i: Input, pre: Input| match &i.as_bytes()[0] { + b'*' if emphasis::verify_pre(pre.s) => bold_node(i), + b'+' if emphasis::verify_pre(pre.s) => strike_node(i), + b'/' if emphasis::verify_pre(pre.s) => italic_node(i), + b'_' if emphasis::verify_pre(pre.s) => underline_node(i), + b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i), + b'~' if emphasis::verify_pre(pre.s) => code_node(i), + b'$' => latex_fragment_node(i), + b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), + b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i), + b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i), + _ => Err(nom::Err::Error(())), + }, + input, + ) +} + /// parses standard sets of objects, including /// /// - Entities @@ -113,16 +155,83 @@ impl<'a> Iterator for ObjectPositions<'a> { /// /// // todo: /// - Citations -pub fn object_nodes(input: Input) -> Vec { - // TODO: - // debug_assert!(!input.is_empty()); +pub fn standard_object_nodes(input: Input) -> Vec { + object_nodes( + ObjectPositions::standard, + |i: Input, pre: Input| match &i.as_bytes()[0] { + b'*' if emphasis::verify_pre(pre.s) => bold_node(i), + b'+' if emphasis::verify_pre(pre.s) => strike_node(i), + b'/' if emphasis::verify_pre(pre.s) => italic_node(i), + b'_' if emphasis::verify_pre(pre.s) => underline_node(i), + b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i), + b'~' if emphasis::verify_pre(pre.s) => code_node(i), + b'@' => snippet_node(i), + b'{' => macros_node(i), + b'<' => radio_target_node(i) + .or_else(|_| target_node(i)) + .or_else(|_| timestamp_diary_node(i)) + .or_else(|_| timestamp_active_node(i)), + b'[' => cookie_node(i) + .or_else(|_| link_node(i)) + .or_else(|_| fn_ref_node(i)) + .or_else(|_| timestamp_inactive_node(i)), + // NOTE: although not specified in document, inline call and inline src follows the + // same pre tokens rule as text markup + b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i), + b's' if emphasis::verify_pre(pre.s) => inline_src_node(i), + b'$' => latex_fragment_node(i), + b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i), + b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), + b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i), + b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i), + _ => Err(nom::Err::Error(())), + }, + input, + ) +} +pub fn link_description_object_nodes(input: Input) -> Vec { + object_nodes( + ObjectPositions::link_description, + |i: Input<'_>, pre: Input<'_>| match &i.as_bytes()[0] { + b'@' => snippet_node(i), + b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i), + b's' if emphasis::verify_pre(pre.s) => inline_src_node(i), + b'{' => macros_node(i), + b'[' => cookie_node(i), + b'*' if emphasis::verify_pre(pre.s) => bold_node(i), + b'+' if emphasis::verify_pre(pre.s) => strike_node(i), + b'/' if emphasis::verify_pre(pre.s) => italic_node(i), + b'_' if emphasis::verify_pre(pre.s) => underline_node(i), + b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i), + b'~' if emphasis::verify_pre(pre.s) => code_node(i), + b'$' => latex_fragment_node(i), + b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), + b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i), + b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i), + _ => Err(nom::Err::Error(())), + }, + input, + ) +} + +fn object_nodes<'a, F, P>(position: F, parse: P, input: Input<'a>) -> Vec +where + F: Fn(Input) -> ObjectPositions, + P: Fn(Input<'a>, Input<'a>) -> IResult, GreenElement, ()>, +{ let mut i = input; let mut nodes = vec![]; 'l: while !i.is_empty() { - for (input, head) in ObjectPositions::standard(i) { - if let Ok((input, pre)) = standard_object_node(input, head) { + for (input, head) in position(i) { + debug_assert!( + input.s.len() >= 2, + "object must have at least two characters: {:?}", + input.s + ); + + if let Ok((input, pre)) = parse(input, head) { if !head.is_empty() { nodes.push(head.text_token()) } @@ -150,106 +259,6 @@ pub fn object_nodes(input: Input) -> Vec { nodes } -/// parse minimal sets of objects, including -/// - LaTeX fragments ('\\') -/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/') -/// - Entities ('\\') -/// - Superscripts and Subscripts -pub fn minimal_object_nodes(input: Input) -> Vec { - let mut i = input; - let mut nodes = vec![]; - - 'l: while !i.is_empty() { - for (input, head) in ObjectPositions::minimal(i) { - if let Ok((input, pre)) = minimal_object_node(input, head) { - if !head.is_empty() { - nodes.push(head.text_token()) - } - nodes.push(pre); - debug_assert!( - input.input_len() < i.input_len(), - "{} < {}", - input.input_len(), - i.input_len() - ); - i = input; - continue 'l; - } - } - nodes.push(i.text_token()); - break; - } - - debug_assert_eq!( - input.as_str(), - nodes.iter().fold(String::new(), |s, i| s + &i.to_string()), - "parser must be lossless" - ); - - nodes -} - -/// parse an object from standard sets -fn standard_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult, GreenElement, ()> { - debug_assert!( - i.s.len() >= 2, - "object must have at least two characters: {:?}", - i.s - ); - - match &i.as_bytes()[0] { - b'*' if emphasis::verify_pre(pre.s) => bold_node(i), - b'+' if emphasis::verify_pre(pre.s) => strike_node(i), - b'/' if emphasis::verify_pre(pre.s) => italic_node(i), - b'_' if emphasis::verify_pre(pre.s) => underline_node(i), - b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i), - b'~' if emphasis::verify_pre(pre.s) => code_node(i), - b'@' => snippet_node(i), - b'{' => macros_node(i), - b'<' => radio_target_node(i) - .or_else(|_| target_node(i)) - .or_else(|_| timestamp_diary_node(i)) - .or_else(|_| timestamp_active_node(i)), - b'[' => cookie_node(i) - .or_else(|_| link_node(i)) - .or_else(|_| fn_ref_node(i)) - .or_else(|_| timestamp_inactive_node(i)), - // NOTE: although not specified in document, inline call and inline src follows the - // same pre tokens rule as text markup - b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i), - b's' if emphasis::verify_pre(pre.s) => inline_src_node(i), - b'$' => latex_fragment_node(i), - b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i), - b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), - b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i), - b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i), - _ => Err(nom::Err::Error(())), - } -} - -/// parse an object from minimal sets -fn minimal_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult, GreenElement, ()> { - debug_assert!( - i.s.len() >= 2, - "object must have at least two characters: {:?}", - i.s - ); - - match &i.as_bytes()[0] { - b'*' if emphasis::verify_pre(pre.s) => bold_node(i), - b'+' if emphasis::verify_pre(pre.s) => strike_node(i), - b'/' if emphasis::verify_pre(pre.s) => italic_node(i), - b'_' if emphasis::verify_pre(pre.s) => underline_node(i), - b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i), - b'~' if emphasis::verify_pre(pre.s) => code_node(i), - b'$' => latex_fragment_node(i), - b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), - b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i), - b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i), - _ => Err(nom::Err::Error(())), - } -} - #[test] fn positions() { let config = crate::ParseConfig::default(); @@ -286,7 +295,7 @@ fn parse() { let t = |input: &str| { let config = &ParseConfig::default(); - let children = object_nodes((input, config).into()); + let children = standard_object_nodes((input, config).into()); SyntaxNode::new_root(node(SyntaxKind::PARAGRAPH, children).into_node().unwrap()) }; diff --git a/src/syntax/paragraph.rs b/src/syntax/paragraph.rs index caa8c65..c80c1c8 100644 --- a/src/syntax/paragraph.rs +++ b/src/syntax/paragraph.rs @@ -4,7 +4,7 @@ use super::{ combinator::{blank_lines, line_ends_iter, node, GreenElement}, input::Input, keyword::affiliated_keyword_nodes, - object::object_nodes, + object::standard_object_nodes, SyntaxKind, }; @@ -51,7 +51,7 @@ fn paragraph_node_base(input: Input) -> IResult { let mut children = vec![]; children.extend(keywords); - children.extend(object_nodes(contents)); + children.extend(standard_object_nodes(contents)); children.extend(post_blank); Ok((input, node(SyntaxKind::PARAGRAPH, children))) diff --git a/src/syntax/subscript_superscript.rs b/src/syntax/subscript_superscript.rs index 1a45efa..c75ee07 100644 --- a/src/syntax/subscript_superscript.rs +++ b/src/syntax/subscript_superscript.rs @@ -9,7 +9,7 @@ use nom::{ use crate::{ syntax::{ combinator::{caret_token, underscore_token}, - object::object_nodes, + object::standard_object_nodes, }, SyntaxKind, }; @@ -29,7 +29,7 @@ pub fn superscript_node(input: Input) -> IResult { Ok((input, node(SyntaxKind::SUPERSCRIPT, children))) } else if let Ok((input, (l, contents, r))) = template1(input) { children.push(l); - children.extend(object_nodes(contents)); + children.extend(standard_object_nodes(contents)); children.push(r); Ok((input, node(SyntaxKind::SUPERSCRIPT, children))) } else if let Ok((input, (sign, contents))) = template2(input) { @@ -53,7 +53,7 @@ pub fn subscript_node(input: Input) -> IResult { Ok((input, node(SyntaxKind::SUBSCRIPT, children))) } else if let Ok((input, (l, contents, r))) = template1(input) { children.push(l); - children.extend(object_nodes(contents)); + children.extend(standard_object_nodes(contents)); children.push(r); Ok((input, node(SyntaxKind::SUBSCRIPT, children))) } else if let Ok((input, (sign, contents))) = template2(input) { diff --git a/src/syntax/table.rs b/src/syntax/table.rs index cb14403..803782d 100644 --- a/src/syntax/table.rs +++ b/src/syntax/table.rs @@ -9,7 +9,7 @@ use nom::{ use super::{ combinator::{blank_lines, line_ends_iter, node, pipe_token, GreenElement, NodeBuilder}, input::Input, - object::object_nodes, + object::standard_object_nodes, SyntaxKind::*, }; @@ -73,11 +73,11 @@ fn table_standard_row_node(input: Input) -> Result> { { Some(idx) => { let (ws, cell) = input.take_split(idx + 1); - b.push(node(ORG_TABLE_CELL, object_nodes(cell))); + b.push(node(ORG_TABLE_CELL, standard_object_nodes(cell))); b.ws(ws); } _ => { - b.push(node(ORG_TABLE_CELL, object_nodes(input))); + b.push(node(ORG_TABLE_CELL, standard_object_nodes(input))); } } });