diff --git a/src/syntax/object.rs b/src/syntax/object.rs index 9903103..961ad05 100644 --- a/src/syntax/object.rs +++ b/src/syntax/object.rs @@ -26,13 +26,26 @@ struct ObjectPositions<'a> { } impl ObjectPositions<'_> { - fn new(input: Input) -> ObjectPositions { + fn standard(input: Input) -> ObjectPositions { ObjectPositions { input, pos: 0, next: Some(0), finder: jetscii::bytes!( - b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n', b'\\', b'$' + b' ', b'(', b'{', b'\'', b'"', b'\n', /* */ + b'\\', b'$', b'@', b'<', b'[' + ), + } + } + + fn minimal(input: Input) -> ObjectPositions { + ObjectPositions { + input, + pos: 0, + next: Some(0), + finder: jetscii::bytes!( + b' ', b'(', b'{', b'\'', b'"', b'\n', /* */ + b'\\', b'$' ), } } @@ -83,6 +96,25 @@ impl<'a> Iterator for ObjectPositions<'a> { } } +/// parses standard sets of objects, including +/// +/// - Entities +/// - LaTeX Fragments +/// - Export Snippets +/// - Footnote References +/// - Inline Babel Calls +/// - Inline Source Blocks +/// - Links +/// - Macros +/// - Targets and Radio Targets +/// - Statistics Cookies +/// - Timestamps +/// - Text Markup (bold code strike verbatim underline italic) +/// +/// // todo: +/// - Citations +/// - Line Breaks +/// - Subscript and Superscript pub fn object_nodes(input: Input) -> Vec { // TODO: // debug_assert!(!input.is_empty()); @@ -91,13 +123,13 @@ pub fn object_nodes(input: Input) -> Vec { let mut nodes = vec![]; 'l: while !i.is_empty() { - for (input, head) in ObjectPositions::new(i) { + for (input, head) in ObjectPositions::standard(i) { debug_assert!( input.s.len() >= 2, "object must have at least two characters: {:?}", input.s ); - if let Ok((input, node)) = object_node(input) { + if let Ok((input, node)) = standard_object_node(input) { if !head.is_empty() { nodes.push(head.text_token()) } @@ -125,8 +157,54 @@ pub fn object_nodes(input: Input) -> Vec { nodes } -/// Recognizes an org-mode element expect text -fn object_node(i: Input) -> IResult { +/// parse minimal sets of objects, including +/// - LaTeX fragments ('\\') +/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/') +/// - Entities ('\\') +/// +/// // todo: +/// - Superscripts and Subscripts +pub fn minimal_object_nodes(input: Input) -> Vec { + let mut i = input; + let mut nodes = vec![]; + + 'l: while !i.is_empty() { + for (input, head) in ObjectPositions::minimal(i) { + debug_assert!( + input.s.len() >= 2, + "object must have at least two characters: {:?}", + input.s + ); + if let Ok((input, node)) = minimal_object_node(input) { + if !head.is_empty() { + nodes.push(head.text_token()) + } + nodes.push(node); + debug_assert!( + input.input_len() < i.input_len(), + "{} < {}", + input.input_len(), + i.input_len() + ); + i = input; + continue 'l; + } + } + nodes.push(i.text_token()); + break; + } + + debug_assert_eq!( + input.as_str(), + nodes.iter().fold(String::new(), |s, i| s + &i.to_string()), + "parser must be lossless" + ); + + nodes +} + +/// parse an object from standard sets +fn standard_object_node(i: Input) -> IResult { match &i.as_bytes()[0] { b'*' => bold_node(i), b'+' => strike_node(i), @@ -152,25 +230,40 @@ fn object_node(i: Input) -> IResult { } } +/// parse an object from minimal sets +fn minimal_object_node(i: Input) -> IResult { + match &i.as_bytes()[0] { + b'*' => bold_node(i), + b'+' => strike_node(i), + b'/' => italic_node(i), + b'_' => underline_node(i), + b'=' => verbatim_node(i), + b'~' => code_node(i), + b'$' => latex_fragment_node(i), + b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), + _ => Err(nom::Err::Error(())), + } +} + #[test] fn positions() { let config = crate::ParseConfig::default(); - let vec = ObjectPositions::new(("*", &config).into()).collect::>(); + let vec = ObjectPositions::standard(("*", &config).into()).collect::>(); assert!(vec.is_empty()); - let vec = ObjectPositions::new(("*{", &config).into()).collect::>(); + let vec = ObjectPositions::standard(("*{", &config).into()).collect::>(); assert_eq!(vec.len(), 1); assert_eq!(vec[0].0.s, "*{"); // https://github.com/PoiScript/orgize/issues/69 - let vec = ObjectPositions::new(("{3}", &config).into()).collect::>(); + let vec = ObjectPositions::standard(("{3}", &config).into()).collect::>(); assert_eq!(vec.len(), 2); assert_eq!(vec[0].0.s, "{3}"); // FIXME: assert_eq!(vec[1].0.s, "{3}"); - let vec = ObjectPositions::new(("*{()}//s\nc<<", &config).into()).collect::>(); + let vec = ObjectPositions::standard(("*{()}//s\nc<<", &config).into()).collect::>(); assert_eq!(vec.len(), 6); assert_eq!(vec[0].0.s, "*{()}//s\nc<<"); assert_eq!(vec[1].0.s, "{()}//s\nc<<"); diff --git a/src/syntax/radio_target.rs b/src/syntax/radio_target.rs index 3fc39f5..74fbfac 100644 --- a/src/syntax/radio_target.rs +++ b/src/syntax/radio_target.rs @@ -8,11 +8,10 @@ use nom::{ use super::{ combinator::{l_angle3_token, node, r_angle3_token, GreenElement}, input::Input, + object::minimal_object_nodes, SyntaxKind::*, }; -// TODO: text-markup, entities, latex-fragments, subscript and superscript - pub fn radio_target_node(input: Input) -> IResult { let mut parser = map( tuple(( @@ -26,7 +25,10 @@ pub fn radio_target_node(input: Input) -> IResult { r_angle3_token, )), |(l_angle3, contents, r_angle3)| { - node(RADIO_TARGET, [l_angle3, contents.text_token(), r_angle3]) + let mut children = vec![l_angle3]; + children.extend(minimal_object_nodes(contents)); + children.push(r_angle3); + node(RADIO_TARGET, children) }, ); crate::lossless_parser!(parser, input) @@ -58,6 +60,18 @@ fn parse() { "### ); + insta::assert_debug_snapshot!( + to_radio_target("<<<\\alpha>>>").syntax, + @r###" + RADIO_TARGET@0..12 + L_ANGLE3@0..3 "<<<" + ENTITY@3..9 + BACKSLASH@3..4 "\\" + TEXT@4..9 "alpha" + R_ANGLE3@9..12 ">>>" + "### + ); + let config = &ParseConfig::default(); assert!(radio_target_node(("<<>>", config).into()).is_err());