diff --git a/src/syntax/emphasis.rs b/src/syntax/emphasis.rs
index 24c2c92..c2654ae 100644
--- a/src/syntax/emphasis.rs
+++ b/src/syntax/emphasis.rs
@@ -5,7 +5,7 @@ use nom::{combinator::map, AsBytes, IResult, Slice};
use super::{
combinator::{node, token, GreenElement},
input::Input,
- object::object_nodes,
+ object::standard_object_nodes,
SyntaxKind::*,
};
@@ -13,7 +13,7 @@ use super::{
pub fn bold_node(input: Input) -> IResult {
let mut parser = map(emphasis(b'*'), |contents| {
let mut children = vec![token(STAR, "*")];
- children.extend(object_nodes(contents));
+ children.extend(standard_object_nodes(contents));
children.push(token(STAR, "*"));
node(BOLD, children)
});
@@ -35,7 +35,7 @@ pub fn code_node(input: Input) -> IResult {
pub fn strike_node(input: Input) -> IResult {
let mut parser = map(emphasis(b'+'), |contents| {
let mut children = vec![token(PLUS, "+")];
- children.extend(object_nodes(contents));
+ children.extend(standard_object_nodes(contents));
children.push(token(PLUS, "+"));
node(STRIKE, children)
});
@@ -57,7 +57,7 @@ pub fn verbatim_node(input: Input) -> IResult {
pub fn underline_node(input: Input) -> IResult {
let mut parser = map(emphasis(b'_'), |contents| {
let mut children = vec![token(UNDERSCORE, "_")];
- children.extend(object_nodes(contents));
+ children.extend(standard_object_nodes(contents));
children.push(token(UNDERSCORE, "_"));
node(UNDERLINE, children)
});
@@ -68,7 +68,7 @@ pub fn underline_node(input: Input) -> IResult {
pub fn italic_node(input: Input) -> IResult {
let mut parser = map(emphasis(b'/'), |contents| {
let mut children = vec![token(SLASH, "/")];
- children.extend(object_nodes(contents));
+ children.extend(standard_object_nodes(contents));
children.push(token(SLASH, "/"));
node(ITALIC, children)
});
diff --git a/src/syntax/fn_ref.rs b/src/syntax/fn_ref.rs
index 8ba1c69..e24d207 100644
--- a/src/syntax/fn_ref.rs
+++ b/src/syntax/fn_ref.rs
@@ -9,7 +9,7 @@ use nom::{
use super::{
combinator::{colon_token, l_bracket_token, node, r_bracket_token, GreenElement},
input::Input,
- object::object_nodes,
+ object::standard_object_nodes,
SyntaxKind::*,
};
@@ -31,7 +31,7 @@ fn fn_ref_node_base(input: Input) -> IResult {
let mut children = vec![l_bracket, fn_.text_token(), colon, label.text_token()];
if let Some((colon, definition)) = definition {
children.push(colon);
- children.extend(object_nodes(definition));
+ children.extend(standard_object_nodes(definition));
}
children.push(r_bracket);
diff --git a/src/syntax/headline.rs b/src/syntax/headline.rs
index 0d1915f..118a1b5 100644
--- a/src/syntax/headline.rs
+++ b/src/syntax/headline.rs
@@ -15,7 +15,7 @@ use super::{
drawer::property_drawer_node,
element::element_nodes,
input::Input,
- object::object_nodes,
+ object::standard_object_nodes,
planning::planning_node,
SyntaxKind::*,
};
@@ -54,7 +54,7 @@ fn headline_node_base(input: Input) -> IResult {
let (title, tags) = opt(headline_tags_node)(title_and_tags)?;
if !title.is_empty() {
- b.push(node(HEADLINE_TITLE, object_nodes(title)));
+ b.push(node(HEADLINE_TITLE, standard_object_nodes(title)));
}
b.push_opt(tags);
b.ws(ws_);
diff --git a/src/syntax/latex_environment.rs b/src/syntax/latex_environment.rs
index 2136585..b731e1b 100644
--- a/src/syntax/latex_environment.rs
+++ b/src/syntax/latex_environment.rs
@@ -75,7 +75,7 @@ fn parse() {
let to_latex = to_ast::(latex_environment_node);
insta::assert_debug_snapshot!(
- to_latex(r#"\begin{NAME}\end{NAME}"#).syntax,
+ to_latex(r"\begin{NAME}\end{NAME}").syntax,
@r###"
LATEX_ENVIRONMENT@0..22
WHITESPACE@0..0 ""
@@ -96,10 +96,10 @@ fn parse() {
insta::assert_debug_snapshot!(
to_latex(
- r#"\begin{align*}
+ r"\begin{align*}
2x - 5y &= 8 \\
3x + 9y &= -12
- \end{align*}"#
+ \end{align*}"
).syntax,
@r###"
LATEX_ENVIRONMENT@0..70
@@ -121,6 +121,6 @@ fn parse() {
let c = ParseConfig::default();
- assert!(latex_environment_node((r#"\begin{equation}\end{align}"#, &c).into()).is_err());
- assert!(latex_environment_node((r#"\begin{_}\end{_}"#, &c).into()).is_err());
+ assert!(latex_environment_node((r"\begin{equation}\end{align}", &c).into()).is_err());
+ assert!(latex_environment_node((r"\begin{_}\end{_}", &c).into()).is_err());
}
diff --git a/src/syntax/link.rs b/src/syntax/link.rs
index 93c93b8..52c22d3 100644
--- a/src/syntax/link.rs
+++ b/src/syntax/link.rs
@@ -10,6 +10,7 @@ use super::{
l_bracket2_token, l_bracket_token, node, r_bracket2_token, r_bracket_token, GreenElement,
},
input::Input,
+ object::link_description_object_nodes,
SyntaxKind::*,
};
@@ -30,7 +31,8 @@ pub fn link_node(input: Input) -> IResult {
let mut children = vec![l_bracket2, path.token(LINK_PATH)];
if let Some((r_bracket, l_bracket, desc)) = desc {
- children.extend([r_bracket, l_bracket, desc.text_token()]);
+ children.extend([r_bracket, l_bracket]);
+ children.extend(link_description_object_nodes(desc));
}
children.push(r_bracket2);
@@ -83,6 +85,24 @@ fn parse() {
"###
);
+ let link = to_link("[[https://orgmode.org][*bold* description]]");
+ insta::assert_debug_snapshot!(
+ link.syntax,
+ @r###"
+ LINK@0..43
+ L_BRACKET2@0..2 "[["
+ LINK_PATH@2..21 "https://orgmode.org"
+ R_BRACKET@21..22 "]"
+ L_BRACKET@22..23 "["
+ BOLD@23..29
+ STAR@23..24 "*"
+ TEXT@24..28 "bold"
+ STAR@28..29 "*"
+ TEXT@29..41 " description"
+ R_BRACKET2@41..43 "]]"
+ "###
+ );
+
let config = &ParseConfig::default();
assert!(link_node(("[[#id][desc]", config).into()).is_err());
diff --git a/src/syntax/list.rs b/src/syntax/list.rs
index 8979393..c12d473 100644
--- a/src/syntax/list.rs
+++ b/src/syntax/list.rs
@@ -16,7 +16,7 @@ use super::{
element::element_node,
input::Input,
keyword::affiliated_keyword_nodes,
- object::object_nodes,
+ object::standard_object_nodes,
paragraph::paragraph_nodes,
SyntaxKind::*,
};
@@ -188,7 +188,7 @@ fn list_item_tag(input: Input) -> IResult {
let (input, ws) = space0(input)?;
let (input, colon2) = colon2_token(input)?;
- let mut children = object_nodes(tag);
+ let mut children = standard_object_nodes(tag);
children.push(colon2);
Ok((input, (node(LIST_ITEM_TAG, children), ws)))
@@ -201,7 +201,10 @@ fn list_item_content_node(input: Input, indent: usize) -> IResult {
),
}
}
+
+ fn link_description(input: Input) -> ObjectPositions {
+ ObjectPositions {
+ input,
+ pos: 0, // initial search offset into `input`
+ finder: jetscii::bytes!(
+ b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */
+ b'\\', b'$', /* latex & entity */
+ b'@', /* snippet */
+ b'c', /* inline call */
+ b's', /* inline source */
+ b'{', /* macros */
+ b'[', /* cookie */
+ b'^', /* superscript */
+ b'_' /* subscript; same byte as the underline marker already listed under text markup */
+ ),
+ }
+ }
}
impl<'a> Iterator for ObjectPositions<'a> {
@@ -70,12 +88,11 @@ impl<'a> Iterator for ObjectPositions<'a> {
return None;
}
- let bytes = &self.input.as_bytes()[self.pos..];
let previous = self.pos;
- let i = self.finder.find(bytes)?;
- self.pos += i + 1;
+ let i = self.finder.find(&self.input.as_bytes()[self.pos..])?;
+ let p = self.pos + i;
- let p = self.pos - 1;
+ self.pos = p + 1;
debug_assert!(
previous < self.pos && self.pos <= self.input.s.len(),
@@ -94,6 +111,31 @@ impl<'a> Iterator for ObjectPositions<'a> {
}
}
+/// Parses the minimal set of objects, which includes
+/// - LaTeX fragments ('$', '\\')
+/// - Text markup (bold, code, strike, verbatim, underline, italic) ('*', '~', '+', '=', '_', '/')
+/// - Entities ('\\')
+/// - Superscripts and subscripts ('^', '_')
+pub fn minimal_object_nodes(input: Input) -> Vec {
+ object_nodes(
+ ObjectPositions::minimal,
+ |i: Input, pre: Input| match &i.as_bytes()[0] {
+ b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
+ b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
+ b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
+ b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
+ b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
+ b'~' if emphasis::verify_pre(pre.s) => code_node(i),
+ b'$' => latex_fragment_node(i),
+ b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), // entity first, LaTeX fragment as fallback
+ b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
+ b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i), // reached only when the emphasis guard for '_' above is false
+ _ => Err(nom::Err::Error(())),
+ },
+ input,
+ )
+}
+
/// parses standard sets of objects, including
///
/// - Entities
@@ -113,16 +155,83 @@ impl<'a> Iterator for ObjectPositions<'a> {
///
/// // todo:
/// - Citations
-pub fn object_nodes(input: Input) -> Vec {
- // TODO:
- // debug_assert!(!input.is_empty());
+pub fn standard_object_nodes(input: Input) -> Vec {
+ object_nodes(
+ ObjectPositions::standard,
+ |i: Input, pre: Input| match &i.as_bytes()[0] {
+ b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
+ b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
+ b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
+ b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
+ b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
+ b'~' if emphasis::verify_pre(pre.s) => code_node(i),
+ b'@' => snippet_node(i),
+ b'{' => macros_node(i),
+ b'<' => radio_target_node(i)
+ .or_else(|_| target_node(i))
+ .or_else(|_| timestamp_diary_node(i))
+ .or_else(|_| timestamp_active_node(i)),
+ b'[' => cookie_node(i)
+ .or_else(|_| link_node(i))
+ .or_else(|_| fn_ref_node(i))
+ .or_else(|_| timestamp_inactive_node(i)),
+ // NOTE: although not specified in the documentation, inline call and inline src follow
+ // the same pre-token rule as text markup
+ b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
+ b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
+ b'$' => latex_fragment_node(i),
+ b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i), // index 1 relies on len >= 2, which `object_nodes` only debug-asserts
+ b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
+ b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
+ b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i), // reached only when the emphasis guard for '_' above is false
+ _ => Err(nom::Err::Error(())),
+ },
+ input,
+ )
+}
+pub fn link_description_object_nodes(input: Input) -> Vec { // parses the objects tried inside a link description
+ object_nodes(
+ ObjectPositions::link_description, // candidate start bytes for this object set
+ |i: Input<'_>, pre: Input<'_>| match &i.as_bytes()[0] {
+ b'@' => snippet_node(i),
+ b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
+ b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
+ b'{' => macros_node(i),
+ b'[' => cookie_node(i), // only cookies are tried for '['; no link, footnote reference or inactive timestamp
+ b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
+ b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
+ b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
+ b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
+ b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
+ b'~' if emphasis::verify_pre(pre.s) => code_node(i),
+ b'$' => latex_fragment_node(i),
+ b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), // entity first, LaTeX fragment as fallback
+ b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
+ b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i), // reached only when the emphasis guard for '_' above is false
+ _ => Err(nom::Err::Error(())),
+ },
+ input,
+ )
+}
+
+fn object_nodes<'a, F, P>(position: F, parse: P, input: Input<'a>) -> Vec
+where
+ F: Fn(Input) -> ObjectPositions,
+ P: Fn(Input<'a>, Input<'a>) -> IResult, GreenElement, ()>,
+{
let mut i = input;
let mut nodes = vec![];
'l: while !i.is_empty() {
- for (input, head) in ObjectPositions::standard(i) {
- if let Ok((input, pre)) = standard_object_node(input, head) {
+ for (input, head) in position(i) {
+ debug_assert!(
+ input.s.len() >= 2,
+ "object must have at least two characters: {:?}",
+ input.s
+ );
+
+ if let Ok((input, pre)) = parse(input, head) {
if !head.is_empty() {
nodes.push(head.text_token())
}
@@ -150,106 +259,6 @@ pub fn object_nodes(input: Input) -> Vec {
nodes
}
-/// parse minimal sets of objects, including
-/// - LaTeX fragments ('\\')
-/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/')
-/// - Entities ('\\')
-/// - Superscripts and Subscripts
-pub fn minimal_object_nodes(input: Input) -> Vec {
- let mut i = input;
- let mut nodes = vec![];
-
- 'l: while !i.is_empty() {
- for (input, head) in ObjectPositions::minimal(i) {
- if let Ok((input, pre)) = minimal_object_node(input, head) {
- if !head.is_empty() {
- nodes.push(head.text_token())
- }
- nodes.push(pre);
- debug_assert!(
- input.input_len() < i.input_len(),
- "{} < {}",
- input.input_len(),
- i.input_len()
- );
- i = input;
- continue 'l;
- }
- }
- nodes.push(i.text_token());
- break;
- }
-
- debug_assert_eq!(
- input.as_str(),
- nodes.iter().fold(String::new(), |s, i| s + &i.to_string()),
- "parser must be lossless"
- );
-
- nodes
-}
-
-/// parse an object from standard sets
-fn standard_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult, GreenElement, ()> {
- debug_assert!(
- i.s.len() >= 2,
- "object must have at least two characters: {:?}",
- i.s
- );
-
- match &i.as_bytes()[0] {
- b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
- b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
- b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
- b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
- b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
- b'~' if emphasis::verify_pre(pre.s) => code_node(i),
- b'@' => snippet_node(i),
- b'{' => macros_node(i),
- b'<' => radio_target_node(i)
- .or_else(|_| target_node(i))
- .or_else(|_| timestamp_diary_node(i))
- .or_else(|_| timestamp_active_node(i)),
- b'[' => cookie_node(i)
- .or_else(|_| link_node(i))
- .or_else(|_| fn_ref_node(i))
- .or_else(|_| timestamp_inactive_node(i)),
- // NOTE: although not specified in document, inline call and inline src follows the
- // same pre tokens rule as text markup
- b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
- b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
- b'$' => latex_fragment_node(i),
- b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i),
- b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
- b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
- b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
- _ => Err(nom::Err::Error(())),
- }
-}
-
-/// parse an object from minimal sets
-fn minimal_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult, GreenElement, ()> {
- debug_assert!(
- i.s.len() >= 2,
- "object must have at least two characters: {:?}",
- i.s
- );
-
- match &i.as_bytes()[0] {
- b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
- b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
- b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
- b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
- b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
- b'~' if emphasis::verify_pre(pre.s) => code_node(i),
- b'$' => latex_fragment_node(i),
- b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
- b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
- b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
- _ => Err(nom::Err::Error(())),
- }
-}
-
#[test]
fn positions() {
let config = crate::ParseConfig::default();
@@ -286,7 +295,7 @@ fn parse() {
let t = |input: &str| {
let config = &ParseConfig::default();
- let children = object_nodes((input, config).into());
+ let children = standard_object_nodes((input, config).into());
SyntaxNode::new_root(node(SyntaxKind::PARAGRAPH, children).into_node().unwrap())
};
diff --git a/src/syntax/paragraph.rs b/src/syntax/paragraph.rs
index caa8c65..c80c1c8 100644
--- a/src/syntax/paragraph.rs
+++ b/src/syntax/paragraph.rs
@@ -4,7 +4,7 @@ use super::{
combinator::{blank_lines, line_ends_iter, node, GreenElement},
input::Input,
keyword::affiliated_keyword_nodes,
- object::object_nodes,
+ object::standard_object_nodes,
SyntaxKind,
};
@@ -51,7 +51,7 @@ fn paragraph_node_base(input: Input) -> IResult {
let mut children = vec![];
children.extend(keywords);
- children.extend(object_nodes(contents));
+ children.extend(standard_object_nodes(contents));
children.extend(post_blank);
Ok((input, node(SyntaxKind::PARAGRAPH, children)))
diff --git a/src/syntax/subscript_superscript.rs b/src/syntax/subscript_superscript.rs
index 1a45efa..c75ee07 100644
--- a/src/syntax/subscript_superscript.rs
+++ b/src/syntax/subscript_superscript.rs
@@ -9,7 +9,7 @@ use nom::{
use crate::{
syntax::{
combinator::{caret_token, underscore_token},
- object::object_nodes,
+ object::standard_object_nodes,
},
SyntaxKind,
};
@@ -29,7 +29,7 @@ pub fn superscript_node(input: Input) -> IResult {
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
} else if let Ok((input, (l, contents, r))) = template1(input) {
children.push(l);
- children.extend(object_nodes(contents));
+ children.extend(standard_object_nodes(contents));
children.push(r);
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
} else if let Ok((input, (sign, contents))) = template2(input) {
@@ -53,7 +53,7 @@ pub fn subscript_node(input: Input) -> IResult {
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
} else if let Ok((input, (l, contents, r))) = template1(input) {
children.push(l);
- children.extend(object_nodes(contents));
+ children.extend(standard_object_nodes(contents));
children.push(r);
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
} else if let Ok((input, (sign, contents))) = template2(input) {
diff --git a/src/syntax/table.rs b/src/syntax/table.rs
index cb14403..803782d 100644
--- a/src/syntax/table.rs
+++ b/src/syntax/table.rs
@@ -9,7 +9,7 @@ use nom::{
use super::{
combinator::{blank_lines, line_ends_iter, node, pipe_token, GreenElement, NodeBuilder},
input::Input,
- object::object_nodes,
+ object::standard_object_nodes,
SyntaxKind::*,
};
@@ -73,11 +73,11 @@ fn table_standard_row_node(input: Input) -> Result> {
{
Some(idx) => {
let (ws, cell) = input.take_split(idx + 1);
- b.push(node(ORG_TABLE_CELL, object_nodes(cell)));
+ b.push(node(ORG_TABLE_CELL, standard_object_nodes(cell)));
b.ws(ws);
}
_ => {
- b.push(node(ORG_TABLE_CELL, object_nodes(input)));
+ b.push(node(ORG_TABLE_CELL, standard_object_nodes(input)));
}
}
});