feat: support objects in link description

This commit is contained in:
PoiScript 2023-11-23 15:47:14 +08:00
parent 590212fddb
commit 8fcfd60712
No known key found for this signature in database
GPG key ID: 22C2B1249D99985E
10 changed files with 168 additions and 136 deletions

View file

@ -5,7 +5,7 @@ use nom::{combinator::map, AsBytes, IResult, Slice};
use super::{
combinator::{node, token, GreenElement},
input::Input,
object::object_nodes,
object::standard_object_nodes,
SyntaxKind::*,
};
@ -13,7 +13,7 @@ use super::{
pub fn bold_node(input: Input) -> IResult<Input, GreenElement, ()> {
let mut parser = map(emphasis(b'*'), |contents| {
let mut children = vec![token(STAR, "*")];
children.extend(object_nodes(contents));
children.extend(standard_object_nodes(contents));
children.push(token(STAR, "*"));
node(BOLD, children)
});
@ -35,7 +35,7 @@ pub fn code_node(input: Input) -> IResult<Input, GreenElement, ()> {
pub fn strike_node(input: Input) -> IResult<Input, GreenElement, ()> {
let mut parser = map(emphasis(b'+'), |contents| {
let mut children = vec![token(PLUS, "+")];
children.extend(object_nodes(contents));
children.extend(standard_object_nodes(contents));
children.push(token(PLUS, "+"));
node(STRIKE, children)
});
@ -57,7 +57,7 @@ pub fn verbatim_node(input: Input) -> IResult<Input, GreenElement, ()> {
pub fn underline_node(input: Input) -> IResult<Input, GreenElement, ()> {
let mut parser = map(emphasis(b'_'), |contents| {
let mut children = vec![token(UNDERSCORE, "_")];
children.extend(object_nodes(contents));
children.extend(standard_object_nodes(contents));
children.push(token(UNDERSCORE, "_"));
node(UNDERLINE, children)
});
@ -68,7 +68,7 @@ pub fn underline_node(input: Input) -> IResult<Input, GreenElement, ()> {
pub fn italic_node(input: Input) -> IResult<Input, GreenElement, ()> {
let mut parser = map(emphasis(b'/'), |contents| {
let mut children = vec![token(SLASH, "/")];
children.extend(object_nodes(contents));
children.extend(standard_object_nodes(contents));
children.push(token(SLASH, "/"));
node(ITALIC, children)
});

View file

@ -9,7 +9,7 @@ use nom::{
use super::{
combinator::{colon_token, l_bracket_token, node, r_bracket_token, GreenElement},
input::Input,
object::object_nodes,
object::standard_object_nodes,
SyntaxKind::*,
};
@ -31,7 +31,7 @@ fn fn_ref_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let mut children = vec![l_bracket, fn_.text_token(), colon, label.text_token()];
if let Some((colon, definition)) = definition {
children.push(colon);
children.extend(object_nodes(definition));
children.extend(standard_object_nodes(definition));
}
children.push(r_bracket);

View file

@ -15,7 +15,7 @@ use super::{
drawer::property_drawer_node,
element::element_nodes,
input::Input,
object::object_nodes,
object::standard_object_nodes,
planning::planning_node,
SyntaxKind::*,
};
@ -54,7 +54,7 @@ fn headline_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (title, tags) = opt(headline_tags_node)(title_and_tags)?;
if !title.is_empty() {
b.push(node(HEADLINE_TITLE, object_nodes(title)));
b.push(node(HEADLINE_TITLE, standard_object_nodes(title)));
}
b.push_opt(tags);
b.ws(ws_);

View file

@ -75,7 +75,7 @@ fn parse() {
let to_latex = to_ast::<LatexEnvironment>(latex_environment_node);
insta::assert_debug_snapshot!(
to_latex(r#"\begin{NAME}\end{NAME}"#).syntax,
to_latex(r"\begin{NAME}\end{NAME}").syntax,
@r###"
LATEX_ENVIRONMENT@0..22
WHITESPACE@0..0 ""
@ -96,10 +96,10 @@ fn parse() {
insta::assert_debug_snapshot!(
to_latex(
r#"\begin{align*}
r"\begin{align*}
2x - 5y &= 8 \\
3x + 9y &= -12
\end{align*}"#
\end{align*}"
).syntax,
@r###"
LATEX_ENVIRONMENT@0..70
@ -121,6 +121,6 @@ fn parse() {
let c = ParseConfig::default();
assert!(latex_environment_node((r#"\begin{equation}\end{align}"#, &c).into()).is_err());
assert!(latex_environment_node((r#"\begin{_}\end{_}"#, &c).into()).is_err());
assert!(latex_environment_node((r"\begin{equation}\end{align}", &c).into()).is_err());
assert!(latex_environment_node((r"\begin{_}\end{_}", &c).into()).is_err());
}

View file

@ -10,6 +10,7 @@ use super::{
l_bracket2_token, l_bracket_token, node, r_bracket2_token, r_bracket_token, GreenElement,
},
input::Input,
object::link_description_object_nodes,
SyntaxKind::*,
};
@ -30,7 +31,8 @@ pub fn link_node(input: Input) -> IResult<Input, GreenElement, ()> {
let mut children = vec![l_bracket2, path.token(LINK_PATH)];
if let Some((r_bracket, l_bracket, desc)) = desc {
children.extend([r_bracket, l_bracket, desc.text_token()]);
children.extend([r_bracket, l_bracket]);
children.extend(link_description_object_nodes(desc));
}
children.push(r_bracket2);
@ -83,6 +85,24 @@ fn parse() {
"###
);
let link = to_link("[[https://orgmode.org][*bold* description]]");
insta::assert_debug_snapshot!(
link.syntax,
@r###"
LINK@0..43
L_BRACKET2@0..2 "[["
LINK_PATH@2..21 "https://orgmode.org"
R_BRACKET@21..22 "]"
L_BRACKET@22..23 "["
BOLD@23..29
STAR@23..24 "*"
TEXT@24..28 "bold"
STAR@28..29 "*"
TEXT@29..41 " description"
R_BRACKET2@41..43 "]]"
"###
);
let config = &ParseConfig::default();
assert!(link_node(("[[#id][desc]", config).into()).is_err());

View file

@ -16,7 +16,7 @@ use super::{
element::element_node,
input::Input,
keyword::affiliated_keyword_nodes,
object::object_nodes,
object::standard_object_nodes,
paragraph::paragraph_nodes,
SyntaxKind::*,
};
@ -188,7 +188,7 @@ fn list_item_tag(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
let (input, ws) = space0(input)?;
let (input, colon2) = colon2_token(input)?;
let mut children = object_nodes(tag);
let mut children = standard_object_nodes(tag);
children.push(colon2);
Ok((input, (node(LIST_ITEM_TAG, children), ws)))
@ -201,7 +201,10 @@ fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, (bool,
input.of(""),
(
false,
node(LIST_ITEM_CONTENT, [node(PARAGRAPH, object_nodes(input))]),
node(
LIST_ITEM_CONTENT,
[node(PARAGRAPH, standard_object_nodes(input))],
),
),
));
};

View file

@ -60,6 +60,24 @@ impl ObjectPositions<'_> {
),
}
}
/// Builds an `ObjectPositions` scanner over `input`, starting at offset 0, whose
/// finder matches the bytes listed below (each labelled with the object kind it
/// may introduce).
///
/// The byte set lines up with the arms handled by `link_description_object_nodes`.
// NOTE(review): `b'_'` is listed twice (once under text markup, once under
// subscript). Presumably harmless to the matcher, but confirm against
// `jetscii::bytes!` before relying on it.
fn link_description(input: Input) -> ObjectPositions {
ObjectPositions {
input,
pos: 0,
finder: jetscii::bytes!(
b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */
b'\\', b'$', /* latex & entity */
b'@', /* snippet */
b'c', /* inline call */
b's', /* inline source */
b'{', /* macros */
b'[', /* cookie */
b'^', /* superscript */
b'_' /* subscript */
),
}
}
}
impl<'a> Iterator for ObjectPositions<'a> {
@ -70,12 +88,11 @@ impl<'a> Iterator for ObjectPositions<'a> {
return None;
}
let bytes = &self.input.as_bytes()[self.pos..];
let previous = self.pos;
let i = self.finder.find(bytes)?;
self.pos += i + 1;
let i = self.finder.find(&self.input.as_bytes()[self.pos..])?;
let p = self.pos + i;
let p = self.pos - 1;
self.pos = p + 1;
debug_assert!(
previous < self.pos && self.pos <= self.input.s.len(),
@ -94,6 +111,31 @@ impl<'a> Iterator for ObjectPositions<'a> {
}
}
/// parse minimal sets of objects, including
/// - LaTeX fragments ('\\')
/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/')
/// - Entities ('\\')
/// - Superscripts and Subscripts
pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
object_nodes(
ObjectPositions::minimal,
|i: Input, pre: Input| match &i.as_bytes()[0] {
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'$' => latex_fragment_node(i),
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
},
input,
)
}
/// parses standard sets of objects, including
///
/// - Entities
@ -113,16 +155,83 @@ impl<'a> Iterator for ObjectPositions<'a> {
///
/// // todo:
/// - Citations
pub fn object_nodes(input: Input) -> Vec<GreenElement> {
// TODO:
// debug_assert!(!input.is_empty());
pub fn standard_object_nodes(input: Input) -> Vec<GreenElement> {
object_nodes(
ObjectPositions::standard,
|i: Input, pre: Input| match &i.as_bytes()[0] {
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'@' => snippet_node(i),
b'{' => macros_node(i),
b'<' => radio_target_node(i)
.or_else(|_| target_node(i))
.or_else(|_| timestamp_diary_node(i))
.or_else(|_| timestamp_active_node(i)),
b'[' => cookie_node(i)
.or_else(|_| link_node(i))
.or_else(|_| fn_ref_node(i))
.or_else(|_| timestamp_inactive_node(i)),
// NOTE: although not specified in document, inline call and inline src follows the
// same pre tokens rule as text markup
b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
b'$' => latex_fragment_node(i),
b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i),
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
},
input,
)
}
pub fn link_description_object_nodes(input: Input) -> Vec<GreenElement> {
object_nodes(
ObjectPositions::link_description,
|i: Input<'_>, pre: Input<'_>| match &i.as_bytes()[0] {
b'@' => snippet_node(i),
b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
b'{' => macros_node(i),
b'[' => cookie_node(i),
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'$' => latex_fragment_node(i),
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
},
input,
)
}
fn object_nodes<'a, F, P>(position: F, parse: P, input: Input<'a>) -> Vec<GreenElement>
where
F: Fn(Input) -> ObjectPositions,
P: Fn(Input<'a>, Input<'a>) -> IResult<Input<'a>, GreenElement, ()>,
{
let mut i = input;
let mut nodes = vec![];
'l: while !i.is_empty() {
for (input, head) in ObjectPositions::standard(i) {
if let Ok((input, pre)) = standard_object_node(input, head) {
for (input, head) in position(i) {
debug_assert!(
input.s.len() >= 2,
"object must have at least two characters: {:?}",
input.s
);
if let Ok((input, pre)) = parse(input, head) {
if !head.is_empty() {
nodes.push(head.text_token())
}
@ -150,106 +259,6 @@ pub fn object_nodes(input: Input) -> Vec<GreenElement> {
nodes
}
/// parse minimal sets of objects, including
/// - LaTeX fragments ('\\')
/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/')
/// - Entities ('\\')
/// - Superscripts and Subscripts
pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
let mut i = input;
let mut nodes = vec![];
'l: while !i.is_empty() {
for (input, head) in ObjectPositions::minimal(i) {
if let Ok((input, pre)) = minimal_object_node(input, head) {
if !head.is_empty() {
nodes.push(head.text_token())
}
nodes.push(pre);
debug_assert!(
input.input_len() < i.input_len(),
"{} < {}",
input.input_len(),
i.input_len()
);
i = input;
continue 'l;
}
}
nodes.push(i.text_token());
break;
}
debug_assert_eq!(
input.as_str(),
nodes.iter().fold(String::new(), |s, i| s + &i.to_string()),
"parser must be lossless"
);
nodes
}
/// parse an object from standard sets
fn standard_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
debug_assert!(
i.s.len() >= 2,
"object must have at least two characters: {:?}",
i.s
);
match &i.as_bytes()[0] {
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'@' => snippet_node(i),
b'{' => macros_node(i),
b'<' => radio_target_node(i)
.or_else(|_| target_node(i))
.or_else(|_| timestamp_diary_node(i))
.or_else(|_| timestamp_active_node(i)),
b'[' => cookie_node(i)
.or_else(|_| link_node(i))
.or_else(|_| fn_ref_node(i))
.or_else(|_| timestamp_inactive_node(i)),
// NOTE: although not specified in document, inline call and inline src follows the
// same pre tokens rule as text markup
b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
b'$' => latex_fragment_node(i),
b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i),
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
}
}
/// parse an object from minimal sets
fn minimal_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
debug_assert!(
i.s.len() >= 2,
"object must have at least two characters: {:?}",
i.s
);
match &i.as_bytes()[0] {
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'$' => latex_fragment_node(i),
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
}
}
#[test]
fn positions() {
let config = crate::ParseConfig::default();
@ -286,7 +295,7 @@ fn parse() {
let t = |input: &str| {
let config = &ParseConfig::default();
let children = object_nodes((input, config).into());
let children = standard_object_nodes((input, config).into());
SyntaxNode::new_root(node(SyntaxKind::PARAGRAPH, children).into_node().unwrap())
};

View file

@ -4,7 +4,7 @@ use super::{
combinator::{blank_lines, line_ends_iter, node, GreenElement},
input::Input,
keyword::affiliated_keyword_nodes,
object::object_nodes,
object::standard_object_nodes,
SyntaxKind,
};
@ -51,7 +51,7 @@ fn paragraph_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let mut children = vec![];
children.extend(keywords);
children.extend(object_nodes(contents));
children.extend(standard_object_nodes(contents));
children.extend(post_blank);
Ok((input, node(SyntaxKind::PARAGRAPH, children)))

View file

@ -9,7 +9,7 @@ use nom::{
use crate::{
syntax::{
combinator::{caret_token, underscore_token},
object::object_nodes,
object::standard_object_nodes,
},
SyntaxKind,
};
@ -29,7 +29,7 @@ pub fn superscript_node(input: Input) -> IResult<Input, GreenElement, ()> {
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
} else if let Ok((input, (l, contents, r))) = template1(input) {
children.push(l);
children.extend(object_nodes(contents));
children.extend(standard_object_nodes(contents));
children.push(r);
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
} else if let Ok((input, (sign, contents))) = template2(input) {
@ -53,7 +53,7 @@ pub fn subscript_node(input: Input) -> IResult<Input, GreenElement, ()> {
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
} else if let Ok((input, (l, contents, r))) = template1(input) {
children.push(l);
children.extend(object_nodes(contents));
children.extend(standard_object_nodes(contents));
children.push(r);
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
} else if let Ok((input, (sign, contents))) = template2(input) {

View file

@ -9,7 +9,7 @@ use nom::{
use super::{
combinator::{blank_lines, line_ends_iter, node, pipe_token, GreenElement, NodeBuilder},
input::Input,
object::object_nodes,
object::standard_object_nodes,
SyntaxKind::*,
};
@ -73,11 +73,11 @@ fn table_standard_row_node(input: Input) -> Result<GreenElement, nom::Err<()>> {
{
Some(idx) => {
let (ws, cell) = input.take_split(idx + 1);
b.push(node(ORG_TABLE_CELL, object_nodes(cell)));
b.push(node(ORG_TABLE_CELL, standard_object_nodes(cell)));
b.ws(ws);
}
_ => {
b.push(node(ORG_TABLE_CELL, object_nodes(input)));
b.push(node(ORG_TABLE_CELL, standard_object_nodes(input)));
}
}
});