feat: update list node parsing

This commit is contained in:
PoiScript 2023-11-16 18:50:33 +08:00
parent ed987d468a
commit b7ddc0f076
No known key found for this signature in database
GPG key ID: 22C2B1249D99985E
10 changed files with 411 additions and 242 deletions

View file

@ -17,6 +17,7 @@ use super::{
input::Input,
keyword::affiliated_keyword_nodes,
object::object_nodes,
paragraph::paragraph_nodes,
SyntaxKind::*,
};
@ -28,24 +29,35 @@ pub fn list_node(input: Input) -> IResult<Input, GreenElement, ()> {
fn list_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, affiliated_keywords) = affiliated_keyword_nodes(input)?;
let (input, first_indent) = space0(input)?;
let (input, first_item) = list_item_node(first_indent, input)?;
let (input, (ends_with_empty_blank_lines, first_item)) = list_item_node(first_indent, input)?;
let mut children = vec![];
children.extend(affiliated_keywords);
children.push(first_item);
let mut input = input;
while !input.is_empty() {
while !ends_with_empty_blank_lines && !input.is_empty() {
let (input_, indent) = space0(input)?;
if indent.input_len() != first_indent.input_len() {
break;
}
if let Ok((input_, list_item)) = list_item_node(indent, input_) {
children.push(list_item);
input = input_;
} else {
let Ok((input_, (ends_with_empty_blank_lines, list_item))) = list_item_node(indent, input_)
else {
break;
};
children.push(list_item);
debug_assert!(
input.input_len() > input_.input_len(),
"{} > {}",
input.input_len(),
input_.input_len(),
);
input = input_;
if ends_with_empty_blank_lines {
break;
}
}
@ -57,7 +69,11 @@ fn list_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
Ok((input, node(LIST, children)))
}
fn list_item_node<'a>(indent: Input<'a>, input: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
#[tracing::instrument(level = "debug", skip(input, indent), fields(input = input.s))]
fn list_item_node<'a>(
indent: Input<'a>,
input: Input<'a>,
) -> IResult<Input<'a>, (bool, GreenElement), ()> {
let (input, bullet) = recognize(tuple((
alt((
tag("+"),
@ -91,7 +107,9 @@ fn list_item_node<'a>(indent: Input<'a>, input: Input<'a>) -> IResult<Input<'a>,
let (input, counter) = opt(list_item_counter)(input)?;
let (input, checkbox) = opt(list_item_checkbox)(input)?;
let (input, tag) = cond(!is_ordered, opt(list_item_tag))(input)?;
let (input, content) = list_item_content_node(input, indent.input_len())?;
let (input, (ends_with_empty_blank_lines, content)) =
list_item_content_node(input, indent.input_len())?;
let (input, post_blank) = cond(!ends_with_empty_blank_lines, blank_lines)(input)?;
let mut children = vec![
indent.token(LIST_ITEM_INDENT),
@ -109,10 +127,17 @@ fn list_item_node<'a>(indent: Input<'a>, input: Input<'a>) -> IResult<Input<'a>,
}
children.push(content);
if let Some(post_blank) = post_blank {
children.extend(post_blank);
}
Ok((input, node(LIST_ITEM, children)))
Ok((
input,
(ends_with_empty_blank_lines, node(LIST_ITEM, children)),
))
}
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
fn list_item_counter(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
let (input, node) = map(
tuple((l_bracket_token, at_token, alphanumeric1, r_bracket_token)),
@ -129,6 +154,7 @@ fn list_item_counter(input: Input) -> IResult<Input, (GreenElement, Input), ()>
Ok((input, (node, ws)))
}
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
fn list_item_checkbox(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
let (input, node) = map(
tuple((
@ -151,6 +177,7 @@ fn list_item_checkbox(input: Input) -> IResult<Input, (GreenElement, Input), ()>
Ok((input, (node, ws)))
}
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
fn list_item_tag(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
let bytes = input.as_bytes();
@ -167,19 +194,23 @@ fn list_item_tag(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
Ok((input, (node(LIST_ITEM_TAG, children), ws)))
}
fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, GreenElement, ()> {
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, (bool, GreenElement), ()> {
if memchr(b'\n', input.as_bytes()).is_none() {
return Ok((
input.of(""),
node(LIST_ITEM_CONTENT, [node(PARAGRAPH, object_nodes(input))]),
(
false,
node(LIST_ITEM_CONTENT, [node(PARAGRAPH, object_nodes(input))]),
),
));
};
let mut skip_one = true;
let mut i = input;
let mut children = vec![];
let mut previous_line_is_blank = false;
'l: loop {
let mut previous_blank_line: Option<(Input, Input)> = None;
'l: while !i.is_empty() {
for (input, head) in line_starts_iter(i.as_str())
// the first line in list item content will always be a paragraph
// so we need to skip it in the first iteration
@ -188,50 +219,51 @@ fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, GreenEl
{
match get_line_indent(input.as_str()) {
Some(next_indent) => {
previous_line_is_blank = false;
if next_indent <= indent {
let (input, head) = previous_blank_line.unwrap_or_else(|| (input, head));
if !head.is_empty() {
children.push(node(PARAGRAPH, object_nodes(head)));
children.extend(paragraph_nodes(head)?);
}
return Ok((input, node(LIST_ITEM_CONTENT, children)));
return Ok((input, (false, node(LIST_ITEM_CONTENT, children))));
}
previous_blank_line = None;
if let Ok((input, element)) = element_node(input) {
if !head.is_empty() {
children.push(node(PARAGRAPH, object_nodes(head)));
children.extend(paragraph_nodes(head)?);
}
children.push(element);
debug_assert!(
input.input_len() < i.input_len(),
"{} < {}",
input.input_len(),
i.input_len()
);
i = input;
skip_one = false;
continue 'l;
}
}
_ if previous_line_is_blank => {
// list item ends at two consecutive empty lines
if !head.is_empty() {
children.push(node(PARAGRAPH, object_nodes(head)));
}
let (input, post_blank) = blank_lines(input)?;
children.extend(post_blank);
return Ok((input, node(LIST_ITEM_CONTENT, children)));
}
_ => {
previous_line_is_blank = true;
// list item ends at two consecutive empty lines
if let Some((input, head)) = previous_blank_line {
if !head.is_empty() {
children.extend(paragraph_nodes(head)?);
}
return Ok((input, (true, node(LIST_ITEM_CONTENT, children))));
} else {
previous_blank_line = Some((input, head))
}
}
}
}
children.extend(paragraph_nodes(i)?);
break;
}
if !i.is_empty() {
children.push(node(PARAGRAPH, object_nodes(i)));
}
Ok((input.of(""), node(LIST_ITEM_CONTENT, children)))
Ok((input.of(""), (false, node(LIST_ITEM_CONTENT, children))))
}
fn get_line_indent(input: &str) -> Option<usize> {
@ -247,9 +279,8 @@ fn parse() {
let to_list = to_ast::<List>(list_node);
let list = to_list("1)");
insta::assert_debug_snapshot!(
list.syntax,
to_list("1)").syntax,
@r###"
LIST@0..2
LIST_ITEM@0..2
@ -260,9 +291,8 @@ fn parse() {
"###
);
let list = to_list("+ ");
insta::assert_debug_snapshot!(
list.syntax,
to_list("+ ").syntax,
@r###"
LIST@0..2
LIST_ITEM@0..2
@ -273,9 +303,8 @@ fn parse() {
"###
);
let list = to_list("-\n");
insta::assert_debug_snapshot!(
list.syntax,
to_list("-\n").syntax,
@r###"
LIST@0..2
LIST_ITEM@0..2
@ -283,14 +312,12 @@ fn parse() {
LIST_ITEM_BULLET@0..1 "-"
LIST_ITEM_CONTENT@1..2
PARAGRAPH@1..2
TEXT@1..2 "\n"
BLANK_LINE@1..2 "\n"
"###
);
let list = to_list("+ 1");
assert!(!list.is_ordered());
insta::assert_debug_snapshot!(
list.syntax,
to_list("+ 1").syntax,
@r###"
LIST@0..3
LIST_ITEM@0..3
@ -302,9 +329,8 @@ fn parse() {
"###
);
let list = to_list("+ 1\n");
insta::assert_debug_snapshot!(
list.syntax,
to_list("+ 1\n").syntax,
@r###"
LIST@0..4
LIST_ITEM@0..4
@ -316,12 +342,13 @@ fn parse() {
"###
);
let list = to_list("+ [@A] 1\n\n\n+ 2");
// a list ends at two consecutive blank lines; those blank lines
// become the post_blank of the list node
insta::assert_debug_snapshot!(
list.syntax,
to_list("+ [@A] 1\n\n\n+ 2").syntax,
@r###"
LIST@0..14
LIST_ITEM@0..11
LIST@0..11
LIST_ITEM@0..9
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_COUNTER@2..6
@ -330,25 +357,21 @@ fn parse() {
TEXT@4..5 "A"
R_BRACKET@5..6 "]"
WHITESPACE@6..7 " "
LIST_ITEM_CONTENT@7..11
PARAGRAPH@7..10
TEXT@7..10 "1\n\n"
BLANK_LINE@10..11 "\n"
LIST_ITEM@11..14
LIST_ITEM_INDENT@11..11 ""
LIST_ITEM_BULLET@11..13 "+ "
LIST_ITEM_CONTENT@13..14
PARAGRAPH@13..14
TEXT@13..14 "2"
LIST_ITEM_CONTENT@7..9
PARAGRAPH@7..9
TEXT@7..9 "1\n"
BLANK_LINE@9..10 "\n"
BLANK_LINE@10..11 "\n"
"###
);
let list = to_list("+ *TAG* :: item1\n+ [X] item2");
// a single empty line between list items becomes
// the post_blank of the first item
insta::assert_debug_snapshot!(
list.syntax,
to_list("+ *TAG* :: item1\n\n+ [X] item2").syntax,
@r###"
LIST@0..28
LIST_ITEM@0..17
LIST@0..29
LIST_ITEM@0..18
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_TAG@2..10
@ -362,20 +385,22 @@ fn parse() {
LIST_ITEM_CONTENT@10..17
PARAGRAPH@10..17
TEXT@10..17 " item1\n"
LIST_ITEM@17..28
LIST_ITEM_INDENT@17..17 ""
LIST_ITEM_BULLET@17..19 "+ "
LIST_ITEM_CHECK_BOX@19..22
L_BRACKET@19..20 "["
TEXT@20..21 "X"
R_BRACKET@21..22 "]"
WHITESPACE@22..23 " "
LIST_ITEM_CONTENT@23..28
PARAGRAPH@23..28
TEXT@23..28 "item2"
BLANK_LINE@17..18 "\n"
LIST_ITEM@18..29
LIST_ITEM_INDENT@18..18 ""
LIST_ITEM_BULLET@18..20 "+ "
LIST_ITEM_CHECK_BOX@20..23
L_BRACKET@20..21 "["
TEXT@21..22 "X"
R_BRACKET@22..23 "]"
WHITESPACE@23..24 " "
LIST_ITEM_CONTENT@24..29
PARAGRAPH@24..29
TEXT@24..29 "item2"
"###
);
// nested list
let list = to_list(
r#"+ item1
+ item2"#,
@ -400,9 +425,8 @@ fn parse() {
"###
);
let list = to_list("* item1\nitem2");
insta::assert_debug_snapshot!(
list.syntax,
to_list("* item1\nitem2").syntax,
@r###"
LIST@0..8
LIST_ITEM@0..8
@ -414,13 +438,8 @@ fn parse() {
"###
);
let list = to_list(
r#"* item1
still item 1"#,
);
insta::assert_debug_snapshot!(
list.syntax,
to_list("* item1\n\n still item 1").syntax,
@r###"
LIST@0..23
LIST_ITEM@0..23
@ -428,7 +447,8 @@ fn parse() {
LIST_ITEM_BULLET@0..2 "* "
LIST_ITEM_CONTENT@2..23
PARAGRAPH@2..9
TEXT@2..9 "item1\n\n"
TEXT@2..8 "item1\n"
BLANK_LINE@8..9 "\n"
PARAGRAPH@9..23
TEXT@9..23 " still item 1"
"###
@ -455,7 +475,8 @@ fn parse() {
LIST_ITEM_BULLET@14..16 "+ "
LIST_ITEM_CONTENT@16..26
PARAGRAPH@16..26
TEXT@16..26 "item2\n "
TEXT@16..22 "item2\n"
BLANK_LINE@22..26 " "
"###
);
@ -476,14 +497,16 @@ fn parse() {
LIST_ITEM_BULLET@0..3 "1. "
LIST_ITEM_CONTENT@3..23
PARAGRAPH@3..10
TEXT@3..10 "item1\n\n"
TEXT@3..9 "item1\n"
BLANK_LINE@9..10 "\n"
LIST@10..23
LIST_ITEM@10..23
LIST_ITEM_INDENT@10..14 " "
LIST_ITEM_BULLET@14..16 "- "
LIST_ITEM_CONTENT@16..23
PARAGRAPH@16..23
TEXT@16..23 "item2\n\n"
LIST_ITEM_CONTENT@16..22
PARAGRAPH@16..22
TEXT@16..22 "item2\n"
BLANK_LINE@22..23 "\n"
LIST_ITEM@23..32
LIST_ITEM_INDENT@23..23 ""
LIST_ITEM_BULLET@23..26 "3. "
@ -493,21 +516,18 @@ fn parse() {
"###
);
let list = to_list(
r#" + item1
+ item2"#,
);
// nested list
insta::assert_debug_snapshot!(
list.syntax,
to_list(" + item1\n\n + item2").syntax,
@r###"
LIST@0..20
LIST_ITEM@0..11
LIST_ITEM_INDENT@0..2 " "
LIST_ITEM_BULLET@2..4 "+ "
LIST_ITEM_CONTENT@4..11
PARAGRAPH@4..11
TEXT@4..11 "item1\n\n"
LIST_ITEM_CONTENT@4..10
PARAGRAPH@4..10
TEXT@4..10 "item1\n"
BLANK_LINE@10..11 "\n"
LIST_ITEM@11..20
LIST_ITEM_INDENT@11..13 " "
LIST_ITEM_BULLET@13..15 "+ "
@ -517,14 +537,8 @@ fn parse() {
"###
);
let list = to_list(
r#" 1. item1
2. item2
3. item3"#,
);
assert!(list.is_ordered());
insta::assert_debug_snapshot!(
list.syntax,
to_list(" 1. item1\n 2. item2\n 3. item3").syntax,
@r###"
LIST@0..42
LIST_ITEM@0..42
@ -550,15 +564,9 @@ fn parse() {
"###
);
let list = to_list(
r#" 1. item1
#+begin_example
hello
#+end_example
"#,
);
// Indentation of lines within other greater elements do not count
insta::assert_debug_snapshot!(
list.syntax,
to_list(" 1. item1\n #+begin_example\nhello\n#+end_example\n").syntax,
@r###"
LIST@0..51
LIST_ITEM@0..51