feat: update list node parsing
This commit is contained in:
parent
ed987d468a
commit
b7ddc0f076
10 changed files with 411 additions and 242 deletions
|
|
@ -1,7 +1,4 @@
|
|||
use nom::{
|
||||
combinator::{iterator, opt},
|
||||
IResult,
|
||||
};
|
||||
use nom::{combinator::opt, IResult, InputLength};
|
||||
|
||||
use super::{
|
||||
combinator::{blank_lines, node, GreenElement},
|
||||
|
|
@ -22,18 +19,29 @@ fn document_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
|
||||
children.extend(pre_blank);
|
||||
|
||||
if input.is_empty() {
|
||||
return Ok((input, node(DOCUMENT, children)));
|
||||
}
|
||||
|
||||
let (input, section) = opt(section_node)(input)?;
|
||||
if let Some(section) = section {
|
||||
children.push(section);
|
||||
}
|
||||
|
||||
let mut it = iterator(input, headline_node);
|
||||
children.extend(&mut it);
|
||||
let (input, _) = it.finish()?;
|
||||
let mut i = input;
|
||||
while !i.is_empty() {
|
||||
let (input, headline) = headline_node(i)?;
|
||||
debug_assert!(
|
||||
i.input_len() > input.input_len(),
|
||||
"{} > {}",
|
||||
i.input_len(),
|
||||
input.input_len(),
|
||||
);
|
||||
i = input;
|
||||
children.push(headline);
|
||||
}
|
||||
|
||||
debug_assert!(input.is_empty());
|
||||
|
||||
Ok((input, node(DOCUMENT, children)))
|
||||
Ok((i, node(DOCUMENT, children)))
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -128,6 +128,7 @@ fn node_property_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
|
||||
/// Public entry point that runs `property_drawer_node_base` on `input` through
/// the crate's `lossless_parser!` macro.
///
/// The call is traced at debug level; the `input` argument itself is skipped and
/// its `s` field is recorded as the `input` span field instead.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
||||
pub fn property_drawer_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
// Debug-only precondition: this parser must not be invoked on empty input.
debug_assert!(!input.is_empty());
|
||||
// NOTE(review): the macro is defined elsewhere; presumably it checks that the
// base parser is lossless — confirm against its definition.
crate::lossless_parser!(property_drawer_node_base, input)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,7 @@
|
|||
use nom::IResult;
|
||||
use std::iter::once;
|
||||
|
||||
use memchr::memchr2_iter;
|
||||
use nom::{AsBytes, IResult, InputLength, InputTake};
|
||||
|
||||
use super::{
|
||||
block::block_node,
|
||||
|
|
@ -12,27 +15,46 @@ use super::{
|
|||
input::Input,
|
||||
keyword::{affiliated_keyword_nodes, keyword_node},
|
||||
list::list_node,
|
||||
paragraph::paragraph_node,
|
||||
paragraph::{paragraph_node, paragraph_nodes},
|
||||
rule::rule_node,
|
||||
table::{org_table_node, table_el_node},
|
||||
};
|
||||
|
||||
/// Parses input into multiple element
|
||||
/// Recognizes multiple org-mode elements
|
||||
///
|
||||
/// The input must not contain blank lines at the beginning.
|
||||
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
||||
pub fn element_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
|
||||
debug_assert!(!input.is_empty());
|
||||
// TODO:
|
||||
// debug_assert!(
|
||||
// blank_lines(input).unwrap().1.is_empty(),
|
||||
// "input must not starts with blank lines: {:?}",
|
||||
// input.s
|
||||
// );
|
||||
|
||||
let mut i = input;
|
||||
let mut nodes = vec![];
|
||||
|
||||
while !i.is_empty() {
|
||||
let result = element_node(i);
|
||||
debug_assert!(result.is_ok(), "element_node() always returns Ok()");
|
||||
let (input, node) = result?;
|
||||
i = input;
|
||||
nodes.push(node);
|
||||
'l: while !i.is_empty() {
|
||||
for (input, head) in ElementPositions::new(i) {
|
||||
if let Ok((input, element)) = element_node(input) {
|
||||
if !head.is_empty() {
|
||||
nodes.extend(paragraph_nodes(head)?);
|
||||
}
|
||||
nodes.push(element);
|
||||
debug_assert!(
|
||||
input.input_len() < i.input_len(),
|
||||
"{} < {}",
|
||||
input.input_len(),
|
||||
i.input_len()
|
||||
);
|
||||
i = input;
|
||||
continue 'l;
|
||||
}
|
||||
}
|
||||
nodes.extend(paragraph_nodes(i)?);
|
||||
break;
|
||||
}
|
||||
|
||||
debug_assert_eq!(
|
||||
|
|
@ -44,6 +66,7 @@ pub fn element_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
|
|||
Ok(nodes)
|
||||
}
|
||||
|
||||
/// Recognizes an org-mode element except paragraph
|
||||
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
||||
pub fn element_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
// skip affiliated keyword first
|
||||
|
|
@ -52,11 +75,7 @@ pub fn element_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
let has_affiliated_keyword = !nodes.is_empty();
|
||||
|
||||
// find first non-whitespace character
|
||||
let byte = i
|
||||
.as_str()
|
||||
.trim_start_matches(|c| c == ' ' || c == '\t')
|
||||
.bytes()
|
||||
.next();
|
||||
let byte = i.bytes().find(|&b| b != b' ' && b != b'\t');
|
||||
|
||||
debug_assert!(
|
||||
!(has_affiliated_keyword && matches!(byte, None | Some(b'\n') | Some(b'\r'))),
|
||||
|
|
@ -80,7 +99,78 @@ pub fn element_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
_ => Err(nom::Err::Error(())),
|
||||
};
|
||||
|
||||
result.or_else(|_| paragraph_node(input))
|
||||
if has_affiliated_keyword {
|
||||
result.or_else(|_| paragraph_node(input))
|
||||
} else {
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
struct ElementPositions<'a> {
|
||||
input: Input<'a>,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
impl<'a> ElementPositions<'a> {
|
||||
fn new(input: Input<'a>) -> Self {
|
||||
ElementPositions { input, pos: 0 }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for ElementPositions<'a> {
|
||||
type Item = (Input<'a>, Input<'a>);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.pos >= self.input.s.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let bytes = &self.input.as_bytes()[self.pos..];
|
||||
|
||||
let mut iter = once(0).chain(memchr2_iter(b'\r', b'\n', bytes).map(|i| i + 1));
|
||||
|
||||
while let Some(i) = iter.next() {
|
||||
let b = *bytes[i..].iter().find(|&&b| b != b' ' && b != b'\t')?;
|
||||
|
||||
if matches!(
|
||||
b,
|
||||
b'[' | b'0'..=b'9' | b'*' | b'C' | b'-' | b':' | b'|' | b'+' | b'#'
|
||||
) {
|
||||
let previous = self.pos;
|
||||
self.pos = iter
|
||||
.next()
|
||||
.map(|i| i + self.pos)
|
||||
.unwrap_or_else(|| self.input.s.len());
|
||||
|
||||
debug_assert!(
|
||||
previous < self.pos && self.pos <= self.input.s.len(),
|
||||
"{} < {} < {}",
|
||||
previous,
|
||||
self.pos,
|
||||
self.input.s.len()
|
||||
);
|
||||
|
||||
let (input, head) = self.input.take_split(i + previous);
|
||||
|
||||
return Some((input, head));
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// `ElementPositions` yields one candidate per line whose first non-blank byte
/// can open an element.
#[test]
fn positions() {
    let config = crate::ParseConfig::default();
    let source = "+\n\n C\n \r\n-\n\t\t[\n: \r\n";

    // Expected remaining input at each candidate position, in order.
    let expected = [
        "+\n\n C\n \r\n-\n\t\t[\n: \r\n",
        " C\n \r\n-\n\t\t[\n: \r\n",
        "-\n\t\t[\n: \r\n",
        "\t\t[\n: \r\n",
        ": \r\n",
    ];

    let found = ElementPositions::new((source, &config).into()).collect::<Vec<_>>();

    assert_eq!(found.len(), 5);
    for (item, want) in found.iter().zip(expected.iter()) {
        assert_eq!(item.0.s, *want);
    }
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -94,6 +184,7 @@ fn parse() {
|
|||
SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap())
|
||||
};
|
||||
|
||||
// paragraph stops at blank lines
|
||||
insta::assert_debug_snapshot!(
|
||||
t(r#"a
|
||||
|
||||
|
|
@ -108,39 +199,18 @@ b"#),
|
|||
"###
|
||||
);
|
||||
|
||||
// paragraph followed by special element
|
||||
insta::assert_debug_snapshot!(
|
||||
t("#+ATTR_HTML: :width 300px\n[[./img/a.jpg]]"),
|
||||
t("Table:\n|cell"),
|
||||
@r###"
|
||||
SECTION@0..41
|
||||
PARAGRAPH@0..41
|
||||
AFFILIATED_KEYWORD@0..26
|
||||
HASH_PLUS@0..2 "#+"
|
||||
TEXT@2..11 "ATTR_HTML"
|
||||
COLON@11..12 ":"
|
||||
TEXT@12..25 " :width 300px"
|
||||
NEW_LINE@25..26 "\n"
|
||||
LINK@26..41
|
||||
L_BRACKET2@26..28 "[["
|
||||
LINK_PATH@28..39 "./img/a.jpg"
|
||||
R_BRACKET2@39..41 "]]"
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
t("#+ATTR_HTML: :width 300px\n[[./img/a.jpg]]"),
|
||||
@r###"
|
||||
SECTION@0..41
|
||||
PARAGRAPH@0..41
|
||||
AFFILIATED_KEYWORD@0..26
|
||||
HASH_PLUS@0..2 "#+"
|
||||
TEXT@2..11 "ATTR_HTML"
|
||||
COLON@11..12 ":"
|
||||
TEXT@12..25 " :width 300px"
|
||||
NEW_LINE@25..26 "\n"
|
||||
LINK@26..41
|
||||
L_BRACKET2@26..28 "[["
|
||||
LINK_PATH@28..39 "./img/a.jpg"
|
||||
R_BRACKET2@39..41 "]]"
|
||||
SECTION@0..12
|
||||
PARAGRAPH@0..7
|
||||
TEXT@0..7 "Table:\n"
|
||||
ORG_TABLE@7..12
|
||||
ORG_TABLE_STANDARD_ROW@7..12
|
||||
PIPE@7..8 "|"
|
||||
ORG_TABLE_CELL@8..12
|
||||
TEXT@8..12 "cell"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ use super::{
|
|||
|
||||
/// Public entry point that runs `headline_node_base` on `input` through the
/// crate's `lossless_parser!` macro.
///
/// The call is traced at debug level; the `input` argument itself is skipped and
/// its `s` field is recorded as the `input` span field instead.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
||||
pub fn headline_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
// Debug-only precondition: this parser must not be invoked on empty input.
debug_assert!(!input.is_empty());
|
||||
// NOTE(review): the macro is defined elsewhere; presumably it checks that the
// base parser is lossless — confirm against its definition.
crate::lossless_parser!(headline_node_base, input)
|
||||
}
|
||||
|
||||
|
|
@ -59,16 +60,24 @@ fn headline_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
b.ws(ws_);
|
||||
b.nl(nl);
|
||||
|
||||
if nl.is_empty() {
|
||||
if input.is_empty() {
|
||||
return Ok((input, b.finish(HEADLINE)));
|
||||
}
|
||||
|
||||
let (input, planning) = opt(planning_node)(input)?;
|
||||
b.push_opt(planning);
|
||||
|
||||
if input.is_empty() {
|
||||
return Ok((input, b.finish(HEADLINE)));
|
||||
}
|
||||
|
||||
let (input, property_drawer) = opt(property_drawer_node)(input)?;
|
||||
b.push_opt(property_drawer);
|
||||
|
||||
if input.is_empty() {
|
||||
return Ok((input, b.finish(HEADLINE)));
|
||||
}
|
||||
|
||||
let (input, section) = opt(section_node)(input)?;
|
||||
b.push_opt(section);
|
||||
|
||||
|
|
@ -83,6 +92,12 @@ fn headline_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
|
||||
let (input, headline) = headline_node(i)?;
|
||||
b.push(headline);
|
||||
debug_assert!(
|
||||
i.input_len() > input.input_len(),
|
||||
"{} > {}",
|
||||
i.input_len(),
|
||||
input.input_len()
|
||||
);
|
||||
i = input;
|
||||
}
|
||||
|
||||
|
|
@ -91,15 +106,12 @@ fn headline_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
|
||||
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
||||
pub fn section_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
debug_assert!(!input.is_empty());
|
||||
let (input, section) = section_text(input)?;
|
||||
Ok((input, node(SECTION, element_nodes(section)?)))
|
||||
}
|
||||
|
||||
pub fn section_text(input: Input) -> IResult<Input, Input, ()> {
|
||||
if input.is_empty() {
|
||||
return Err(nom::Err::Error(()));
|
||||
}
|
||||
|
||||
fn section_text(input: Input) -> IResult<Input, Input, ()> {
|
||||
for (input, section) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) {
|
||||
if headline_stars(input).is_ok() {
|
||||
if section.is_empty() {
|
||||
|
|
@ -119,12 +131,13 @@ fn headline_stars(input: Input) -> IResult<Input, Input, ()> {
|
|||
let level = bytes.iter().take_while(|&&c| c == b'*').count();
|
||||
|
||||
if level == 0 {
|
||||
Err(nom::Err::Error(()))
|
||||
} else if input.input_len() == level
|
||||
|| bytes[level] == b'\n'
|
||||
|| bytes[level] == b'\r'
|
||||
|| bytes[level] == b' '
|
||||
{
|
||||
return Err(nom::Err::Error(()));
|
||||
}
|
||||
// followed by eof, new line, or whitespace
|
||||
else if matches!(
|
||||
bytes.get(level),
|
||||
None | Some(b'\n') | Some(b'\r') | Some(b' ')
|
||||
) {
|
||||
Ok(input.take_split(level))
|
||||
} else {
|
||||
Err(nom::Err::Error(()))
|
||||
|
|
@ -151,6 +164,7 @@ fn headline_tags_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
if item.is_empty() {
|
||||
children.push(token(COLON, ":"));
|
||||
can_not_be_ws = false;
|
||||
debug_assert!(i > ii, "{} > {}", i, ii);
|
||||
i = ii;
|
||||
} else if item
|
||||
.iter()
|
||||
|
|
@ -159,11 +173,13 @@ fn headline_tags_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
children.push(input.slice(ii + 1..i).text_token());
|
||||
children.push(token(COLON, ":"));
|
||||
can_not_be_ws = false;
|
||||
debug_assert!(i > ii, "{} > {}", i, ii);
|
||||
i = ii;
|
||||
} else if item.iter().all(|&c| c == b' ' || c == b'\t') && !can_not_be_ws {
|
||||
children.push(input.slice(ii + 1..i).ws_token());
|
||||
children.push(token(COLON, ":"));
|
||||
can_not_be_ws = true;
|
||||
debug_assert!(i > ii, "{} > {}", i, ii);
|
||||
i = ii;
|
||||
} else {
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -57,6 +57,12 @@ pub fn affiliated_keyword_nodes(input: Input) -> IResult<Input, Vec<GreenElement
|
|||
break;
|
||||
}
|
||||
|
||||
debug_assert!(
|
||||
i.input_len() > input_.input_len(),
|
||||
"{} > {}",
|
||||
i.input_len(),
|
||||
input_.input_len()
|
||||
);
|
||||
i = input_;
|
||||
children.push(GreenElement::Node(GreenNode::new(
|
||||
SyntaxKind::AFFILIATED_KEYWORD.into(),
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ use super::{
|
|||
input::Input,
|
||||
keyword::affiliated_keyword_nodes,
|
||||
object::object_nodes,
|
||||
paragraph::paragraph_nodes,
|
||||
SyntaxKind::*,
|
||||
};
|
||||
|
||||
|
|
@ -28,24 +29,35 @@ pub fn list_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
fn list_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let (input, affiliated_keywords) = affiliated_keyword_nodes(input)?;
|
||||
let (input, first_indent) = space0(input)?;
|
||||
let (input, first_item) = list_item_node(first_indent, input)?;
|
||||
let (input, (ends_with_empty_blank_lines, first_item)) = list_item_node(first_indent, input)?;
|
||||
|
||||
let mut children = vec![];
|
||||
children.extend(affiliated_keywords);
|
||||
children.push(first_item);
|
||||
|
||||
let mut input = input;
|
||||
while !input.is_empty() {
|
||||
while !ends_with_empty_blank_lines && !input.is_empty() {
|
||||
let (input_, indent) = space0(input)?;
|
||||
|
||||
if indent.input_len() != first_indent.input_len() {
|
||||
break;
|
||||
}
|
||||
|
||||
if let Ok((input_, list_item)) = list_item_node(indent, input_) {
|
||||
children.push(list_item);
|
||||
input = input_;
|
||||
} else {
|
||||
let Ok((input_, (ends_with_empty_blank_lines, list_item))) = list_item_node(indent, input_)
|
||||
else {
|
||||
break;
|
||||
};
|
||||
|
||||
children.push(list_item);
|
||||
debug_assert!(
|
||||
input.input_len() > input_.input_len(),
|
||||
"{} > {}",
|
||||
input.input_len(),
|
||||
input_.input_len(),
|
||||
);
|
||||
input = input_;
|
||||
|
||||
if ends_with_empty_blank_lines {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -57,7 +69,11 @@ fn list_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
Ok((input, node(LIST, children)))
|
||||
}
|
||||
|
||||
fn list_item_node<'a>(indent: Input<'a>, input: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
|
||||
#[tracing::instrument(level = "debug", skip(input, indent), fields(input = input.s))]
|
||||
fn list_item_node<'a>(
|
||||
indent: Input<'a>,
|
||||
input: Input<'a>,
|
||||
) -> IResult<Input<'a>, (bool, GreenElement), ()> {
|
||||
let (input, bullet) = recognize(tuple((
|
||||
alt((
|
||||
tag("+"),
|
||||
|
|
@ -91,7 +107,9 @@ fn list_item_node<'a>(indent: Input<'a>, input: Input<'a>) -> IResult<Input<'a>,
|
|||
let (input, counter) = opt(list_item_counter)(input)?;
|
||||
let (input, checkbox) = opt(list_item_checkbox)(input)?;
|
||||
let (input, tag) = cond(!is_ordered, opt(list_item_tag))(input)?;
|
||||
let (input, content) = list_item_content_node(input, indent.input_len())?;
|
||||
let (input, (ends_with_empty_blank_lines, content)) =
|
||||
list_item_content_node(input, indent.input_len())?;
|
||||
let (input, post_blank) = cond(!ends_with_empty_blank_lines, blank_lines)(input)?;
|
||||
|
||||
let mut children = vec![
|
||||
indent.token(LIST_ITEM_INDENT),
|
||||
|
|
@ -109,10 +127,17 @@ fn list_item_node<'a>(indent: Input<'a>, input: Input<'a>) -> IResult<Input<'a>,
|
|||
}
|
||||
|
||||
children.push(content);
|
||||
if let Some(post_blank) = post_blank {
|
||||
children.extend(post_blank);
|
||||
}
|
||||
|
||||
Ok((input, node(LIST_ITEM, children)))
|
||||
Ok((
|
||||
input,
|
||||
(ends_with_empty_blank_lines, node(LIST_ITEM, children)),
|
||||
))
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
||||
fn list_item_counter(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
|
||||
let (input, node) = map(
|
||||
tuple((l_bracket_token, at_token, alphanumeric1, r_bracket_token)),
|
||||
|
|
@ -129,6 +154,7 @@ fn list_item_counter(input: Input) -> IResult<Input, (GreenElement, Input), ()>
|
|||
Ok((input, (node, ws)))
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
||||
fn list_item_checkbox(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
|
||||
let (input, node) = map(
|
||||
tuple((
|
||||
|
|
@ -151,6 +177,7 @@ fn list_item_checkbox(input: Input) -> IResult<Input, (GreenElement, Input), ()>
|
|||
Ok((input, (node, ws)))
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
||||
fn list_item_tag(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
|
||||
let bytes = input.as_bytes();
|
||||
|
||||
|
|
@ -167,19 +194,23 @@ fn list_item_tag(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
|
|||
Ok((input, (node(LIST_ITEM_TAG, children), ws)))
|
||||
}
|
||||
|
||||
fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, GreenElement, ()> {
|
||||
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
||||
fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, (bool, GreenElement), ()> {
|
||||
if memchr(b'\n', input.as_bytes()).is_none() {
|
||||
return Ok((
|
||||
input.of(""),
|
||||
node(LIST_ITEM_CONTENT, [node(PARAGRAPH, object_nodes(input))]),
|
||||
(
|
||||
false,
|
||||
node(LIST_ITEM_CONTENT, [node(PARAGRAPH, object_nodes(input))]),
|
||||
),
|
||||
));
|
||||
};
|
||||
|
||||
let mut skip_one = true;
|
||||
let mut i = input;
|
||||
let mut children = vec![];
|
||||
let mut previous_line_is_blank = false;
|
||||
'l: loop {
|
||||
let mut previous_blank_line: Option<(Input, Input)> = None;
|
||||
'l: while !i.is_empty() {
|
||||
for (input, head) in line_starts_iter(i.as_str())
|
||||
// the first line in list item content will always be a paragraph
|
||||
// so we need to skip it in the first iteration
|
||||
|
|
@ -188,50 +219,51 @@ fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, GreenEl
|
|||
{
|
||||
match get_line_indent(input.as_str()) {
|
||||
Some(next_indent) => {
|
||||
previous_line_is_blank = false;
|
||||
|
||||
if next_indent <= indent {
|
||||
let (input, head) = previous_blank_line.unwrap_or_else(|| (input, head));
|
||||
if !head.is_empty() {
|
||||
children.push(node(PARAGRAPH, object_nodes(head)));
|
||||
children.extend(paragraph_nodes(head)?);
|
||||
}
|
||||
return Ok((input, node(LIST_ITEM_CONTENT, children)));
|
||||
return Ok((input, (false, node(LIST_ITEM_CONTENT, children))));
|
||||
}
|
||||
|
||||
previous_blank_line = None;
|
||||
|
||||
if let Ok((input, element)) = element_node(input) {
|
||||
if !head.is_empty() {
|
||||
children.push(node(PARAGRAPH, object_nodes(head)));
|
||||
children.extend(paragraph_nodes(head)?);
|
||||
}
|
||||
children.push(element);
|
||||
debug_assert!(
|
||||
input.input_len() < i.input_len(),
|
||||
"{} < {}",
|
||||
input.input_len(),
|
||||
i.input_len()
|
||||
);
|
||||
i = input;
|
||||
skip_one = false;
|
||||
continue 'l;
|
||||
}
|
||||
}
|
||||
_ if previous_line_is_blank => {
|
||||
// list item ends at two consecutive empty lines
|
||||
if !head.is_empty() {
|
||||
children.push(node(PARAGRAPH, object_nodes(head)));
|
||||
}
|
||||
let (input, post_blank) = blank_lines(input)?;
|
||||
|
||||
children.extend(post_blank);
|
||||
|
||||
return Ok((input, node(LIST_ITEM_CONTENT, children)));
|
||||
}
|
||||
_ => {
|
||||
previous_line_is_blank = true;
|
||||
// list item ends at two consecutive empty lines
|
||||
if let Some((input, head)) = previous_blank_line {
|
||||
if !head.is_empty() {
|
||||
children.extend(paragraph_nodes(head)?);
|
||||
}
|
||||
|
||||
return Ok((input, (true, node(LIST_ITEM_CONTENT, children))));
|
||||
} else {
|
||||
previous_blank_line = Some((input, head))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
children.extend(paragraph_nodes(i)?);
|
||||
break;
|
||||
}
|
||||
|
||||
if !i.is_empty() {
|
||||
children.push(node(PARAGRAPH, object_nodes(i)));
|
||||
}
|
||||
|
||||
Ok((input.of(""), node(LIST_ITEM_CONTENT, children)))
|
||||
Ok((input.of(""), (false, node(LIST_ITEM_CONTENT, children))))
|
||||
}
|
||||
|
||||
fn get_line_indent(input: &str) -> Option<usize> {
|
||||
|
|
@ -247,9 +279,8 @@ fn parse() {
|
|||
|
||||
let to_list = to_ast::<List>(list_node);
|
||||
|
||||
let list = to_list("1)");
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list("1)").syntax,
|
||||
@r###"
|
||||
LIST@0..2
|
||||
LIST_ITEM@0..2
|
||||
|
|
@ -260,9 +291,8 @@ fn parse() {
|
|||
"###
|
||||
);
|
||||
|
||||
let list = to_list("+ ");
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list("+ ").syntax,
|
||||
@r###"
|
||||
LIST@0..2
|
||||
LIST_ITEM@0..2
|
||||
|
|
@ -273,9 +303,8 @@ fn parse() {
|
|||
"###
|
||||
);
|
||||
|
||||
let list = to_list("-\n");
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list("-\n").syntax,
|
||||
@r###"
|
||||
LIST@0..2
|
||||
LIST_ITEM@0..2
|
||||
|
|
@ -283,14 +312,12 @@ fn parse() {
|
|||
LIST_ITEM_BULLET@0..1 "-"
|
||||
LIST_ITEM_CONTENT@1..2
|
||||
PARAGRAPH@1..2
|
||||
TEXT@1..2 "\n"
|
||||
BLANK_LINE@1..2 "\n"
|
||||
"###
|
||||
);
|
||||
|
||||
let list = to_list("+ 1");
|
||||
assert!(!list.is_ordered());
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list("+ 1").syntax,
|
||||
@r###"
|
||||
LIST@0..3
|
||||
LIST_ITEM@0..3
|
||||
|
|
@ -302,9 +329,8 @@ fn parse() {
|
|||
"###
|
||||
);
|
||||
|
||||
let list = to_list("+ 1\n");
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list("+ 1\n").syntax,
|
||||
@r###"
|
||||
LIST@0..4
|
||||
LIST_ITEM@0..4
|
||||
|
|
@ -316,12 +342,13 @@ fn parse() {
|
|||
"###
|
||||
);
|
||||
|
||||
let list = to_list("+ [@A] 1\n\n\n+ 2");
|
||||
// list ends with two consecutive blank lines, and these blank lines
|
||||
// will be the post_blank of list node
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list("+ [@A] 1\n\n\n+ 2").syntax,
|
||||
@r###"
|
||||
LIST@0..14
|
||||
LIST_ITEM@0..11
|
||||
LIST@0..11
|
||||
LIST_ITEM@0..9
|
||||
LIST_ITEM_INDENT@0..0 ""
|
||||
LIST_ITEM_BULLET@0..2 "+ "
|
||||
LIST_ITEM_COUNTER@2..6
|
||||
|
|
@ -330,25 +357,21 @@ fn parse() {
|
|||
TEXT@4..5 "A"
|
||||
R_BRACKET@5..6 "]"
|
||||
WHITESPACE@6..7 " "
|
||||
LIST_ITEM_CONTENT@7..11
|
||||
PARAGRAPH@7..10
|
||||
TEXT@7..10 "1\n\n"
|
||||
BLANK_LINE@10..11 "\n"
|
||||
LIST_ITEM@11..14
|
||||
LIST_ITEM_INDENT@11..11 ""
|
||||
LIST_ITEM_BULLET@11..13 "+ "
|
||||
LIST_ITEM_CONTENT@13..14
|
||||
PARAGRAPH@13..14
|
||||
TEXT@13..14 "2"
|
||||
LIST_ITEM_CONTENT@7..9
|
||||
PARAGRAPH@7..9
|
||||
TEXT@7..9 "1\n"
|
||||
BLANK_LINE@9..10 "\n"
|
||||
BLANK_LINE@10..11 "\n"
|
||||
"###
|
||||
);
|
||||
|
||||
let list = to_list("+ *TAG* :: item1\n+ [X] item2");
|
||||
// empty line between list item, the empty line will be
|
||||
// the post_blank of first item
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list("+ *TAG* :: item1\n\n+ [X] item2").syntax,
|
||||
@r###"
|
||||
LIST@0..28
|
||||
LIST_ITEM@0..17
|
||||
LIST@0..29
|
||||
LIST_ITEM@0..18
|
||||
LIST_ITEM_INDENT@0..0 ""
|
||||
LIST_ITEM_BULLET@0..2 "+ "
|
||||
LIST_ITEM_TAG@2..10
|
||||
|
|
@ -362,20 +385,22 @@ fn parse() {
|
|||
LIST_ITEM_CONTENT@10..17
|
||||
PARAGRAPH@10..17
|
||||
TEXT@10..17 " item1\n"
|
||||
LIST_ITEM@17..28
|
||||
LIST_ITEM_INDENT@17..17 ""
|
||||
LIST_ITEM_BULLET@17..19 "+ "
|
||||
LIST_ITEM_CHECK_BOX@19..22
|
||||
L_BRACKET@19..20 "["
|
||||
TEXT@20..21 "X"
|
||||
R_BRACKET@21..22 "]"
|
||||
WHITESPACE@22..23 " "
|
||||
LIST_ITEM_CONTENT@23..28
|
||||
PARAGRAPH@23..28
|
||||
TEXT@23..28 "item2"
|
||||
BLANK_LINE@17..18 "\n"
|
||||
LIST_ITEM@18..29
|
||||
LIST_ITEM_INDENT@18..18 ""
|
||||
LIST_ITEM_BULLET@18..20 "+ "
|
||||
LIST_ITEM_CHECK_BOX@20..23
|
||||
L_BRACKET@20..21 "["
|
||||
TEXT@21..22 "X"
|
||||
R_BRACKET@22..23 "]"
|
||||
WHITESPACE@23..24 " "
|
||||
LIST_ITEM_CONTENT@24..29
|
||||
PARAGRAPH@24..29
|
||||
TEXT@24..29 "item2"
|
||||
"###
|
||||
);
|
||||
|
||||
// nested list
|
||||
let list = to_list(
|
||||
r#"+ item1
|
||||
+ item2"#,
|
||||
|
|
@ -400,9 +425,8 @@ fn parse() {
|
|||
"###
|
||||
);
|
||||
|
||||
let list = to_list("* item1\nitem2");
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list("* item1\nitem2").syntax,
|
||||
@r###"
|
||||
LIST@0..8
|
||||
LIST_ITEM@0..8
|
||||
|
|
@ -414,13 +438,8 @@ fn parse() {
|
|||
"###
|
||||
);
|
||||
|
||||
let list = to_list(
|
||||
r#"* item1
|
||||
|
||||
still item 1"#,
|
||||
);
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list("* item1\n\n still item 1").syntax,
|
||||
@r###"
|
||||
LIST@0..23
|
||||
LIST_ITEM@0..23
|
||||
|
|
@ -428,7 +447,8 @@ fn parse() {
|
|||
LIST_ITEM_BULLET@0..2 "* "
|
||||
LIST_ITEM_CONTENT@2..23
|
||||
PARAGRAPH@2..9
|
||||
TEXT@2..9 "item1\n\n"
|
||||
TEXT@2..8 "item1\n"
|
||||
BLANK_LINE@8..9 "\n"
|
||||
PARAGRAPH@9..23
|
||||
TEXT@9..23 " still item 1"
|
||||
"###
|
||||
|
|
@ -455,7 +475,8 @@ fn parse() {
|
|||
LIST_ITEM_BULLET@14..16 "+ "
|
||||
LIST_ITEM_CONTENT@16..26
|
||||
PARAGRAPH@16..26
|
||||
TEXT@16..26 "item2\n "
|
||||
TEXT@16..22 "item2\n"
|
||||
BLANK_LINE@22..26 " "
|
||||
"###
|
||||
);
|
||||
|
||||
|
|
@ -476,14 +497,16 @@ fn parse() {
|
|||
LIST_ITEM_BULLET@0..3 "1. "
|
||||
LIST_ITEM_CONTENT@3..23
|
||||
PARAGRAPH@3..10
|
||||
TEXT@3..10 "item1\n\n"
|
||||
TEXT@3..9 "item1\n"
|
||||
BLANK_LINE@9..10 "\n"
|
||||
LIST@10..23
|
||||
LIST_ITEM@10..23
|
||||
LIST_ITEM_INDENT@10..14 " "
|
||||
LIST_ITEM_BULLET@14..16 "- "
|
||||
LIST_ITEM_CONTENT@16..23
|
||||
PARAGRAPH@16..23
|
||||
TEXT@16..23 "item2\n\n"
|
||||
LIST_ITEM_CONTENT@16..22
|
||||
PARAGRAPH@16..22
|
||||
TEXT@16..22 "item2\n"
|
||||
BLANK_LINE@22..23 "\n"
|
||||
LIST_ITEM@23..32
|
||||
LIST_ITEM_INDENT@23..23 ""
|
||||
LIST_ITEM_BULLET@23..26 "3. "
|
||||
|
|
@ -493,21 +516,18 @@ fn parse() {
|
|||
"###
|
||||
);
|
||||
|
||||
let list = to_list(
|
||||
r#" + item1
|
||||
|
||||
+ item2"#,
|
||||
);
|
||||
// nested list
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list(" + item1\n\n + item2").syntax,
|
||||
@r###"
|
||||
LIST@0..20
|
||||
LIST_ITEM@0..11
|
||||
LIST_ITEM_INDENT@0..2 " "
|
||||
LIST_ITEM_BULLET@2..4 "+ "
|
||||
LIST_ITEM_CONTENT@4..11
|
||||
PARAGRAPH@4..11
|
||||
TEXT@4..11 "item1\n\n"
|
||||
LIST_ITEM_CONTENT@4..10
|
||||
PARAGRAPH@4..10
|
||||
TEXT@4..10 "item1\n"
|
||||
BLANK_LINE@10..11 "\n"
|
||||
LIST_ITEM@11..20
|
||||
LIST_ITEM_INDENT@11..13 " "
|
||||
LIST_ITEM_BULLET@13..15 "+ "
|
||||
|
|
@ -517,14 +537,8 @@ fn parse() {
|
|||
"###
|
||||
);
|
||||
|
||||
let list = to_list(
|
||||
r#" 1. item1
|
||||
2. item2
|
||||
3. item3"#,
|
||||
);
|
||||
assert!(list.is_ordered());
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list(" 1. item1\n 2. item2\n 3. item3").syntax,
|
||||
@r###"
|
||||
LIST@0..42
|
||||
LIST_ITEM@0..42
|
||||
|
|
@ -550,15 +564,9 @@ fn parse() {
|
|||
"###
|
||||
);
|
||||
|
||||
let list = to_list(
|
||||
r#" 1. item1
|
||||
#+begin_example
|
||||
hello
|
||||
#+end_example
|
||||
"#,
|
||||
);
|
||||
// Indentation of lines within other greater elements do not count
|
||||
insta::assert_debug_snapshot!(
|
||||
list.syntax,
|
||||
to_list(" 1. item1\n #+begin_example\nhello\n#+end_example\n").syntax,
|
||||
@r###"
|
||||
LIST@0..51
|
||||
LIST_ITEM@0..51
|
||||
|
|
|
|||
|
|
@ -16,87 +16,115 @@ use super::{
|
|||
timestamp::{timestamp_active_node, timestamp_diary_node, timestamp_inactive_node},
|
||||
};
|
||||
|
||||
pub struct InlinePositions<'a> {
|
||||
bytes: &'a [u8],
|
||||
struct ObjectPositions<'a> {
|
||||
input: Input<'a>,
|
||||
pos: usize,
|
||||
next: Option<usize>,
|
||||
finder: jetscii::BytesConst,
|
||||
}
|
||||
|
||||
impl InlinePositions<'_> {
|
||||
pub fn new(bytes: &[u8]) -> InlinePositions {
|
||||
InlinePositions {
|
||||
bytes,
|
||||
impl ObjectPositions<'_> {
|
||||
fn new(input: Input) -> ObjectPositions {
|
||||
ObjectPositions {
|
||||
input,
|
||||
pos: 0,
|
||||
next: Some(0),
|
||||
finder: jetscii::bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n'),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for InlinePositions<'_> {
|
||||
type Item = usize;
|
||||
impl<'a> Iterator for ObjectPositions<'a> {
|
||||
type Item = (Input<'a>, Input<'a>);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.next.take().or_else(|| {
|
||||
jetscii::bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n')
|
||||
.find(&self.bytes[self.pos..])
|
||||
.map(|i| {
|
||||
self.pos += i + 1;
|
||||
if self.input.input_len() < 3 {
|
||||
return None;
|
||||
}
|
||||
|
||||
match self.bytes[self.pos - 1] {
|
||||
b'{' => {
|
||||
self.next = Some(self.pos);
|
||||
self.pos - 1
|
||||
}
|
||||
b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos,
|
||||
_ => self.pos - 1,
|
||||
}
|
||||
})
|
||||
})
|
||||
if let Some(p) = self.next.take() {
|
||||
return Some(self.input.take_split(p));
|
||||
}
|
||||
|
||||
if self.pos >= self.input.input_len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let bytes = &self.input.as_bytes()[self.pos..];
|
||||
let previous = self.pos;
|
||||
let i = self.finder.find(bytes)?;
|
||||
self.pos += i + 1;
|
||||
|
||||
let p = match bytes[i] {
|
||||
b'{' => {
|
||||
self.next = Some(self.pos);
|
||||
self.pos - 1
|
||||
}
|
||||
b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos,
|
||||
_ => self.pos - 1,
|
||||
};
|
||||
|
||||
debug_assert!(
|
||||
previous < self.pos && self.pos <= self.input.s.len(),
|
||||
"{} < {} < {}",
|
||||
previous,
|
||||
self.pos,
|
||||
self.input.s.len()
|
||||
);
|
||||
|
||||
// a valid object requires at least three characters
|
||||
if self.input.s.len() - p < 3 {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(self.input.take_split(p))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn object_nodes(input: Input) -> Vec<GreenElement> {
|
||||
// TODO:
|
||||
// debug_assert!(!input.is_empty());
|
||||
let nodes = object_nodes_base(input);
|
||||
|
||||
let mut i = input;
|
||||
let mut nodes = vec![];
|
||||
|
||||
'l: while !i.is_empty() {
|
||||
for (input, head) in ObjectPositions::new(i) {
|
||||
debug_assert!(
|
||||
input.s.len() >= 3,
|
||||
"object must have at least three characters: {:?}",
|
||||
input.s
|
||||
);
|
||||
if let Ok((input, node)) = object_node(input) {
|
||||
if !head.is_empty() {
|
||||
nodes.push(head.text_token())
|
||||
}
|
||||
nodes.push(node);
|
||||
debug_assert!(
|
||||
input.input_len() < i.input_len(),
|
||||
"{} < {}",
|
||||
input.input_len(),
|
||||
i.input_len()
|
||||
);
|
||||
i = input;
|
||||
continue 'l;
|
||||
}
|
||||
}
|
||||
nodes.push(i.text_token());
|
||||
break;
|
||||
}
|
||||
|
||||
debug_assert_eq!(
|
||||
input.as_str(),
|
||||
nodes.iter().fold(String::new(), |s, i| s + &i.to_string()),
|
||||
"parser must be lossless"
|
||||
);
|
||||
|
||||
nodes
|
||||
}
|
||||
|
||||
fn object_nodes_base(input: Input) -> Vec<GreenElement> {
|
||||
let mut children = vec![];
|
||||
|
||||
let mut i = input;
|
||||
'l: loop {
|
||||
for (input, head) in InlinePositions::new(i.as_bytes()).map(|idx| i.take_split(idx)) {
|
||||
if let Ok((input, node)) = object_node(input) {
|
||||
if !head.is_empty() {
|
||||
children.push(head.text_token())
|
||||
}
|
||||
children.push(node);
|
||||
i = input;
|
||||
continue 'l;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if !i.is_empty() {
|
||||
children.push(i.text_token());
|
||||
}
|
||||
|
||||
children
|
||||
}
|
||||
|
||||
/// Recognizes an org-mode element except text
|
||||
fn object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
||||
if i.input_len() < 3 {
|
||||
return Err(nom::Err::Error(()));
|
||||
}
|
||||
|
||||
match &i.as_bytes()[0] {
|
||||
b'*' => bold_node(i),
|
||||
b'+' => strike_node(i),
|
||||
|
|
@ -120,6 +148,22 @@ fn object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
|||
}
|
||||
}
|
||||
|
||||
/// `ObjectPositions` yields nothing for inputs shorter than three bytes and
/// stops once fewer than three bytes would remain.
#[test]
fn positions() {
    let config = crate::ParseConfig::default();

    let too_short = ObjectPositions::new(("*{", &config).into()).count();
    assert!(too_short == 0);

    // Expected remaining input at each candidate position, in order.
    let expected = [
        "*{()}//s\nc<<",
        "{()}//s\nc<<",
        "()}//s\nc<<",
        ")}//s\nc<<",
        "c<<",
    ];

    let found = ObjectPositions::new(("*{()}//s\nc<<", &config).into()).collect::<Vec<_>>();

    assert_eq!(found.len(), 5);
    for (item, want) in found.iter().zip(expected.iter()) {
        assert_eq!(item.0.s, *want);
    }
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
use crate::{
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use nom::{IResult, InputTake};
|
||||
use nom::{IResult, InputLength, InputTake};
|
||||
|
||||
use super::{
|
||||
combinator::{blank_lines, line_ends_iter, node, GreenElement},
|
||||
|
|
@ -8,10 +8,29 @@ use super::{
|
|||
SyntaxKind,
|
||||
};
|
||||
|
||||
/// Recognizes one paragraph
///
/// Runs `paragraph_node_base` on `input` through the crate's `lossless_parser!`
/// macro. The base parser asserts (debug builds only) that `input` is not empty.
|
||||
pub fn paragraph_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
// NOTE(review): the macro is defined elsewhere; presumably it checks that the
// base parser is lossless — confirm against its definition.
crate::lossless_parser!(paragraph_node_base, input)
|
||||
}
|
||||
|
||||
/// Recognizes multiple paragraphs
|
||||
pub fn paragraph_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
|
||||
let mut i = input;
|
||||
let mut children = vec![];
|
||||
while !i.is_empty() {
|
||||
let (input, node) = paragraph_node(i)?;
|
||||
children.push(node);
|
||||
debug_assert!(
|
||||
i.input_len() > input.input_len(),
|
||||
"{} > {}",
|
||||
i.input_len(),
|
||||
input.input_len()
|
||||
);
|
||||
i = input;
|
||||
}
|
||||
Ok(children)
|
||||
}
|
||||
|
||||
fn paragraph_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
debug_assert!(!input.is_empty());
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ use super::{
|
|||
};
|
||||
|
||||
/// Public entry point that runs `planning_node_base` on `input` through the
/// crate's `lossless_parser!` macro.
pub fn planning_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
// Debug-only precondition: this parser must not be invoked on empty input.
debug_assert!(!input.is_empty());
|
||||
// NOTE(review): the macro is defined elsewhere; presumably it checks that the
// base parser is lossless — confirm against its definition.
crate::lossless_parser!(planning_node_base, input)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,13 +58,9 @@ fn list() {
|
|||
"#).to_html(),
|
||||
@r###"
|
||||
<main><section><ul><li><p>1
|
||||
|
||||
</p></li><li><p>2
|
||||
|
||||
</p><ul><li><p>3
|
||||
|
||||
</p></li><li><p>4
|
||||
|
||||
</p></li></ul></li><li><p>5
|
||||
</p></li></ul></section></main>
|
||||
"###
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue