From b7ddc0f0763f83c7d996ea69922be190ffe2fe87 Mon Sep 17 00:00:00 2001
From: PoiScript <poiscript@gmail.com>
Date: Thu, 16 Nov 2023 18:50:33 +0800
Subject: [PATCH] feat: update list node parsing

---
 src/syntax/document.rs  |  28 +++--
 src/syntax/drawer.rs    |   1 +
 src/syntax/element.rs   | 162 +++++++++++++++++++--------
 src/syntax/headline.rs  |  40 +++++--
 src/syntax/keyword.rs   |   6 +
 src/syntax/list.rs      | 238 +++++++++++++++++++++-------------------
 src/syntax/object.rs    | 152 ++++++++++++++++---------
 src/syntax/paragraph.rs |  21 +++-
 src/syntax/planning.rs  |   1 +
 tests/html.rs           |   4 -
 10 files changed, 411 insertions(+), 242 deletions(-)
diff --git a/src/syntax/document.rs b/src/syntax/document.rs
index fb687cf..4c07d57 100644
--- a/src/syntax/document.rs
+++ b/src/syntax/document.rs
@@ -1,7 +1,4 @@
-use nom::{
-    combinator::{iterator, opt},
-    IResult,
-};
+use nom::{combinator::opt, IResult, InputLength};
 
 use super::{
     combinator::{blank_lines, node, GreenElement},
@@ -22,18 +19,29 @@ fn document_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
 
     children.extend(pre_blank);
 
+    if input.is_empty() {
+        return Ok((input, node(DOCUMENT, children)));
+    }
+
     let (input, section) = opt(section_node)(input)?;
     if let Some(section) = section {
         children.push(section);
     }
 
-    let mut it = iterator(input, headline_node);
-    children.extend(&mut it);
-    let (input, _) = it.finish()?;
+    let mut i = input;
+    while !i.is_empty() {
+        let (input, headline) = headline_node(i)?;
+        debug_assert!(
+            i.input_len() > input.input_len(),
+            "{} > {}",
+            i.input_len(),
+            input.input_len(),
+        );
+        i = input;
+        children.push(headline);
+    }
 
-    debug_assert!(input.is_empty());
-
-    Ok((input, node(DOCUMENT, children)))
+    Ok((i, node(DOCUMENT, children)))
 }
 
 #[test]
diff --git a/src/syntax/drawer.rs b/src/syntax/drawer.rs
index 77967da..55216c9 100644
--- a/src/syntax/drawer.rs
+++ b/src/syntax/drawer.rs
@@ -128,6 +128,7 @@ fn node_property_node(input: Input) -> IResult<Input, GreenElement, ()> {
 
 #[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
 pub fn property_drawer_node(input: Input) -> IResult<Input, GreenElement, ()> {
+    debug_assert!(!input.is_empty());
     crate::lossless_parser!(property_drawer_node_base, input)
 }
 
diff --git a/src/syntax/element.rs b/src/syntax/element.rs
index 736b51e..85d352f 100644
--- a/src/syntax/element.rs
+++ b/src/syntax/element.rs
@@ -1,4 +1,7 @@
-use nom::IResult;
+use std::iter::once;
+
+use memchr::memchr2_iter;
+use nom::{AsBytes, IResult, InputLength, InputTake};
 
 use super::{
     block::block_node,
@@ -12,27 +15,46 @@ use super::{
     input::Input,
     keyword::{affiliated_keyword_nodes, keyword_node},
     list::list_node,
-    paragraph::paragraph_node,
+    paragraph::{paragraph_node, paragraph_nodes},
     rule::rule_node,
     table::{org_table_node, table_el_node},
 };
 
-/// Parses input into multiple element
+/// Recognizes multiple org-mode elements
 ///
 /// input must not contains blank line in the beginning
 #[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
 pub fn element_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
     debug_assert!(!input.is_empty());
+    // TODO:
+    // debug_assert!(
+    //     blank_lines(input).unwrap().1.is_empty(),
+    //     "input must not start with blank lines: {:?}",
+    //     input.s
+    // );
 
     let mut i = input;
     let mut nodes = vec![];
 
-    while !i.is_empty() {
-        let result = element_node(i);
-        debug_assert!(result.is_ok(), "element_node() always returns Ok()");
-        let (input, node) = result?;
-        i = input;
-        nodes.push(node);
+    'l: while !i.is_empty() {
+        for (input, head) in ElementPositions::new(i) {
+            if let Ok((input, element)) = element_node(input) {
+                if !head.is_empty() {
+                    nodes.extend(paragraph_nodes(head)?);
+                }
+                nodes.push(element);
+                debug_assert!(
+                    input.input_len() < i.input_len(),
+                    "{} < {}",
+                    input.input_len(),
+                    i.input_len()
+                );
+                i = input;
+                continue 'l;
+            }
+        }
+        nodes.extend(paragraph_nodes(i)?);
+        break;
     }
 
     debug_assert_eq!(
@@ -44,6 +66,7 @@ pub fn element_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
     Ok(nodes)
 }
 
+/// Recognizes an org-mode element except paragraph (unless it has affiliated keywords)
 #[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
 pub fn element_node(input: Input) -> IResult<Input, GreenElement, ()> {
     // skip affiliated keyword first
@@ -52,11 +75,7 @@ pub fn element_node(input: Input) -> IResult<Input, GreenElement, ()> {
     let has_affiliated_keyword = !nodes.is_empty();
 
     // find first non-whitespace character
-    let byte = i
-        .as_str()
-        .trim_start_matches(|c| c == ' ' || c == '\t')
-        .bytes()
-        .next();
+    let byte = i.bytes().find(|&b| b != b' ' && b != b'\t');
 
     debug_assert!(
         !(has_affiliated_keyword && matches!(byte, None | Some(b'\n') | Some(b'\r'))),
@@ -80,7 +99,78 @@ pub fn element_node(input: Input) -> IResult<Input, GreenElement, ()> {
         _ => Err(nom::Err::Error(())),
     };
 
-    result.or_else(|_| paragraph_node(input))
+    if has_affiliated_keyword {
+        result.or_else(|_| paragraph_node(input))
+    } else {
+        result
+    }
+}
+
+struct ElementPositions<'a> {
+    input: Input<'a>,
+    pos: usize,
+}
+
+impl<'a> ElementPositions<'a> {
+    fn new(input: Input<'a>) -> Self {
+        ElementPositions { input, pos: 0 }
+    }
+}
+
+impl<'a> Iterator for ElementPositions<'a> {
+    type Item = (Input<'a>, Input<'a>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.pos >= self.input.s.len() {
+            return None;
+        }
+
+        let bytes = &self.input.as_bytes()[self.pos..];
+
+        let mut iter = once(0).chain(memchr2_iter(b'\r', b'\n', bytes).map(|i| i + 1));
+
+        while let Some(i) = iter.next() {
+            let b = *bytes[i..].iter().find(|&&b| b != b' ' && b != b'\t')?;
+
+            if matches!(
+                b,
+                b'[' | b'0'..=b'9' | b'*' | b'C' | b'-' | b':' | b'|' | b'+' | b'#'
+            ) {
+                let previous = self.pos;
+                self.pos = iter
+                    .next()
+                    .map(|i| i + self.pos)
+                    .unwrap_or_else(|| self.input.s.len());
+
+                debug_assert!(
+                    previous < self.pos && self.pos <= self.input.s.len(),
+                    "{} < {} < {}",
+                    previous,
+                    self.pos,
+                    self.input.s.len()
+                );
+
+                let (input, head) = self.input.take_split(i + previous);
+
+                return Some((input, head));
+            }
+        }
+
+        None
+    }
+}
+
+#[test]
+fn positions() {
+    let config = crate::ParseConfig::default();
+    let s = "+\n\n    C\n    \r\n-\n\t\t[\n:  \r\n";
+    let vec = ElementPositions::new((s, &config).into()).collect::<Vec<_>>();
+    assert_eq!(vec.len(), 5);
+    assert_eq!(vec[0].0.s, "+\n\n    C\n    \r\n-\n\t\t[\n:  \r\n");
+    assert_eq!(vec[1].0.s, "    C\n    \r\n-\n\t\t[\n:  \r\n");
+    assert_eq!(vec[2].0.s, "-\n\t\t[\n:  \r\n");
+    assert_eq!(vec[3].0.s, "\t\t[\n:  \r\n");
+    assert_eq!(vec[4].0.s, ":  \r\n");
 }
 
 #[test]
@@ -94,6 +184,7 @@ fn parse() {
         SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap())
     };
 
+    // paragraph stops at blank lines
     insta::assert_debug_snapshot!(
         t(r#"a
 
@@ -108,39 +199,18 @@ b"#),
     "###
     );
 
+    // paragraph followed by special element
     insta::assert_debug_snapshot!(
-        t("#+ATTR_HTML: :width 300px\n[[./img/a.jpg]]"),
+        t("Table:\n|cell"),
         @r###"
-    SECTION@0..41
-      PARAGRAPH@0..41
-        AFFILIATED_KEYWORD@0..26
-          HASH_PLUS@0..2 "#+"
-          TEXT@2..11 "ATTR_HTML"
-          COLON@11..12 ":"
-          TEXT@12..25 " :width 300px"
-          NEW_LINE@25..26 "\n"
-        LINK@26..41
-          L_BRACKET2@26..28 "[["
-          LINK_PATH@28..39 "./img/a.jpg"
-          R_BRACKET2@39..41 "]]"
-    "###
-    );
-
-    insta::assert_debug_snapshot!(
-        t("#+ATTR_HTML: :width 300px\n[[./img/a.jpg]]"),
-        @r###"
-    SECTION@0..41
-      PARAGRAPH@0..41
-        AFFILIATED_KEYWORD@0..26
-          HASH_PLUS@0..2 "#+"
-          TEXT@2..11 "ATTR_HTML"
-          COLON@11..12 ":"
-          TEXT@12..25 " :width 300px"
-          NEW_LINE@25..26 "\n"
-        LINK@26..41
-          L_BRACKET2@26..28 "[["
-          LINK_PATH@28..39 "./img/a.jpg"
-          R_BRACKET2@39..41 "]]"
+    SECTION@0..12
+      PARAGRAPH@0..7
+        TEXT@0..7 "Table:\n"
+      ORG_TABLE@7..12
+        ORG_TABLE_STANDARD_ROW@7..12
+          PIPE@7..8 "|"
+          ORG_TABLE_CELL@8..12
+            TEXT@8..12 "cell"
     "###
     );
 }
diff --git a/src/syntax/headline.rs b/src/syntax/headline.rs
index 7011b17..15b6424 100644
--- a/src/syntax/headline.rs
+++ b/src/syntax/headline.rs
@@ -22,6 +22,7 @@ use super::{
 
 #[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
 pub fn headline_node(input: Input) -> IResult<Input, GreenElement, ()> {
+    debug_assert!(!input.is_empty());
     crate::lossless_parser!(headline_node_base, input)
 }
 
@@ -59,16 +60,24 @@ fn headline_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
     b.ws(ws_);
     b.nl(nl);
 
-    if nl.is_empty() {
+    if input.is_empty() {
         return Ok((input, b.finish(HEADLINE)));
     }
 
     let (input, planning) = opt(planning_node)(input)?;
     b.push_opt(planning);
 
+    if input.is_empty() {
+        return Ok((input, b.finish(HEADLINE)));
+    }
+
     let (input, property_drawer) = opt(property_drawer_node)(input)?;
     b.push_opt(property_drawer);
 
+    if input.is_empty() {
+        return Ok((input, b.finish(HEADLINE)));
+    }
+
     let (input, section) = opt(section_node)(input)?;
     b.push_opt(section);
 
@@ -83,6 +92,12 @@ fn headline_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
 
         let (input, headline) = headline_node(i)?;
         b.push(headline);
+        debug_assert!(
+            i.input_len() > input.input_len(),
+            "{} > {}",
+            i.input_len(),
+            input.input_len()
+        );
         i = input;
     }
 
@@ -91,15 +106,12 @@ fn headline_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
 
 #[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
 pub fn section_node(input: Input) -> IResult<Input, GreenElement, ()> {
+    debug_assert!(!input.is_empty());
     let (input, section) = section_text(input)?;
     Ok((input, node(SECTION, element_nodes(section)?)))
 }
 
-pub fn section_text(input: Input) -> IResult<Input, Input, ()> {
-    if input.is_empty() {
-        return Err(nom::Err::Error(()));
-    }
-
+fn section_text(input: Input) -> IResult<Input, Input, ()> {
     for (input, section) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) {
         if headline_stars(input).is_ok() {
             if section.is_empty() {
@@ -119,12 +131,13 @@ fn headline_stars(input: Input) -> IResult<Input, Input, ()> {
     let level = bytes.iter().take_while(|&&c| c == b'*').count();
 
     if level == 0 {
-        Err(nom::Err::Error(()))
-    } else if input.input_len() == level
-        || bytes[level] == b'\n'
-        || bytes[level] == b'\r'
-        || bytes[level] == b' '
-    {
+        return Err(nom::Err::Error(()));
+    }
+    // followed by eof, new line, or whitespace
+    else if matches!(
+        bytes.get(level),
+        None | Some(b'\n') | Some(b'\r') | Some(b' ')
+    ) {
         Ok(input.take_split(level))
     } else {
         Err(nom::Err::Error(()))
@@ -151,6 +164,7 @@ fn headline_tags_node(input: Input) -> IResult<Input, GreenElement, ()> {
         if item.is_empty() {
             children.push(token(COLON, ":"));
             can_not_be_ws = false;
+            debug_assert!(i > ii, "{} > {}", i, ii);
             i = ii;
         } else if item
             .iter()
@@ -159,11 +173,13 @@ fn headline_tags_node(input: Input) -> IResult<Input, GreenElement, ()> {
             children.push(input.slice(ii + 1..i).text_token());
             children.push(token(COLON, ":"));
             can_not_be_ws = false;
+            debug_assert!(i > ii, "{} > {}", i, ii);
             i = ii;
         } else if item.iter().all(|&c| c == b' ' || c == b'\t') && !can_not_be_ws {
             children.push(input.slice(ii + 1..i).ws_token());
             children.push(token(COLON, ":"));
             can_not_be_ws = true;
+            debug_assert!(i > ii, "{} > {}", i, ii);
             i = ii;
         } else {
             break;
diff --git a/src/syntax/keyword.rs b/src/syntax/keyword.rs
index 4a6725d..be4d519 100644
--- a/src/syntax/keyword.rs
+++ b/src/syntax/keyword.rs
@@ -57,6 +57,12 @@ pub fn affiliated_keyword_nodes(input: Input) -> IResult<Input, Vec<GreenElement
             break;
         }
 
+        debug_assert!(
+            i.input_len() > input_.input_len(),
+            "{} > {}",
+            i.input_len(),
+            input_.input_len()
+        );
         i = input_;
         children.push(GreenElement::Node(GreenNode::new(
             SyntaxKind::AFFILIATED_KEYWORD.into(),
diff --git a/src/syntax/list.rs b/src/syntax/list.rs
index 098d2f0..d4cbf5f 100644
--- a/src/syntax/list.rs
+++ b/src/syntax/list.rs
@@ -17,6 +17,7 @@ use super::{
     input::Input,
     keyword::affiliated_keyword_nodes,
     object::object_nodes,
+    paragraph::paragraph_nodes,
     SyntaxKind::*,
 };
 
@@ -28,24 +29,35 @@ pub fn list_node(input: Input) -> IResult<Input, GreenElement, ()> {
 fn list_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
     let (input, affiliated_keywords) = affiliated_keyword_nodes(input)?;
     let (input, first_indent) = space0(input)?;
-    let (input, first_item) = list_item_node(first_indent, input)?;
+    let (input, (ends_with_empty_blank_lines, first_item)) = list_item_node(first_indent, input)?;
 
     let mut children = vec![];
     children.extend(affiliated_keywords);
     children.push(first_item);
 
     let mut input = input;
-    while !input.is_empty() {
+    while !ends_with_empty_blank_lines && !input.is_empty() {
         let (input_, indent) = space0(input)?;
 
         if indent.input_len() != first_indent.input_len() {
             break;
         }
 
-        if let Ok((input_, list_item)) = list_item_node(indent, input_) {
-            children.push(list_item);
-            input = input_;
-        } else {
+        let Ok((input_, (ends_with_empty_blank_lines, list_item))) = list_item_node(indent, input_)
+        else {
+            break;
+        };
+
+        children.push(list_item);
+        debug_assert!(
+            input.input_len() > input_.input_len(),
+            "{} > {}",
+            input.input_len(),
+            input_.input_len(),
+        );
+        input = input_;
+
+        if ends_with_empty_blank_lines {
             break;
         }
     }
@@ -57,7 +69,11 @@ fn list_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
     Ok((input, node(LIST, children)))
 }
 
-fn list_item_node<'a>(indent: Input<'a>, input: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
+#[tracing::instrument(level = "debug", skip(input, indent), fields(input = input.s))]
+fn list_item_node<'a>(
+    indent: Input<'a>,
+    input: Input<'a>,
+) -> IResult<Input<'a>, (bool, GreenElement), ()> {
     let (input, bullet) = recognize(tuple((
         alt((
             tag("+"),
@@ -91,7 +107,9 @@ fn list_item_node<'a>(indent: Input<'a>, input: Input<'a>) -> IResult<Input<'a>,
     let (input, counter) = opt(list_item_counter)(input)?;
     let (input, checkbox) = opt(list_item_checkbox)(input)?;
     let (input, tag) = cond(!is_ordered, opt(list_item_tag))(input)?;
-    let (input, content) = list_item_content_node(input, indent.input_len())?;
+    let (input, (ends_with_empty_blank_lines, content)) =
+        list_item_content_node(input, indent.input_len())?;
+    let (input, post_blank) = cond(!ends_with_empty_blank_lines, blank_lines)(input)?;
 
     let mut children = vec![
         indent.token(LIST_ITEM_INDENT),
@@ -109,10 +127,17 @@ fn list_item_node<'a>(indent: Input<'a>, input: Input<'a>) -> IResult<Input<'a>,
     }
 
     children.push(content);
+    if let Some(post_blank) = post_blank {
+        children.extend(post_blank);
+    }
 
-    Ok((input, node(LIST_ITEM, children)))
+    Ok((
+        input,
+        (ends_with_empty_blank_lines, node(LIST_ITEM, children)),
+    ))
 }
 
+#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
 fn list_item_counter(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
     let (input, node) = map(
         tuple((l_bracket_token, at_token, alphanumeric1, r_bracket_token)),
@@ -129,6 +154,7 @@ fn list_item_counter(input: Input) -> IResult<Input, (GreenElement, Input), ()>
     Ok((input, (node, ws)))
 }
 
+#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
 fn list_item_checkbox(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
     let (input, node) = map(
         tuple((
@@ -151,6 +177,7 @@ fn list_item_checkbox(input: Input) -> IResult<Input, (GreenElement, Input), ()>
     Ok((input, (node, ws)))
 }
 
+#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
 fn list_item_tag(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
     let bytes = input.as_bytes();
 
@@ -167,19 +194,23 @@ fn list_item_tag(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
     Ok((input, (node(LIST_ITEM_TAG, children), ws)))
 }
 
-fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, GreenElement, ()> {
+#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
+fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, (bool, GreenElement), ()> {
     if memchr(b'\n', input.as_bytes()).is_none() {
         return Ok((
             input.of(""),
-            node(LIST_ITEM_CONTENT, [node(PARAGRAPH, object_nodes(input))]),
+            (
+                false,
+                node(LIST_ITEM_CONTENT, [node(PARAGRAPH, object_nodes(input))]),
+            ),
         ));
     };
 
     let mut skip_one = true;
     let mut i = input;
     let mut children = vec![];
-    let mut previous_line_is_blank = false;
-    'l: loop {
+    let mut previous_blank_line: Option<(Input, Input)> = None;
+    'l: while !i.is_empty() {
         for (input, head) in line_starts_iter(i.as_str())
             // the first line in list item content will always be a paragraph
             // so we need to skip it in the first iteration
@@ -188,50 +219,51 @@ fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, GreenEl
         {
             match get_line_indent(input.as_str()) {
                 Some(next_indent) => {
-                    previous_line_is_blank = false;
-
                     if next_indent <= indent {
+                        let (input, head) = previous_blank_line.unwrap_or_else(|| (input, head));
                         if !head.is_empty() {
-                            children.push(node(PARAGRAPH, object_nodes(head)));
+                            children.extend(paragraph_nodes(head)?);
                         }
-                        return Ok((input, node(LIST_ITEM_CONTENT, children)));
+                        return Ok((input, (false, node(LIST_ITEM_CONTENT, children))));
                     }
 
+                    previous_blank_line = None;
+
                     if let Ok((input, element)) = element_node(input) {
                         if !head.is_empty() {
-                            children.push(node(PARAGRAPH, object_nodes(head)));
+                            children.extend(paragraph_nodes(head)?);
                         }
                         children.push(element);
+                        debug_assert!(
+                            input.input_len() < i.input_len(),
+                            "{} < {}",
+                            input.input_len(),
+                            i.input_len()
+                        );
                         i = input;
                         skip_one = false;
                         continue 'l;
                     }
                 }
-                _ if previous_line_is_blank => {
-                    // list item ends at two consecutive empty lines
-                    if !head.is_empty() {
-                        children.push(node(PARAGRAPH, object_nodes(head)));
-                    }
-                    let (input, post_blank) = blank_lines(input)?;
-
-                    children.extend(post_blank);
-
-                    return Ok((input, node(LIST_ITEM_CONTENT, children)));
-                }
                 _ => {
-                    previous_line_is_blank = true;
+                    // list item ends at two consecutive empty lines
+                    if let Some((input, head)) = previous_blank_line {
+                        if !head.is_empty() {
+                            children.extend(paragraph_nodes(head)?);
+                        }
+
+                        return Ok((input, (true, node(LIST_ITEM_CONTENT, children))));
+                    } else {
+                        previous_blank_line = Some((input, head))
+                    }
                 }
             }
         }
-
+        children.extend(paragraph_nodes(i)?);
         break;
     }
 
-    if !i.is_empty() {
-        children.push(node(PARAGRAPH, object_nodes(i)));
-    }
-
-    Ok((input.of(""), node(LIST_ITEM_CONTENT, children)))
+    Ok((input.of(""), (false, node(LIST_ITEM_CONTENT, children))))
 }
 
 fn get_line_indent(input: &str) -> Option<usize> {
@@ -247,9 +279,8 @@ fn parse() {
 
     let to_list = to_ast::<List>(list_node);
 
-    let list = to_list("1)");
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("1)").syntax,
         @r###"
     LIST@0..2
       LIST_ITEM@0..2
@@ -260,9 +291,8 @@ fn parse() {
     "###
     );
 
-    let list = to_list("+ ");
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("+ ").syntax,
         @r###"
     LIST@0..2
       LIST_ITEM@0..2
@@ -273,9 +303,8 @@ fn parse() {
     "###
     );
 
-    let list = to_list("-\n");
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("-\n").syntax,
         @r###"
     LIST@0..2
       LIST_ITEM@0..2
@@ -283,14 +312,12 @@ fn parse() {
         LIST_ITEM_BULLET@0..1 "-"
         LIST_ITEM_CONTENT@1..2
           PARAGRAPH@1..2
-            TEXT@1..2 "\n"
+            BLANK_LINE@1..2 "\n"
     "###
     );
 
-    let list = to_list("+ 1");
-    assert!(!list.is_ordered());
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("+ 1").syntax,
         @r###"
     LIST@0..3
       LIST_ITEM@0..3
@@ -302,9 +329,8 @@ fn parse() {
     "###
     );
 
-    let list = to_list("+ 1\n");
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("+ 1\n").syntax,
         @r###"
     LIST@0..4
       LIST_ITEM@0..4
@@ -316,12 +342,13 @@ fn parse() {
     "###
     );
 
-    let list = to_list("+ [@A] 1\n\n\n+ 2");
+    // list ends with two consecutive blank lines, and these blank lines
+    // will be the post_blank of list node
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("+ [@A] 1\n\n\n+ 2").syntax,
         @r###"
-    LIST@0..14
-      LIST_ITEM@0..11
+    LIST@0..11
+      LIST_ITEM@0..9
         LIST_ITEM_INDENT@0..0 ""
         LIST_ITEM_BULLET@0..2 "+ "
         LIST_ITEM_COUNTER@2..6
@@ -330,25 +357,21 @@ fn parse() {
           TEXT@4..5 "A"
           R_BRACKET@5..6 "]"
         WHITESPACE@6..7 " "
-        LIST_ITEM_CONTENT@7..11
-          PARAGRAPH@7..10
-            TEXT@7..10 "1\n\n"
-          BLANK_LINE@10..11 "\n"
-      LIST_ITEM@11..14
-        LIST_ITEM_INDENT@11..11 ""
-        LIST_ITEM_BULLET@11..13 "+ "
-        LIST_ITEM_CONTENT@13..14
-          PARAGRAPH@13..14
-            TEXT@13..14 "2"
+        LIST_ITEM_CONTENT@7..9
+          PARAGRAPH@7..9
+            TEXT@7..9 "1\n"
+      BLANK_LINE@9..10 "\n"
+      BLANK_LINE@10..11 "\n"
     "###
     );
 
-    let list = to_list("+ *TAG* :: item1\n+ [X] item2");
+    // an empty line between list items becomes
+    // the post_blank of the first item
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("+ *TAG* :: item1\n\n+ [X] item2").syntax,
         @r###"
-    LIST@0..28
-      LIST_ITEM@0..17
+    LIST@0..29
+      LIST_ITEM@0..18
         LIST_ITEM_INDENT@0..0 ""
         LIST_ITEM_BULLET@0..2 "+ "
         LIST_ITEM_TAG@2..10
@@ -362,20 +385,22 @@ fn parse() {
         LIST_ITEM_CONTENT@10..17
           PARAGRAPH@10..17
             TEXT@10..17 " item1\n"
-      LIST_ITEM@17..28
-        LIST_ITEM_INDENT@17..17 ""
-        LIST_ITEM_BULLET@17..19 "+ "
-        LIST_ITEM_CHECK_BOX@19..22
-          L_BRACKET@19..20 "["
-          TEXT@20..21 "X"
-          R_BRACKET@21..22 "]"
-        WHITESPACE@22..23 " "
-        LIST_ITEM_CONTENT@23..28
-          PARAGRAPH@23..28
-            TEXT@23..28 "item2"
+        BLANK_LINE@17..18 "\n"
+      LIST_ITEM@18..29
+        LIST_ITEM_INDENT@18..18 ""
+        LIST_ITEM_BULLET@18..20 "+ "
+        LIST_ITEM_CHECK_BOX@20..23
+          L_BRACKET@20..21 "["
+          TEXT@21..22 "X"
+          R_BRACKET@22..23 "]"
+        WHITESPACE@23..24 " "
+        LIST_ITEM_CONTENT@24..29
+          PARAGRAPH@24..29
+            TEXT@24..29 "item2"
     "###
     );
 
+    // nested list
     let list = to_list(
         r#"+ item1
   + item2"#,
@@ -400,9 +425,8 @@ fn parse() {
     "###
     );
 
-    let list = to_list("* item1\nitem2");
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("* item1\nitem2").syntax,
         @r###"
     LIST@0..8
       LIST_ITEM@0..8
@@ -414,13 +438,8 @@ fn parse() {
     "###
     );
 
-    let list = to_list(
-        r#"* item1
-
-  still item 1"#,
-    );
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("* item1\n\n  still item 1").syntax,
         @r###"
     LIST@0..23
       LIST_ITEM@0..23
@@ -428,7 +447,8 @@ fn parse() {
         LIST_ITEM_BULLET@0..2 "* "
         LIST_ITEM_CONTENT@2..23
           PARAGRAPH@2..9
-            TEXT@2..9 "item1\n\n"
+            TEXT@2..8 "item1\n"
+            BLANK_LINE@8..9 "\n"
           PARAGRAPH@9..23
             TEXT@9..23 "  still item 1"
     "###
@@ -455,7 +475,8 @@ fn parse() {
               LIST_ITEM_BULLET@14..16 "+ "
               LIST_ITEM_CONTENT@16..26
                 PARAGRAPH@16..26
-                  TEXT@16..26 "item2\n    "
+                  TEXT@16..22 "item2\n"
+                  BLANK_LINE@22..26 "    "
     "###
     );
 
@@ -476,14 +497,16 @@ fn parse() {
         LIST_ITEM_BULLET@0..3 "1. "
         LIST_ITEM_CONTENT@3..23
           PARAGRAPH@3..10
-            TEXT@3..10 "item1\n\n"
+            TEXT@3..9 "item1\n"
+            BLANK_LINE@9..10 "\n"
           LIST@10..23
             LIST_ITEM@10..23
               LIST_ITEM_INDENT@10..14 "    "
               LIST_ITEM_BULLET@14..16 "- "
-              LIST_ITEM_CONTENT@16..23
-                PARAGRAPH@16..23
-                  TEXT@16..23 "item2\n\n"
+              LIST_ITEM_CONTENT@16..22
+                PARAGRAPH@16..22
+                  TEXT@16..22 "item2\n"
+              BLANK_LINE@22..23 "\n"
       LIST_ITEM@23..32
         LIST_ITEM_INDENT@23..23 ""
         LIST_ITEM_BULLET@23..26 "3. "
@@ -493,21 +516,18 @@ fn parse() {
     "###
     );
 
-    let list = to_list(
-        r#"  + item1
-
-  + item2"#,
-    );
+    // indented list: sibling items separated by a single blank line
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("  + item1\n\n  + item2").syntax,
         @r###"
     LIST@0..20
       LIST_ITEM@0..11
         LIST_ITEM_INDENT@0..2 "  "
         LIST_ITEM_BULLET@2..4 "+ "
-        LIST_ITEM_CONTENT@4..11
-          PARAGRAPH@4..11
-            TEXT@4..11 "item1\n\n"
+        LIST_ITEM_CONTENT@4..10
+          PARAGRAPH@4..10
+            TEXT@4..10 "item1\n"
+        BLANK_LINE@10..11 "\n"
       LIST_ITEM@11..20
         LIST_ITEM_INDENT@11..13 "  "
         LIST_ITEM_BULLET@13..15 "+ "
@@ -517,14 +537,8 @@ fn parse() {
     "###
     );
 
-    let list = to_list(
-        r#"  1. item1
-        2. item2
-      3. item3"#,
-    );
-    assert!(list.is_ordered());
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("  1. item1\n        2. item2\n      3. item3").syntax,
         @r###"
     LIST@0..42
       LIST_ITEM@0..42
@@ -550,15 +564,9 @@ fn parse() {
     "###
     );
 
-    let list = to_list(
-        r#"  1. item1
-    #+begin_example
-hello
-#+end_example
-"#,
-    );
+    // Indentation of lines within other greater elements do not count
     insta::assert_debug_snapshot!(
-        list.syntax,
+        to_list("  1. item1\n    #+begin_example\nhello\n#+end_example\n").syntax,
         @r###"
     LIST@0..51
       LIST_ITEM@0..51
diff --git a/src/syntax/object.rs b/src/syntax/object.rs
index c1b0888..6b294ad 100644
--- a/src/syntax/object.rs
+++ b/src/syntax/object.rs
@@ -16,87 +16,115 @@ use super::{
     timestamp::{timestamp_active_node, timestamp_diary_node, timestamp_inactive_node},
 };
 
-pub struct InlinePositions<'a> {
-    bytes: &'a [u8],
+struct ObjectPositions<'a> {
+    input: Input<'a>,
     pos: usize,
     next: Option<usize>,
+    finder: jetscii::BytesConst,
 }
 
-impl InlinePositions<'_> {
-    pub fn new(bytes: &[u8]) -> InlinePositions {
-        InlinePositions {
-            bytes,
+impl ObjectPositions<'_> {
+    fn new(input: Input) -> ObjectPositions {
+        ObjectPositions {
+            input,
             pos: 0,
             next: Some(0),
+            finder: jetscii::bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n'),
         }
     }
 }
 
-impl Iterator for InlinePositions<'_> {
-    type Item = usize;
+impl<'a> Iterator for ObjectPositions<'a> {
+    type Item = (Input<'a>, Input<'a>);
 
     fn next(&mut self) -> Option<Self::Item> {
-        self.next.take().or_else(|| {
-            jetscii::bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n')
-                .find(&self.bytes[self.pos..])
-                .map(|i| {
-                    self.pos += i + 1;
+        if self.input.input_len() < 3 {
+            return None;
+        }
 
-                    match self.bytes[self.pos - 1] {
-                        b'{' => {
-                            self.next = Some(self.pos);
-                            self.pos - 1
-                        }
-                        b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos,
-                        _ => self.pos - 1,
-                    }
-                })
-        })
+        // a position queued after `{` must also leave at least three characters
+        if let Some(p) = self.next.take().filter(|&p| self.input.input_len() - p >= 3) {
+            return Some(self.input.take_split(p));
+        }
+        if self.pos >= self.input.input_len() {
+            return None;
+        }
+
+        let bytes = &self.input.as_bytes()[self.pos..];
+        let previous = self.pos;
+        let i = self.finder.find(bytes)?;
+        self.pos += i + 1;
+
+        let p = match bytes[i] {
+            b'{' => {
+                self.next = Some(self.pos);
+                self.pos - 1
+            }
+            b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos,
+            _ => self.pos - 1,
+        };
+
+        debug_assert!(
+            previous < self.pos && self.pos <= self.input.s.len(),
+            "{} < {} <= {}",
+            previous,
+            self.pos,
+            self.input.s.len()
+        );
+
+        // a valid object requires at least three characters
+        if self.input.s.len() - p < 3 {
+            return None;
+        }
+
+        Some(self.input.take_split(p))
     }
 }
 
 pub fn object_nodes(input: Input) -> Vec<GreenElement> {
+    // TODO:
     // debug_assert!(!input.is_empty());
-    let nodes = object_nodes_base(input);
+
+    let mut i = input;
+    let mut nodes = vec![];
+
+    'l: while !i.is_empty() {
+        for (input, head) in ObjectPositions::new(i) {
+            debug_assert!(
+                input.s.len() >= 3,
+                "object must have at least three characters: {:?}",
+                input.s
+            );
+            if let Ok((input, node)) = object_node(input) {
+                if !head.is_empty() {
+                    nodes.push(head.text_token())
+                }
+                nodes.push(node);
+                debug_assert!(
+                    input.input_len() < i.input_len(),
+                    "{} < {}",
+                    input.input_len(),
+                    i.input_len()
+                );
+                i = input;
+                continue 'l;
+            }
+        }
+        nodes.push(i.text_token());
+        break;
+    }
+
     debug_assert_eq!(
         input.as_str(),
         nodes.iter().fold(String::new(), |s, i| s + &i.to_string()),
         "parser must be lossless"
     );
+
     nodes
 }
 
-fn object_nodes_base(input: Input) -> Vec<GreenElement> {
-    let mut children = vec![];
-
-    let mut i = input;
-    'l: loop {
-        for (input, head) in InlinePositions::new(i.as_bytes()).map(|idx| i.take_split(idx)) {
-            if let Ok((input, node)) = object_node(input) {
-                if !head.is_empty() {
-                    children.push(head.text_token())
-                }
-                children.push(node);
-                i = input;
-                continue 'l;
-            }
-        }
-
-        break;
-    }
-
-    if !i.is_empty() {
-        children.push(i.text_token());
-    }
-
-    children
-}
-
+/// Recognizes an org-mode object except plain text
 fn object_node(i: Input) -> IResult<Input, GreenElement, ()> {
-    if i.input_len() < 3 {
-        return Err(nom::Err::Error(()));
-    }
-
     match &i.as_bytes()[0] {
         b'*' => bold_node(i),
         b'+' => strike_node(i),
@@ -120,6 +148,22 @@ fn object_node(i: Input) -> IResult<Input, GreenElement, ()> {
     }
 }
 
+#[test]
+fn positions() {
+    let config = crate::ParseConfig::default();
+
+    let vec = ObjectPositions::new(("*{", &config).into()).collect::<Vec<_>>();
+    assert!(vec.is_empty());
+
+    let vec = ObjectPositions::new(("*{()}//s\nc<<", &config).into()).collect::<Vec<_>>();
+    assert_eq!(vec.len(), 5);
+    assert_eq!(vec[0].0.s, "*{()}//s\nc<<");
+    assert_eq!(vec[1].0.s, "{()}//s\nc<<");
+    assert_eq!(vec[2].0.s, "()}//s\nc<<");
+    assert_eq!(vec[3].0.s, ")}//s\nc<<");
+    assert_eq!(vec[4].0.s, "c<<");
+}
+
 #[test]
 fn parse() {
     use crate::{
diff --git a/src/syntax/paragraph.rs b/src/syntax/paragraph.rs
index 27a59ca..caa8c65 100644
--- a/src/syntax/paragraph.rs
+++ b/src/syntax/paragraph.rs
@@ -1,4 +1,4 @@
-use nom::{IResult, InputTake};
+use nom::{IResult, InputLength, InputTake};
 
 use super::{
     combinator::{blank_lines, line_ends_iter, node, GreenElement},
@@ -8,10 +8,29 @@ use super::{
     SyntaxKind,
 };
 
+/// Recognizes one paragraph
 pub fn paragraph_node(input: Input) -> IResult<Input, GreenElement, ()> {
     crate::lossless_parser!(paragraph_node_base, input)
 }
 
+/// Recognizes consecutive paragraphs until the input is exhausted
+pub fn paragraph_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
+    let mut i = input;
+    let mut children = vec![];
+    while !i.is_empty() {
+        let (input, node) = paragraph_node(i)?;
+        children.push(node);
+        debug_assert!(
+            i.input_len() > input.input_len(),
+            "{} > {}",
+            i.input_len(),
+            input.input_len()
+        );
+        i = input;
+    }
+    Ok(children)
+}
+
 fn paragraph_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
     debug_assert!(!input.is_empty());
 
diff --git a/src/syntax/planning.rs b/src/syntax/planning.rs
index 284d9e3..8560d3d 100644
--- a/src/syntax/planning.rs
+++ b/src/syntax/planning.rs
@@ -15,6 +15,7 @@ use super::{
 };
 
 pub fn planning_node(input: Input) -> IResult<Input, GreenElement, ()> {
+    debug_assert!(!input.is_empty());
     crate::lossless_parser!(planning_node_base, input)
 }
 
diff --git a/tests/html.rs b/tests/html.rs
index f54808d..82181b7 100644
--- a/tests/html.rs
+++ b/tests/html.rs
@@ -58,13 +58,9 @@ fn list() {
 "#).to_html(),
         @r###"
     <main><section><ul><li><p>1
-
     </p></li><li><p>2
-
     </p><ul><li><p>3
-
     </p></li><li><p>4
-
     </p></li></ul></li><li><p>5
     </p></li></ul></section></main>
     "###