diff --git a/src/elements/block.rs b/src/elements/block.rs index e33c421..186d82c 100644 --- a/src/elements/block.rs +++ b/src/elements/block.rs @@ -1,3 +1,5 @@ +use lines::Lines; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct Block; @@ -9,27 +11,25 @@ impl Block { return None; } - let args = eol!(src); let name = until_while!(src, 8, |c| c == b' ' || c == b'\n', |c: u8| c .is_ascii_alphabetic())?; + let mut lines = Lines::new(src); + let (pre_cont_end, cont_beg, _) = lines.next()?; + let args = if pre_cont_end == name { + None + } else { + Some(&src[name..pre_cont_end]) + }; + let name = &src[8..name]; + let end_line = format!(r"#+END_{}", name); + let mut pre_end = cont_beg; - let mut pos = 0; - let end = format!(r"#+END_{}", &src[8..name]); - for line_end in lines!(src) { - if src[pos..line_end].trim().eq_ignore_ascii_case(&end) { - return Some(( - &src[8..name], - if name == args { - None - } else { - Some(&src[name..args]) - }, - args, - pos, - line_end, - )); + while let Some((_, end, line)) = lines.next() { + if line.trim().eq_ignore_ascii_case(&end_line) { + return Some((name, args, cont_beg, pre_end, end)); + } else { + pre_end = end; } - pos = line_end; } None @@ -40,7 +40,7 @@ impl Block { fn parse() { assert_eq!( Block::parse("#+BEGIN_SRC\n#+END_SRC"), - Some(("SRC", None, 11, 12, 21)) + Some(("SRC", None, 12, 12, 21)) ); assert_eq!( Block::parse( @@ -52,7 +52,7 @@ fn main() { #+END_SRC "# ), - Some(("SRC", Some(" rust"), 16, 104, 114)) + Some(("SRC", Some(" rust"), 17, 104, 114)) ); // TODO: more testing } diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 9973c15..ec3bee4 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -125,7 +125,6 @@ impl<'a> Element<'a> { } } - // FIXME: if bytes[pos] == b'\n' { break ( Some(Element::Paragraph { @@ -154,7 +153,7 @@ impl<'a> Element<'a> { end: line_beg - start, }), start, - Some((list, 1)), + Some((list, 0)), ) }; } @@ -180,7 +179,7 @@ impl<'a> Element<'a> { if bytes[pos..].starts_with(b"#+") { if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) { - let cont = &src[pos + cont_beg + 1..pos + cont_end - 1]; + let cont = &src[pos + cont_beg..pos + cont_end]; match name.to_uppercase().as_str() { "COMMENT" => brk!(Element::CommentBlock { args, cont }, end), "EXAMPLE" => brk!(Element::ExampleBlock { args, cont }, end), @@ -190,16 +189,16 @@ impl<'a> Element<'a> { "CENTER" => brk!( Element::CtrBlock { args, - cont_end, - end, + cont_end: cont_end - cont_beg, + end: end - cont_beg, }, cont_beg ), "QUOTE" => brk!( Element::QteBlock { args, - cont_end, - end, + cont_end: cont_end - cont_beg, + end: end - cont_beg, }, cont_beg ), @@ -207,8 +206,8 @@ impl<'a> Element<'a> { Element::SplBlock { name, args, - cont_end, - end + cont_end: cont_end - cont_beg, + end: end - cont_beg, }, cont_beg ), @@ -371,10 +370,28 @@ fn next_2() { ident: 0, ordered: false, }, - 1 + 0 )) ) ); + assert_eq!( + Element::next_2("\n\nLorem ipsum dolor sit amet.\n#+BEGIN_QUOTE\nLorem ipsum dolor sit amet.\n#+END_QUOTE\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 1, + }), + 2, + Some(( + QteBlock { + args: None, + cont_end: len + 1, + end: len + 1 + "#+END_QUOTE\n".len() + }, + "#+BEGIN_QUOTE\n".len() + )) + ) + ); // TODO: more tests } diff --git a/src/lines.rs b/src/lines.rs index 4c404b8..3c0707e 100644 --- a/src/lines.rs +++ b/src/lines.rs @@ -5,7 +5,6 @@ pub struct Lines<'a> { src: &'a str, iter: Chain, Once>, start: usize, - pre_cont_end: usize, } impl<'a> Lines<'a> { @@ -14,7 +13,6 @@ impl<'a> Lines<'a> { src, iter: memchr_iter(b'\n', &src.as_bytes()).chain(once(src.len())), start: 0, - pre_cont_end: 0, } } } diff --git a/src/parser.rs b/src/parser.rs index d4a213f..d383a96 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5,19 +5,58 @@ use objects::*; #[cfg_attr(test, derive(PartialEq))] #[derive(Copy, Clone, Debug)] pub enum Container { - Headline { beg: usize, end: usize }, - Section { end: usize }, - Paragraph { cont_end: usize, end: usize }, - CtrBlock { cont_end: usize, end: usize }, - QteBlock { cont_end: usize, end: usize }, - SplBlock { cont_end: usize, end: usize }, - DynBlock { cont_end: usize, end: usize }, - List { ident: usize, ordered: bool }, - ListItem { cont_end: usize, end: usize }, - Italic { end: usize }, - Strike { end: usize }, - Bold { end: usize }, - Underline { end: usize }, + Headline { + beg: usize, + end: usize, + }, + Section { + end: usize, + }, + Paragraph { + cont_end: usize, + end: usize, + }, + CtrBlock { + cont_end: usize, + end: usize, + }, + QteBlock { + cont_end: usize, + end: usize, + }, + SplBlock { + cont_end: usize, + end: usize, + }, + DynBlock { + cont_end: usize, + end: usize, + }, + List { + ident: usize, + ordered: bool, + end: usize, + }, + ListItem { + cont_end: usize, + end: usize, + }, + Italic { + cont_end: usize, + end: usize, + }, + Strike { + cont_end: usize, + end: usize, + }, + Bold { + cont_end: usize, + end: usize, + }, + Underline { + cont_end: usize, + end: usize, + }, } #[cfg_attr(test, derive(PartialEq))] @@ -148,6 +187,14 @@ impl<'a> Parser<'a> { } } + pub fn offset(&self) -> usize { + self.off + } + + pub fn stack_depth(&self) -> usize { + self.stack.len() + } + fn next_sec_or_hdl(&mut self) -> Event<'a> { let end = Headline::find_level(&self.text[self.off..], std::usize::MAX); debug_assert!(end <= self.text.len()); @@ -173,7 +220,8 @@ impl<'a> Parser<'a> { Event::HeadlineBeg(hdl) } - fn next_ele(&mut self, text: &'a str) -> Event<'a> { + fn next_ele(&mut self, end: usize) -> Event<'a> { + let text = &self.text[self.off..end]; let (ele, off) = self .ele_buf .take() @@ -188,8 +236,8 @@ impl<'a> Parser<'a> { self.off += off; - match ele { - Some(Element::Paragraph { cont_end, end }) => { + ele.map(|x| match x { + Element::Paragraph { cont_end, end } => { debug_assert!(cont_end <= text.len() && end <= text.len()); self.stack.push(Container::Paragraph { cont_end: cont_end + self.off, @@ -197,7 +245,7 @@ impl<'a> Parser<'a> { }); Event::ParagraphBeg } - Some(Element::QteBlock { end, cont_end, .. }) => { + Element::QteBlock { end, cont_end, .. } => { debug_assert!(cont_end <= text.len() && end <= text.len()); self.stack.push(Container::QteBlock { cont_end: cont_end + self.off, @@ -205,7 +253,7 @@ impl<'a> Parser<'a> { }); Event::QteBlockBeg } - Some(Element::CtrBlock { end, cont_end, .. }) => { + Element::CtrBlock { end, cont_end, .. } => { debug_assert!(cont_end <= text.len() && end <= text.len()); self.stack.push(Container::CtrBlock { cont_end: cont_end + self.off, @@ -213,12 +261,12 @@ impl<'a> Parser<'a> { }); Event::CtrBlockBeg } - Some(Element::SplBlock { + Element::SplBlock { name, args, end, cont_end, - }) => { + } => { debug_assert!(cont_end <= text.len() && end <= text.len()); self.stack.push(Container::SplBlock { cont_end: cont_end + self.off, @@ -226,12 +274,12 @@ impl<'a> Parser<'a> { }); Event::SplBlockBeg { name, args } } - Some(Element::DynBlock { + Element::DynBlock { name, args, cont_end, end, - }) => { + } => { debug_assert!(cont_end <= text.len() && end <= text.len()); self.stack.push(Container::DynBlock { cont_end: cont_end + self.off, @@ -239,27 +287,32 @@ impl<'a> Parser<'a> { }); Event::DynBlockBeg { name, args } } - Some(Element::List { ident, ordered }) => { - self.stack.push(Container::List { ident, ordered }); + Element::List { ident, ordered } => { + self.stack.push(Container::List { + ident, + ordered, + end, + }); self.has_more_item = true; Event::ListBeg { ordered } } - Some(Element::Call { value }) => Event::Call { value }, - Some(Element::Comment(c)) => Event::Comment(c), - Some(Element::CommentBlock { args, cont }) => Event::CommentBlock { args, cont }, - Some(Element::ExampleBlock { args, cont }) => Event::ExampleBlock { args, cont }, - Some(Element::ExportBlock { args, cont }) => Event::ExportBlock { args, cont }, - Some(Element::FixedWidth(f)) => Event::FixedWidth(f), - Some(Element::FnDef { label, cont }) => Event::FnDef { label, cont }, - Some(Element::Keyword { key, value }) => Event::Keyword { key, value }, - Some(Element::Rule) => Event::Rule, - Some(Element::SrcBlock { args, cont }) => Event::SrcBlock { args, cont }, - Some(Element::VerseBlock { args, cont }) => Event::VerseBlock { args, cont }, - None => self.end(), - } + Element::Call { value } => Event::Call { value }, + Element::Comment(c) => Event::Comment(c), + Element::CommentBlock { args, cont } => Event::CommentBlock { args, cont }, + Element::ExampleBlock { args, cont } => Event::ExampleBlock { args, cont }, + Element::ExportBlock { args, cont } => Event::ExportBlock { args, cont }, + Element::FixedWidth(f) => Event::FixedWidth(f), + Element::FnDef { label, cont } => Event::FnDef { label, cont }, + Element::Keyword { key, value } => Event::Keyword { key, value }, + Element::Rule => Event::Rule, + Element::SrcBlock { args, cont } => Event::SrcBlock { args, cont }, + Element::VerseBlock { args, cont } => Event::VerseBlock { args, cont }, + }) + .unwrap_or_else(|| self.end()) } - fn next_obj(&mut self, text: &'a str) -> Event<'a> { + fn next_obj(&mut self, end: usize) -> Event<'a> { + let text = &self.text[self.off..end]; let (obj, off) = self.obj_buf.take().unwrap_or_else(|| { let (obj, off, next_obj) = Object::next_2(text); self.obj_buf = next_obj; @@ -272,25 +325,29 @@ impl<'a> Parser<'a> { Object::Underline { end } => { debug_assert!(end <= text.len()); self.stack.push(Container::Underline { - end: self.off + end, + cont_end: self.off + end, + end: self.off + end + 1, }); } Object::Strike { end } => { debug_assert!(end <= text.len()); self.stack.push(Container::Strike { - end: self.off + end, + cont_end: self.off + end, + end: self.off + end + 1, }); } Object::Italic { end } => { debug_assert!(end <= text.len()); self.stack.push(Container::Italic { - end: self.off + end, + cont_end: self.off + end, + end: self.off + end + 1, }); } Object::Bold { end } => { debug_assert!(end <= text.len()); self.stack.push(Container::Bold { - end: self.off + end, + cont_end: self.off + end, + end: self.off + end + 1, }); } _ => (), @@ -318,14 +375,13 @@ impl<'a> Parser<'a> { } } - fn next_list_item(&mut self, ident: usize) -> Event<'a> { - let (bullet, cont_beg, cont_end, end, has_more) = - List::parse(&self.text[self.off..], ident); + fn next_list_item(&mut self, ident: usize, end: usize) -> Event<'a> { + let (bullet, off, cont_end, end, has_more) = List::parse(&self.text[self.off..end], ident); self.stack.push(Container::ListItem { cont_end: self.off + cont_end, end: self.off + end, }); - self.off += cont_beg; + self.off += off; self.has_more_item = has_more; Event::ListItemBeg { bullet } } @@ -353,16 +409,10 @@ impl<'a> Iterator for Parser<'a> { type Item = Event<'a>; fn next(&mut self) -> Option> { - if self.stack.is_empty() { - if self.off >= self.text.len() { - None - } else { - Some(self.next_sec_or_hdl()) - } - } else { - let last = *self.stack.last_mut().unwrap(); - - Some(match last { + self.stack + .last() + .cloned() + .map(|x| match x { Container::Headline { beg, end } => { if self.off >= end { self.end() @@ -377,52 +427,51 @@ impl<'a> Iterator for Parser<'a> { | Container::QteBlock { cont_end, end, .. } | Container::SplBlock { cont_end, end, .. } | Container::ListItem { cont_end, end } => { - let text = &self.text[self.off..cont_end]; + debug_assert!(self.off <= cont_end); if self.off >= cont_end { self.off = end; self.end() } else { - self.next_ele(text) + self.next_ele(cont_end) } } - Container::List { ident, .. } => { + Container::List { ident, end, .. } => { + debug_assert!(self.off <= end); if self.has_more_item { - self.next_list_item(ident) + self.next_list_item(ident, end) } else { self.end() } } Container::Section { end } => { - let text = &self.text[self.off..end]; + debug_assert!(self.off <= end); if self.off >= end { self.end() } else { - self.next_ele(text) + self.next_ele(end) } } - Container::Paragraph { cont_end, end } => { - let text = &self.text[self.off..cont_end]; + Container::Paragraph { cont_end, end } + | Container::Bold { cont_end, end } + | Container::Underline { cont_end, end } + | Container::Italic { cont_end, end } + | Container::Strike { cont_end, end } => { + debug_assert!(self.off <= end); if self.off >= cont_end { self.off = end; self.end() } else { - self.next_obj(text) - } - } - Container::Bold { end } - | Container::Underline { end } - | Container::Italic { end } - | Container::Strike { end } => { - let text = &self.text[self.off..end]; - if self.off >= end { - self.off += 1; - self.end() - } else { - self.next_obj(text) + self.next_obj(cont_end) } } }) - } + .or_else(|| { + if self.off >= self.text.len() { + None + } else { + Some(self.next_sec_or_hdl()) + } + }) } }