//! Parser use crate::{elements::*, headline::*, objects::*}; use jetscii::bytes; use memchr::memchr_iter; #[cfg_attr(test, derive(PartialEq))] #[derive(Copy, Clone, Debug)] enum Container { Headline(usize), Section(usize), Drawer, Paragraph, CtrBlock, QteBlock, SplBlock, DynBlock, List(usize, bool), ListItem, Italic, Strike, Bold, Underline, } #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub enum Event<'a> { HeadlineBeg(Headline<'a>), HeadlineEnd, SectionBeg, SectionEnd, ParagraphBeg, ParagraphEnd, CtrBlockBeg, CtrBlockEnd, QteBlockBeg, QteBlockEnd, SplBlockBeg { name: &'a str, args: Option<&'a str>, }, SplBlockEnd, DynBlockBeg { name: &'a str, args: Option<&'a str>, }, DynBlockEnd, CommentBlock { args: Option<&'a str>, cont: &'a str, }, ExampleBlock { args: Option<&'a str>, cont: &'a str, }, ExportBlock { args: Option<&'a str>, cont: &'a str, }, SrcBlock { args: Option<&'a str>, cont: &'a str, }, VerseBlock { args: Option<&'a str>, cont: &'a str, }, ListBeg { ordered: bool, }, ListEnd { ordered: bool, }, ListItemBeg { bullet: &'a str, }, ListItemEnd, Call { value: &'a str, }, Clock(Clock<'a>), Comment(&'a str), FixedWidth(&'a str), Planning(Planning<'a>), DrawerBeg(&'a str), DrawerEnd, TableStart, TableEnd, TableCell, LatexEnv, FnDef { label: &'a str, cont: &'a str, }, Keyword(Keyword<'a>), Rule, Timestamp(Timestamp<'a>), Cookie(Cookie<'a>), FnRef(FnRef<'a>), InlineCall(InlineCall<'a>), InlineSrc(InlineSrc<'a>), Link(Link<'a>), Macros(Macros<'a>), RadioTarget { target: &'a str, }, Snippet(Snippet<'a>), Target { target: &'a str, }, BoldBeg, BoldEnd, ItalicBeg, ItalicEnd, StrikeBeg, StrikeEnd, UnderlineBeg, UnderlineEnd, Verbatim(&'a str), Code(&'a str), Text(&'a str), } pub struct Parser<'a> { text: &'a str, stack: Vec<(Container, usize, usize)>, next_item: Vec>, off: usize, ele_buf: Option<(Event<'a>, usize, usize, usize)>, obj_buf: Option<(Event<'a>, usize, usize, usize)>, todo_keywords: &'a [&'a str], } impl<'a> Parser<'a> { /// creates a new parser from string pub fn new(text: &'a str) -> Parser<'a> { Parser { text, stack: Vec::new(), next_item: Vec::new(), off: 0, ele_buf: None, obj_buf: None, todo_keywords: DEFAULT_TODO_KEYWORDS, } } /// creates a new parser from string, with the specified keywords pub fn with_todo_keywrods(text: &'a str, todo_keywords: &'a [&'a str]) -> Parser<'a> { Parser { text, stack: Vec::new(), next_item: Vec::new(), off: 0, ele_buf: None, obj_buf: None, todo_keywords, } } /// returns current offset pub fn offset(&self) -> usize { self.off } /// returns current stack depth pub fn stack_depth(&self) -> usize { self.stack.len() } /// set todo keywords pub fn set_todo_keywords(&mut self, todo_keywords: &'a [&'a str]) { self.todo_keywords = todo_keywords; } /// set text pub fn set_text(&mut self, text: &'a str) { self.off = 0; self.stack.clear(); self.next_item.clear(); self.ele_buf = None; self.obj_buf = None; self.text = text; } fn next_section_or_headline(&mut self, text: &'a str) -> Event<'a> { let end = Headline::find_level(text, std::usize::MAX); if end != 0 { self.push_stack(Container::Section(self.off), end, end); Event::SectionBeg } else { self.next_headline(text) } } fn next_headline(&mut self, text: &'a str) -> Event<'a> { let (hdl, off, end) = Headline::parse(text, self.todo_keywords); self.push_stack(Container::Headline(self.off + off), end, end); self.off += off; Event::HeadlineBeg(hdl) } fn next_ele(&mut self, text: &'a str) -> Event<'a> { fn skip_empty_lines(text: &str) -> usize { let mut i = 0; for pos in memchr_iter(b'\n', text.as_bytes()) { if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) { i = pos + 1; } else { return i; } } if text.as_bytes()[i..].iter().all(u8::is_ascii_whitespace) { text.len() } else { i } } let start = skip_empty_lines(text); if start == text.len() { self.off += text.len(); return self.end(); }; let tail = &text[start..]; let (ele, off, limit, end) = self .ele_buf .take() .or_else(|| self.real_next_ele(tail)) .unwrap_or_else(|| { let mut pos = 0; for i in memchr_iter(b'\n', tail.as_bytes()) { if tail.as_bytes()[pos..i].iter().all(u8::is_ascii_whitespace) { return (Event::ParagraphBeg, 0, pos - 1 + start, i + 1 + start); } else if let Some(buf) = self.real_next_ele(&tail[pos..]) { self.ele_buf = Some(buf); return (Event::ParagraphBeg, 0, pos - 1 + start, pos + start); } pos = i + 1; } let len = text.len(); ( Event::ParagraphBeg, 0, if text.ends_with('\n') { len - 1 } else { len }, len, ) }); debug_assert!( (limit == 0 && end == 0) || (off <= limit && limit <= end && end <= text.len()), "{} <= {} <= {} <= {}", off, limit, end, text.len() ); match ele { Event::DrawerBeg(_) => self.push_stack(Container::Drawer, limit, end), Event::ParagraphBeg => self.push_stack(Container::Paragraph, limit, end), Event::QteBlockBeg => self.push_stack(Container::QteBlock, limit, end), Event::CtrBlockBeg => self.push_stack(Container::CtrBlock, limit, end), Event::SplBlockBeg { .. } => self.push_stack(Container::SplBlock, limit, end), Event::DynBlockBeg { .. } => self.push_stack(Container::DynBlock, limit, end), Event::ListBeg { ordered, .. } => { self.push_stack(Container::List(limit, ordered), end, end) } _ => (), } self.off += off + start; ele } // returns (event, offset, container limit, container end) fn real_next_ele(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { debug_assert!(!text.starts_with('\n')); if text.starts_with("[fn:") { if let Some((label, cont, off)) = fn_def::parse(text) { return Some((Event::FnDef { label, cont }, off + 1, 0, 0)); } } let (tail, line_begin) = text .find(|c| c != ' ') .map(|off| (&text[off..], off)) .unwrap_or((text, 0)); if let Some((ordered, bullet)) = list::is_item(tail) { self.next_item.push(Some(bullet)); return Some((Event::ListBeg { ordered }, 0, line_begin, text.len())); } if tail.starts_with("CLOCK:") { if let Some((clock, off)) = Clock::parse(tail) { return Some((Event::Clock(clock), off + line_begin, 0, 0)); } } // TODO: LaTeX environment if tail.starts_with("\\begin{") {} // rule if tail.starts_with("-----") { let off = rule::parse(tail); if off != 0 { return Some((Event::Rule, off + line_begin, 0, 0)); } } if tail.starts_with(':') { if let Some((name, off, limit, end)) = drawer::parse(tail) { return Some(( Event::DrawerBeg(name), off + line_begin, limit + line_begin, end + line_begin, )); } } // fixed width if tail.starts_with(": ") || tail.starts_with(":\n") { // let end = line_ends // .skip_while(|&i| { // text[i + 1..].starts_with(": ") || text[i + 1..].starts_with(":\n") // }) // .next() // .map(|i| i + 1) // .unwrap_or_else(|| text.len()); // let off = end - pos; // brk!(Element::FixedWidth(&tail[0..off]), off); } // comment if tail.starts_with("# ") || tail.starts_with("#\n") { // let end = line_ends // .skip_while(|&i| { // text[i + 1..].starts_with("# ") || text[i + 1..].starts_with("#\n") // }) // .next() // .map(|i| i + 1) // .unwrap_or_else(|| text.len()); // let off = end - pos; // brk!(Element::Comment(&tail[0..off]), off); } if tail.starts_with("#+") { block::parse(tail) .map(|(name, args, begin, limit, end)| { let cont = &tail[begin..limit]; match &*name.to_uppercase() { "COMMENT" => (Event::CommentBlock { args, cont }, end + line_begin, 0, 0), "EXAMPLE" => (Event::ExampleBlock { args, cont }, end + line_begin, 0, 0), "EXPORT" => (Event::ExportBlock { args, cont }, end + line_begin, 0, 0), "SRC" => (Event::SrcBlock { args, cont }, end + line_begin, 0, 0), "VERSE" => (Event::VerseBlock { args, cont }, end + line_begin, 0, 0), "CENTER" => ( Event::CtrBlockBeg, begin + line_begin, limit + line_begin, end + line_begin, ), "QUOTE" => ( Event::QteBlockBeg, begin + line_begin, limit + line_begin, end + line_begin, ), _ => ( Event::SplBlockBeg { name, args }, begin + line_begin, limit + line_begin, end + line_begin, ), } }) .or_else(|| { dyn_block::parse(tail).map(|(name, args, begin, limit, end)| { ( Event::DynBlockBeg { name, args }, begin + line_begin, limit + line_begin, end + line_begin, ) }) }) .or_else(|| { Keyword::parse(tail).map(|(key, option, value, off)| { ( if key.eq_ignore_ascii_case("CALL") { Event::Call { value } } else { Event::Keyword(Keyword::new(key, option, value)) }, off + line_begin, 0, 0, ) }) }) } else { None } } fn next_obj(&mut self, text: &'a str) -> Event<'a> { let (obj, off, limit, end) = self .obj_buf .take() .or_else(|| self.real_next_obj(text)) .unwrap_or_else(|| { let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'['); let bytes = text.as_bytes(); let mut pos = 0; while let Some(off) = bs.find(&bytes[pos..]) { pos += off + 1; if let Some(buf) = self.real_next_obj(&text[pos..]) { self.obj_buf = Some(buf); return (Event::Text(&text[0..pos]), pos, 0, 0); } } (Event::Text(text), text.len(), 0, 0) }); debug_assert!( off <= text.len() && limit <= text.len() && end <= text.len(), "{} <= {} <= {} <= {}", off, limit, end, text.len() ); self.off += off; match obj { Event::UnderlineBeg => self.push_stack(Container::Underline, limit, end), Event::StrikeBeg => self.push_stack(Container::Strike, limit, end), Event::ItalicBeg => self.push_stack(Container::Italic, limit, end), Event::BoldBeg => self.push_stack(Container::Bold, limit, end), _ => (), } obj } fn real_next_obj(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { if text.len() < 3 { return None; } let bytes = text.as_bytes(); match bytes[0] { b'@' if bytes[1] == b'@' => { Snippet::parse(text).map(|(snippet, off)| (Event::Snippet(snippet), off, 0, 0)) } b'{' if bytes[1] == b'{' && bytes[2] == b'{' => { Macros::parse(text).map(|(macros, off)| (Event::Macros(macros), off, 0, 0)) } b'<' if bytes[1] == b'<' => { if bytes[2] == b'<' { radio_target::parse(text) .map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0)) } else { target::parse(text).map(|(target, off)| (Event::Target { target }, off, 0, 0)) } } b'<' => Timestamp::parse_active(text) .map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)) .or_else(|| { Timestamp::parse_diary(text) .map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)) }), b'[' => { if text[1..].starts_with("fn:") { FnRef::parse(text).map(|(fn_ref, off)| (Event::FnRef(fn_ref), off, 0, 0)) } else if bytes[1] == b'[' { Link::parse(text).map(|(link, off)| (Event::Link(link), off, 0, 0)) } else { Cookie::parse(text) .map(|(cookie, off)| (Event::Cookie(cookie), off, 0, 0)) .or_else(|| { Timestamp::parse_inactive(text) .map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)) }) } } b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => self.next_inline(&text[1..]), _ => self.next_inline(text), } } fn next_inline(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { match text.as_bytes()[0] { b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)), b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)), b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)), b'_' => emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end)), b'=' => emphasis::parse(text, b'=') .map(|end| (Event::Verbatim(&text[1..end]), end + 1, 0, 0)), b'~' => { emphasis::parse(text, b'~').map(|end| (Event::Code(&text[1..end]), end + 1, 0, 0)) } b's' if text.starts_with("src_") => { InlineSrc::parse(text).map(|(src, off)| (Event::InlineSrc(src), off, 0, 0)) } b'c' if text.starts_with("call_") => { InlineCall::parse(text).map(|(call, off)| (Event::InlineCall(call), off, 0, 0)) } _ => None, } } #[inline] fn push_stack(&mut self, container: Container, limit: usize, end: usize) { self.stack .push((container, self.off + limit, self.off + end)); } #[inline] fn end(&mut self) -> Event<'a> { let (container, _, _) = self.stack.pop().unwrap(); match container { Container::Bold => Event::BoldEnd, Container::Drawer => Event::DrawerEnd, Container::CtrBlock => Event::CtrBlockEnd, Container::DynBlock => Event::DynBlockEnd, Container::Headline(_) => Event::HeadlineEnd, Container::Italic => Event::ItalicEnd, Container::List(_, ordered) => Event::ListEnd { ordered }, Container::ListItem => Event::ListItemEnd, Container::Paragraph => Event::ParagraphEnd, Container::QteBlock => Event::QteBlockEnd, Container::Section(_) => Event::SectionEnd, Container::SplBlock => Event::SplBlockEnd, Container::Strike => Event::StrikeEnd, Container::Underline => Event::UnderlineEnd, } } } impl<'a> Iterator for Parser<'a> { type Item = Event<'a>; fn next(&mut self) -> Option> { if let Some(&(container, limit, end)) = self.stack.last() { let tail = &self.text[self.off..limit]; // eprint!("{:1$}", ' ', self.stack_depth()); // eprintln!("{:?} {:?} {:?}", container, tail, self.next_item); debug_assert!( self.off <= limit && limit <= end && end <= self.text.len(), "{} <= {} <= {} <= {}", self.off, limit, end, self.text.len() ); Some(match container { Container::Headline(beg) => { if self.off >= limit { self.off = end; self.stack.pop(); Event::HeadlineEnd } else if self.off == beg { self.next_section_or_headline(tail) } else { self.next_headline(tail) } } Container::Drawer | Container::DynBlock | Container::CtrBlock | Container::QteBlock | Container::SplBlock | Container::ListItem => { if self.off >= limit { self.off = end; self.end() } else { self.next_ele(tail) } } Container::Section(beg) => { // planning should be the first line of section if self.off >= limit { self.off = end; self.stack.pop(); Event::SectionEnd } else if self.off == beg { if let Some((planning, off)) = Planning::parse(tail) { self.off += off; Event::Planning(planning) } else { self.next_ele(tail) } } else { self.next_ele(tail) } } Container::List(ident, ordered) => { if let Some(bullet) = self.next_item.pop().unwrap() { let off = bullet.len() + ident; self.off += off; let (limit, end, next) = list::parse(&tail[off..], ident); self.push_stack(Container::ListItem, limit, end); self.next_item.push(next); Event::ListItemBeg { bullet } } else { self.off = end; self.stack.pop(); Event::ListEnd { ordered } } } Container::Paragraph | Container::Bold | Container::Underline | Container::Italic | Container::Strike => { if self.off >= limit { self.off = end; self.end() } else { self.next_obj(tail) } } }) } else if self.off < self.text.len() { Some(self.next_section_or_headline(&self.text[self.off..])) } else { None } } }