//! Parser use crate::{elements::*, headline::*, objects::*}; use jetscii::bytes; use memchr::memchr_iter; #[cfg_attr(test, derive(PartialEq))] #[derive(Copy, Clone, Debug)] enum Container { Headline(usize), Section, Paragraph, CtrBlock, QteBlock, SplBlock, DynBlock, List(usize, bool), ListItem, Italic, Strike, Bold, Underline, } #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub enum Event<'a> { HeadlineBeg(Headline<'a>), HeadlineEnd, SectionBeg, SectionEnd, ParagraphBeg, ParagraphEnd, CtrBlockBeg, CtrBlockEnd, QteBlockBeg, QteBlockEnd, SplBlockBeg { name: &'a str, args: Option<&'a str>, }, SplBlockEnd, DynBlockBeg { name: &'a str, args: Option<&'a str>, }, DynBlockEnd, CommentBlock { args: Option<&'a str>, cont: &'a str, }, ExampleBlock { args: Option<&'a str>, cont: &'a str, }, ExportBlock { args: Option<&'a str>, cont: &'a str, }, SrcBlock { args: Option<&'a str>, cont: &'a str, }, VerseBlock { args: Option<&'a str>, cont: &'a str, }, ListBeg { ordered: bool, }, ListEnd { ordered: bool, }, ListItemBeg { bullet: &'a str, }, ListItemEnd, Call { value: &'a str, }, Clock, Comment(&'a str), FixedWidth(&'a str), TableStart, TableEnd, TableCell, LatexEnv, FnDef { label: &'a str, cont: &'a str, }, Keyword { key: Key<'a>, value: &'a str, }, Rule, Timestamp(Timestamp<'a>), Cookie(Cookie<'a>), FnRef { label: Option<&'a str>, def: Option<&'a str>, }, InlineCall { name: &'a str, args: &'a str, inside_header: Option<&'a str>, end_header: Option<&'a str>, }, InlineSrc { lang: &'a str, option: Option<&'a str>, body: &'a str, }, Link { path: &'a str, desc: Option<&'a str>, }, Macros { name: &'a str, args: Option<&'a str>, }, RadioTarget { target: &'a str, }, Snippet { name: &'a str, value: &'a str, }, Target { target: &'a str, }, BoldBeg, BoldEnd, ItalicBeg, ItalicEnd, StrikeBeg, StrikeEnd, UnderlineBeg, UnderlineEnd, Verbatim(&'a str), Code(&'a str), Text(&'a str), } pub struct Parser<'a> { text: &'a str, stack: Vec<(Container, usize, usize)>, off: usize, ele_buf: Option<(Event<'a>, usize, usize, usize)>, obj_buf: Option<(Event<'a>, usize, usize, usize)>, keywords: &'a [&'a str], list_more_item: bool, } impl<'a> Parser<'a> { /// creates a new parser from string pub fn new(text: &'a str) -> Parser<'a> { Parser { text, stack: Vec::new(), off: 0, ele_buf: None, obj_buf: None, list_more_item: false, keywords: DEFAULT_KEYWORDS, } } /// returns current offset pub fn offset(&self) -> usize { self.off } /// returns current stack depth pub fn stack_depth(&self) -> usize { self.stack.len() } pub fn set_keywords(&mut self, keywords: &'a [&'a str]) { self.keywords = keywords; } fn next_section_or_headline(&mut self) -> Event<'a> { let end = Headline::find_level(&self.text[self.off..], std::usize::MAX); debug_assert!(end <= self.text[self.off..].len()); if end != 0 { self.push_stack(Container::Section, end, end); Event::SectionBeg } else { self.next_headline() } } fn next_headline(&mut self) -> Event<'a> { let (hdl, off, end) = Headline::parse(&self.text[self.off..], self.keywords); debug_assert!(end <= self.text[self.off..].len()); self.push_stack(Container::Headline(self.off + off), end, end); self.off += off; Event::HeadlineBeg(hdl) } fn next_ele(&mut self, text: &'a str) -> Event<'a> { let (ele, off, limit, end) = self .ele_buf .take() .or_else(|| self.real_next_ele(text)) .unwrap_or_else(|| { let len = text.len(); let start = text.find(|c| c != '\n').unwrap_or(0); if start == len - 1 { (self.end(), len, 0, 0) } else { let mut pos = start; for off in memchr_iter(b'\n', &text.as_bytes()[start..]) { if text[pos..off + start].trim().is_empty() { return (Event::ParagraphBeg, start, pos, off + start); } else { pos = off + start; if let Some(buf) = self.real_next_ele(&text[pos + 1..]) { self.ele_buf = Some(buf); return (Event::ParagraphBeg, start, pos, pos); } } } ( Event::ParagraphBeg, start, if text.ends_with('\n') { len - 1 } else { len }, len, ) } }); debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len()); match ele { Event::ParagraphBeg => self.push_stack(Container::Paragraph, limit, end), Event::QteBlockBeg => self.push_stack(Container::QteBlock, limit, end), Event::CtrBlockBeg => self.push_stack(Container::CtrBlock, limit, end), Event::SplBlockBeg { .. } => self.push_stack(Container::SplBlock, limit, end), Event::DynBlockBeg { .. } => self.push_stack(Container::DynBlock, limit, end), Event::ListBeg { ordered, .. } => { self.push_stack(Container::List(limit, ordered), end, end); self.list_more_item = true; } _ => (), } self.off += off; ele } // returns (event, offset, container limit, container end) fn real_next_ele(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { if text.starts_with("[fn:") { if let Some((label, cont, off)) = fn_def::parse(text) { return Some((Event::FnDef { label, cont }, off + 1, 0, 0)); } } let (tail, line_begin) = text .find(|c| c != ' ') .map(|off| (&text[off..], off)) .unwrap_or((text, 0)); let (is_item, ordered) = list::is_item(tail); if is_item { return Some((Event::ListBeg { ordered }, 0, line_begin, text.len())); } // TODO: LaTeX environment if tail.starts_with("\\begin{") {} // rule if tail.starts_with("-----") { let off = rule::parse(tail); if off != 0 { return Some((Event::Rule, off, 0, 0)); } } // fixed width if tail.starts_with(": ") || tail.starts_with(":\n") { // let end = line_ends // .skip_while(|&i| { // text[i + 1..].starts_with(": ") || text[i + 1..].starts_with(":\n") // }) // .next() // .map(|i| i + 1) // .unwrap_or_else(|| text.len()); // let off = end - pos; // brk!(Element::FixedWidth(&tail[0..off]), off); } // comment if tail.starts_with("# ") || tail.starts_with("#\n") { // let end = line_ends // .skip_while(|&i| { // text[i + 1..].starts_with("# ") || text[i + 1..].starts_with("#\n") // }) // .next() // .map(|i| i + 1) // .unwrap_or_else(|| text.len()); // let off = end - pos; // brk!(Element::Comment(&tail[0..off]), off); } if tail.starts_with("#+") { block::parse(tail) .map(|(name, args, begin, limit, end)| { let cont = &tail[begin..limit]; match &*name.to_uppercase() { "COMMENT" => (Event::CommentBlock { args, cont }, end, 0, 0), "EXAMPLE" => (Event::ExampleBlock { args, cont }, end, 0, 0), "EXPORT" => (Event::ExportBlock { args, cont }, end, 0, 0), "SRC" => (Event::SrcBlock { args, cont }, end, 0, 0), "VERSE" => (Event::VerseBlock { args, cont }, end, 0, 0), "CENTER" => (Event::CtrBlockBeg, begin, limit, end), "QUOTE" => (Event::QteBlockBeg, begin, limit, end), _ => (Event::SplBlockBeg { name, args }, begin, limit, end), } }) .or_else(|| { dyn_block::parse(tail).map(|(name, args, begin, limit, end)| { (Event::DynBlockBeg { name, args }, begin, limit, end) }) }) .or_else(|| { keyword::parse(tail).map(|(key, value, off)| { if let Key::Call = key { (Event::Call { value }, off, 0, 0) } else { (Event::Keyword { key, value }, off, 0, 0) } }) }) } else { None } } fn next_obj(&mut self, text: &'a str) -> Event<'a> { let (obj, off, limit, end) = self .obj_buf .take() .or_else(|| self.real_next_obj(text)) .unwrap_or_else(|| { let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'['); let bytes = text.as_bytes(); let mut pos = 0; while let Some(off) = bs.find(&bytes[pos..]) { pos += off + 1; if let Some(buf) = self.real_next_obj(&text[pos..]) { self.obj_buf = Some(buf); return (Event::Text(&text[0..pos]), pos, 0, 0); } } (Event::Text(text), text.len(), 0, 0) }); debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len()); self.off += off; match obj { Event::UnderlineBeg => self.push_stack(Container::Underline, limit, end), Event::StrikeBeg => self.push_stack(Container::Strike, limit, end), Event::ItalicBeg => self.push_stack(Container::Italic, limit, end), Event::BoldBeg => self.push_stack(Container::Bold, limit, end), _ => (), } obj } fn real_next_obj(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { if text.len() < 3 { return None; } let bytes = text.as_bytes(); match bytes[0] { b'@' if bytes[1] == b'@' => snippet::parse(text) .map(|(name, value, off)| (Event::Snippet { name, value }, off, 0, 0)), b'{' if bytes[1] == b'{' && bytes[2] == b'{' => macros::parse(text) .map(|(name, args, off)| (Event::Macros { name, args }, off, 0, 0)), b'<' if bytes[1] == b'<' => { if bytes[2] == b'<' { radio_target::parse(text) .map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0)) } else { target::parse(text).map(|(target, off)| (Event::Target { target }, off, 0, 0)) } } b'<' => timestamp::parse_active(text) .map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)) .or_else(|| { timestamp::parse_diary(text) .map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)) }), b'[' => { if text[1..].starts_with("fn:") { fn_ref::parse(text) .map(|(label, def, off)| (Event::FnRef { label, def }, off, 0, 0)) } else if bytes[1] == b'[' { link::parse(text) .map(|(path, desc, off)| (Event::Link { path, desc }, off, 0, 0)) } else { cookie::parse(text) .map(|(cookie, off)| (Event::Cookie(cookie), off, 0, 0)) .or_else(|| { timestamp::parse_inactive(text) .map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)) }) } } b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => self.next_inline(&text[1..]), _ => self.next_inline(text), } } fn next_inline(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { match text.as_bytes()[0] { b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)), b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)), b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)), b'_' => emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end)), b'=' => emphasis::parse(text, b'=') .map(|end| (Event::Verbatim(&text[1..end]), end + 1, 0, 0)), b'~' => { emphasis::parse(text, b'~').map(|end| (Event::Code(&text[1..end]), end + 1, 0, 0)) } b's' if text.starts_with("src_") => { inline_src::parse(text).map(|(lang, option, body, off)| { (Event::InlineSrc { lang, option, body }, off, 0, 0) }) } b'c' if text.starts_with("call_") => { inline_call::parse(text).map(|(name, args, inside_header, end_header, off)| { ( Event::InlineCall { name, args, inside_header, end_header, }, off, 0, 0, ) }) } _ => None, } } fn next_list_item(&mut self, ident: usize, text: &'a str) -> Event<'a> { let (bullet, off, limit, end, has_more) = list::parse(text, ident); self.push_stack(Container::ListItem, limit, end); self.off += off; self.list_more_item = has_more; Event::ListItemBeg { bullet } } #[inline] fn push_stack(&mut self, container: Container, limit: usize, end: usize) { self.stack .push((container, self.off + limit, self.off + end)); } #[inline] fn end(&mut self) -> Event<'a> { let (container, _, _) = self.stack.pop().unwrap(); match container { Container::Bold => Event::BoldEnd, Container::CtrBlock => Event::CtrBlockEnd, Container::DynBlock => Event::DynBlockEnd, Container::Headline(_) => Event::HeadlineEnd, Container::Italic => Event::ItalicEnd, Container::List(_, ordered) => Event::ListEnd { ordered }, Container::ListItem => Event::ListItemEnd, Container::Paragraph => Event::ParagraphEnd, Container::QteBlock => Event::QteBlockEnd, Container::Section => Event::SectionEnd, Container::SplBlock => Event::SplBlockEnd, Container::Strike => Event::StrikeEnd, Container::Underline => Event::UnderlineEnd, } } } impl<'a> Iterator for Parser<'a> { type Item = Event<'a>; fn next(&mut self) -> Option> { if let Some(&(container, limit, end)) = self.stack.last() { Some(if self.off >= limit { debug_assert!(self.off <= limit && self.off <= end); self.off = end; self.end() } else { match container { Container::Headline(beg) => { debug_assert!(self.off >= beg); if self.off == beg { self.next_section_or_headline() } else { self.next_headline() } } Container::DynBlock | Container::CtrBlock | Container::QteBlock | Container::SplBlock | Container::ListItem | Container::Section => self.next_ele(&self.text[self.off..limit]), Container::List(ident, _) => { if self.list_more_item { self.next_list_item(ident, &self.text[self.off..limit]) } else { self.end() } } Container::Paragraph | Container::Bold | Container::Underline | Container::Italic | Container::Strike => self.next_obj(&self.text[self.off..limit]), } }) } else if self.off < self.text.len() { Some(self.next_section_or_headline()) } else { None } } } #[test] fn parse() { use self::Event::*; let expected = vec![ HeadlineBeg(Headline { level: 1, priority: None, keyword: None, title: "Title 1", tags: None, }), SectionBeg, ParagraphBeg, Text("test "), BoldBeg, Text("Section 1"), BoldEnd, ParagraphEnd, SectionEnd, HeadlineBeg(Headline { level: 2, priority: None, keyword: None, title: "Title 2", tags: None, }), SectionBeg, ParagraphBeg, UnderlineBeg, Text("Section 2"), UnderlineEnd, ParagraphEnd, SectionEnd, HeadlineEnd, HeadlineEnd, HeadlineBeg(Headline { level: 1, priority: None, keyword: None, title: "Title 3", tags: None, }), SectionBeg, ParagraphBeg, ItalicBeg, Text("Section 3"), ItalicEnd, ParagraphEnd, SectionEnd, HeadlineEnd, HeadlineBeg(Headline { level: 1, priority: None, keyword: None, title: "Title 4", tags: None, }), SectionBeg, ParagraphBeg, Verbatim("Section 4"), ParagraphEnd, SectionEnd, HeadlineEnd, ]; assert_eq!( Parser::new( r#"#+OPTIONS: H:3 num:nil toc:t \n:nil ::t |:t ^:t -:t f:t *:t tex:t d:(HIDE) tags:not-in-toc * Definitions "# ) .collect::>(), expected ); }