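//! Parser
//!
//! Event-based pull parser: [`Parser`] walks the input text and yields a flat
//! stream of [`Event`]s through its `Iterator` implementation.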
|
|
|
|
use crate::{elements::*, headline::*, objects::*};
|
|
use jetscii::bytes;
|
|
use memchr::memchr_iter;
|
|
|
|
/// Kind of container that is currently open on the parser stack.
///
/// The parser keeps one stack entry per open container; the variant decides
/// how the text inside it is parsed and which `...End` event closes it.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Copy, Clone, Debug)]
enum Container {
    /// Headline; the payload is the absolute offset at which its content starts.
    Headline(usize),
    /// Section.
    Section,
    /// Paragraph.
    Paragraph,
    /// `CENTER` block.
    CtrBlock,
    /// `QUOTE` block.
    QteBlock,
    /// Block with any other name.
    SplBlock,
    /// Dynamic block.
    DynBlock,
    /// List; the payload is the indentation of the list and whether it is ordered.
    List(usize, bool),
    /// Single list item.
    ListItem,
    /// `/italic/` emphasis.
    Italic,
    /// `+strike+` emphasis.
    Strike,
    /// `*bold*` emphasis.
    Bold,
    /// `_underline_` emphasis.
    Underline,
}
|
|
|
|
#[cfg_attr(test, derive(PartialEq))]
|
|
#[derive(Debug)]
|
|
pub enum Event<'a> {
|
|
HeadlineBeg(Headline<'a>),
|
|
HeadlineEnd,
|
|
|
|
SectionBeg,
|
|
SectionEnd,
|
|
|
|
ParagraphBeg,
|
|
ParagraphEnd,
|
|
|
|
CtrBlockBeg,
|
|
CtrBlockEnd,
|
|
QteBlockBeg,
|
|
QteBlockEnd,
|
|
SplBlockBeg {
|
|
name: &'a str,
|
|
args: Option<&'a str>,
|
|
},
|
|
SplBlockEnd,
|
|
DynBlockBeg {
|
|
name: &'a str,
|
|
args: Option<&'a str>,
|
|
},
|
|
DynBlockEnd,
|
|
|
|
CommentBlock {
|
|
args: Option<&'a str>,
|
|
cont: &'a str,
|
|
},
|
|
ExampleBlock {
|
|
args: Option<&'a str>,
|
|
cont: &'a str,
|
|
},
|
|
ExportBlock {
|
|
args: Option<&'a str>,
|
|
cont: &'a str,
|
|
},
|
|
SrcBlock {
|
|
args: Option<&'a str>,
|
|
cont: &'a str,
|
|
},
|
|
VerseBlock {
|
|
args: Option<&'a str>,
|
|
cont: &'a str,
|
|
},
|
|
|
|
ListBeg {
|
|
ordered: bool,
|
|
},
|
|
ListEnd {
|
|
ordered: bool,
|
|
},
|
|
ListItemBeg {
|
|
bullet: &'a str,
|
|
},
|
|
ListItemEnd,
|
|
|
|
Call {
|
|
value: &'a str,
|
|
},
|
|
|
|
Clock,
|
|
|
|
Comment(&'a str),
|
|
FixedWidth(&'a str),
|
|
|
|
TableStart,
|
|
TableEnd,
|
|
TableCell,
|
|
|
|
LatexEnv,
|
|
FnDef {
|
|
label: &'a str,
|
|
cont: &'a str,
|
|
},
|
|
Keyword {
|
|
key: Key<'a>,
|
|
value: &'a str,
|
|
},
|
|
Rule,
|
|
|
|
Timestamp(Timestamp<'a>),
|
|
Cookie(Cookie<'a>),
|
|
FnRef {
|
|
label: Option<&'a str>,
|
|
def: Option<&'a str>,
|
|
},
|
|
InlineCall {
|
|
name: &'a str,
|
|
args: &'a str,
|
|
inside_header: Option<&'a str>,
|
|
end_header: Option<&'a str>,
|
|
},
|
|
InlineSrc {
|
|
lang: &'a str,
|
|
option: Option<&'a str>,
|
|
body: &'a str,
|
|
},
|
|
Link {
|
|
path: &'a str,
|
|
desc: Option<&'a str>,
|
|
},
|
|
Macros {
|
|
name: &'a str,
|
|
args: Option<&'a str>,
|
|
},
|
|
RadioTarget {
|
|
target: &'a str,
|
|
},
|
|
Snippet {
|
|
name: &'a str,
|
|
value: &'a str,
|
|
},
|
|
Target {
|
|
target: &'a str,
|
|
},
|
|
|
|
BoldBeg,
|
|
BoldEnd,
|
|
ItalicBeg,
|
|
ItalicEnd,
|
|
StrikeBeg,
|
|
StrikeEnd,
|
|
UnderlineBeg,
|
|
UnderlineEnd,
|
|
|
|
Verbatim(&'a str),
|
|
Code(&'a str),
|
|
Text(&'a str),
|
|
}
|
|
|
|
pub struct Parser<'a> {
|
|
text: &'a str,
|
|
stack: Vec<(Container, usize, usize)>,
|
|
off: usize,
|
|
ele_buf: Option<(Event<'a>, usize, usize, usize)>,
|
|
obj_buf: Option<(Event<'a>, usize, usize, usize)>,
|
|
keywords: &'a [&'a str],
|
|
list_more_item: bool,
|
|
}
|
|
|
|
impl<'a> Parser<'a> {
|
|
/// creates a new parser from string
|
|
pub fn new(text: &'a str) -> Parser<'a> {
|
|
Parser {
|
|
text,
|
|
stack: Vec::new(),
|
|
off: 0,
|
|
ele_buf: None,
|
|
obj_buf: None,
|
|
list_more_item: false,
|
|
keywords: DEFAULT_KEYWORDS,
|
|
}
|
|
}
|
|
|
|
/// returns current offset
|
|
pub fn offset(&self) -> usize {
|
|
self.off
|
|
}
|
|
|
|
/// returns current stack depth
|
|
pub fn stack_depth(&self) -> usize {
|
|
self.stack.len()
|
|
}
|
|
|
|
pub fn set_keywords(&mut self, keywords: &'a [&'a str]) {
|
|
self.keywords = keywords;
|
|
}
|
|
|
|
fn next_section_or_headline(&mut self) -> Event<'a> {
|
|
let end = Headline::find_level(&self.text[self.off..], std::usize::MAX);
|
|
debug_assert!(end <= self.text[self.off..].len());
|
|
if end != 0 {
|
|
self.push_stack(Container::Section, end, end);
|
|
Event::SectionBeg
|
|
} else {
|
|
self.next_headline()
|
|
}
|
|
}
|
|
|
|
fn next_headline(&mut self) -> Event<'a> {
|
|
let (hdl, off, end) = Headline::parse(&self.text[self.off..], self.keywords);
|
|
debug_assert!(end <= self.text[self.off..].len());
|
|
self.push_stack(Container::Headline(self.off + off), end, end);
|
|
self.off += off;
|
|
Event::HeadlineBeg(hdl)
|
|
}
|
|
|
|
fn next_ele(&mut self, text: &'a str) -> Event<'a> {
|
|
let (ele, off, limit, end) = self
|
|
.ele_buf
|
|
.take()
|
|
.or_else(|| self.real_next_ele(text))
|
|
.unwrap_or_else(|| {
|
|
let len = text.len();
|
|
let start = text.find(|c| c != '\n').unwrap_or(0);
|
|
if start == len - 1 {
|
|
(self.end(), len, 0, 0)
|
|
} else {
|
|
let mut pos = start;
|
|
for off in memchr_iter(b'\n', &text.as_bytes()[start..]) {
|
|
if text[pos..off + start].trim().is_empty() {
|
|
return (Event::ParagraphBeg, start, pos, off + start);
|
|
} else {
|
|
pos = off + start;
|
|
if let Some(buf) = self.real_next_ele(&text[pos + 1..]) {
|
|
self.ele_buf = Some(buf);
|
|
return (Event::ParagraphBeg, start, pos, pos);
|
|
}
|
|
}
|
|
}
|
|
(
|
|
Event::ParagraphBeg,
|
|
start,
|
|
if text.ends_with('\n') { len - 1 } else { len },
|
|
len,
|
|
)
|
|
}
|
|
});
|
|
|
|
debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len());
|
|
|
|
match ele {
|
|
Event::ParagraphBeg => self.push_stack(Container::Paragraph, limit, end),
|
|
Event::QteBlockBeg => self.push_stack(Container::QteBlock, limit, end),
|
|
Event::CtrBlockBeg => self.push_stack(Container::CtrBlock, limit, end),
|
|
Event::SplBlockBeg { .. } => self.push_stack(Container::SplBlock, limit, end),
|
|
Event::DynBlockBeg { .. } => self.push_stack(Container::DynBlock, limit, end),
|
|
Event::ListBeg { ordered, .. } => {
|
|
self.push_stack(Container::List(limit, ordered), end, end);
|
|
self.list_more_item = true;
|
|
}
|
|
_ => (),
|
|
}
|
|
|
|
self.off += off;
|
|
|
|
ele
|
|
}
|
|
|
|
// returns (event, offset, container limit, container end)
|
|
fn real_next_ele(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
|
if text.starts_with("[fn:") {
|
|
if let Some((label, cont, off)) = fn_def::parse(text) {
|
|
return Some((Event::FnDef { label, cont }, off + 1, 0, 0));
|
|
}
|
|
}
|
|
|
|
let (tail, line_begin) = text
|
|
.find(|c| c != ' ')
|
|
.map(|off| (&text[off..], off))
|
|
.unwrap_or((text, 0));
|
|
|
|
let (is_item, ordered) = list::is_item(tail);
|
|
if is_item {
|
|
return Some((Event::ListBeg { ordered }, 0, line_begin, text.len()));
|
|
}
|
|
|
|
// TODO: LaTeX environment
|
|
if tail.starts_with("\\begin{") {}
|
|
|
|
// rule
|
|
if tail.starts_with("-----") {
|
|
let off = rule::parse(tail);
|
|
if off != 0 {
|
|
return Some((Event::Rule, off, 0, 0));
|
|
}
|
|
}
|
|
|
|
// fixed width
|
|
if tail.starts_with(": ") || tail.starts_with(":\n") {
|
|
// let end = line_ends
|
|
// .skip_while(|&i| {
|
|
// text[i + 1..].starts_with(": ") || text[i + 1..].starts_with(":\n")
|
|
// })
|
|
// .next()
|
|
// .map(|i| i + 1)
|
|
// .unwrap_or_else(|| text.len());
|
|
// let off = end - pos;
|
|
// brk!(Element::FixedWidth(&tail[0..off]), off);
|
|
}
|
|
|
|
// comment
|
|
if tail.starts_with("# ") || tail.starts_with("#\n") {
|
|
// let end = line_ends
|
|
// .skip_while(|&i| {
|
|
// text[i + 1..].starts_with("# ") || text[i + 1..].starts_with("#\n")
|
|
// })
|
|
// .next()
|
|
// .map(|i| i + 1)
|
|
// .unwrap_or_else(|| text.len());
|
|
// let off = end - pos;
|
|
// brk!(Element::Comment(&tail[0..off]), off);
|
|
}
|
|
|
|
if tail.starts_with("#+") {
|
|
block::parse(tail)
|
|
.map(|(name, args, begin, limit, end)| {
|
|
let cont = &tail[begin..limit];
|
|
match &*name.to_uppercase() {
|
|
"COMMENT" => (Event::CommentBlock { args, cont }, end, 0, 0),
|
|
"EXAMPLE" => (Event::ExampleBlock { args, cont }, end, 0, 0),
|
|
"EXPORT" => (Event::ExportBlock { args, cont }, end, 0, 0),
|
|
"SRC" => (Event::SrcBlock { args, cont }, end, 0, 0),
|
|
"VERSE" => (Event::VerseBlock { args, cont }, end, 0, 0),
|
|
"CENTER" => (Event::CtrBlockBeg, begin, limit, end),
|
|
"QUOTE" => (Event::QteBlockBeg, begin, limit, end),
|
|
_ => (Event::SplBlockBeg { name, args }, begin, limit, end),
|
|
}
|
|
})
|
|
.or_else(|| {
|
|
dyn_block::parse(tail).map(|(name, args, begin, limit, end)| {
|
|
(Event::DynBlockBeg { name, args }, begin, limit, end)
|
|
})
|
|
})
|
|
.or_else(|| {
|
|
keyword::parse(tail).map(|(key, value, off)| {
|
|
if let Key::Call = key {
|
|
(Event::Call { value }, off, 0, 0)
|
|
} else {
|
|
(Event::Keyword { key, value }, off, 0, 0)
|
|
}
|
|
})
|
|
})
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
fn next_obj(&mut self, text: &'a str) -> Event<'a> {
|
|
let (obj, off, limit, end) = self
|
|
.obj_buf
|
|
.take()
|
|
.or_else(|| self.real_next_obj(text))
|
|
.unwrap_or_else(|| {
|
|
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
|
|
let bytes = text.as_bytes();
|
|
let mut pos = 0;
|
|
|
|
while let Some(off) = bs.find(&bytes[pos..]) {
|
|
pos += off + 1;
|
|
|
|
if let Some(buf) = self.real_next_obj(&text[pos..]) {
|
|
self.obj_buf = Some(buf);
|
|
return (Event::Text(&text[0..pos]), pos, 0, 0);
|
|
}
|
|
}
|
|
|
|
(Event::Text(text), text.len(), 0, 0)
|
|
});
|
|
|
|
debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len());
|
|
|
|
self.off += off;
|
|
|
|
match obj {
|
|
Event::UnderlineBeg => self.push_stack(Container::Underline, limit, end),
|
|
Event::StrikeBeg => self.push_stack(Container::Strike, limit, end),
|
|
Event::ItalicBeg => self.push_stack(Container::Italic, limit, end),
|
|
Event::BoldBeg => self.push_stack(Container::Bold, limit, end),
|
|
_ => (),
|
|
}
|
|
|
|
obj
|
|
}
|
|
|
|
fn real_next_obj(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
|
if text.len() < 3 {
|
|
return None;
|
|
}
|
|
|
|
let bytes = text.as_bytes();
|
|
match bytes[0] {
|
|
b'@' if bytes[1] == b'@' => snippet::parse(text)
|
|
.map(|(name, value, off)| (Event::Snippet { name, value }, off, 0, 0)),
|
|
b'{' if bytes[1] == b'{' && bytes[2] == b'{' => macros::parse(text)
|
|
.map(|(name, args, off)| (Event::Macros { name, args }, off, 0, 0)),
|
|
b'<' if bytes[1] == b'<' => {
|
|
if bytes[2] == b'<' {
|
|
radio_target::parse(text)
|
|
.map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0))
|
|
} else {
|
|
target::parse(text).map(|(target, off)| (Event::Target { target }, off, 0, 0))
|
|
}
|
|
}
|
|
b'<' => timestamp::parse_active(text)
|
|
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
|
|
.or_else(|| {
|
|
timestamp::parse_diary(text)
|
|
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
|
|
}),
|
|
b'[' => {
|
|
if text[1..].starts_with("fn:") {
|
|
fn_ref::parse(text)
|
|
.map(|(label, def, off)| (Event::FnRef { label, def }, off, 0, 0))
|
|
} else if bytes[1] == b'[' {
|
|
link::parse(text)
|
|
.map(|(path, desc, off)| (Event::Link { path, desc }, off, 0, 0))
|
|
} else {
|
|
cookie::parse(text)
|
|
.map(|(cookie, off)| (Event::Cookie(cookie), off, 0, 0))
|
|
.or_else(|| {
|
|
timestamp::parse_inactive(text)
|
|
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
|
|
})
|
|
}
|
|
}
|
|
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => self.next_inline(&text[1..]),
|
|
_ => self.next_inline(text),
|
|
}
|
|
}
|
|
|
|
fn next_inline(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
|
match text.as_bytes()[0] {
|
|
b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)),
|
|
b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)),
|
|
b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)),
|
|
b'_' => emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end)),
|
|
b'=' => emphasis::parse(text, b'=')
|
|
.map(|end| (Event::Verbatim(&text[1..end]), end + 1, 0, 0)),
|
|
b'~' => {
|
|
emphasis::parse(text, b'~').map(|end| (Event::Code(&text[1..end]), end + 1, 0, 0))
|
|
}
|
|
b's' if text.starts_with("src_") => {
|
|
inline_src::parse(text).map(|(lang, option, body, off)| {
|
|
(Event::InlineSrc { lang, option, body }, off, 0, 0)
|
|
})
|
|
}
|
|
b'c' if text.starts_with("call_") => {
|
|
inline_call::parse(text).map(|(name, args, inside_header, end_header, off)| {
|
|
(
|
|
Event::InlineCall {
|
|
name,
|
|
args,
|
|
inside_header,
|
|
end_header,
|
|
},
|
|
off,
|
|
0,
|
|
0,
|
|
)
|
|
})
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn next_list_item(&mut self, ident: usize, text: &'a str) -> Event<'a> {
|
|
let (bullet, off, limit, end, has_more) = list::parse(text, ident);
|
|
self.push_stack(Container::ListItem, limit, end);
|
|
self.off += off;
|
|
self.list_more_item = has_more;
|
|
Event::ListItemBeg { bullet }
|
|
}
|
|
|
|
#[inline]
|
|
fn push_stack(&mut self, container: Container, limit: usize, end: usize) {
|
|
self.stack
|
|
.push((container, self.off + limit, self.off + end));
|
|
}
|
|
|
|
#[inline]
|
|
fn end(&mut self) -> Event<'a> {
|
|
let (container, _, _) = self.stack.pop().unwrap();
|
|
match container {
|
|
Container::Bold => Event::BoldEnd,
|
|
Container::CtrBlock => Event::CtrBlockEnd,
|
|
Container::DynBlock => Event::DynBlockEnd,
|
|
Container::Headline(_) => Event::HeadlineEnd,
|
|
Container::Italic => Event::ItalicEnd,
|
|
Container::List(_, ordered) => Event::ListEnd { ordered },
|
|
Container::ListItem => Event::ListItemEnd,
|
|
Container::Paragraph => Event::ParagraphEnd,
|
|
Container::QteBlock => Event::QteBlockEnd,
|
|
Container::Section => Event::SectionEnd,
|
|
Container::SplBlock => Event::SplBlockEnd,
|
|
Container::Strike => Event::StrikeEnd,
|
|
Container::Underline => Event::UnderlineEnd,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> Iterator for Parser<'a> {
|
|
type Item = Event<'a>;
|
|
|
|
fn next(&mut self) -> Option<Event<'a>> {
|
|
if let Some(&(container, limit, end)) = self.stack.last() {
|
|
Some(if self.off >= limit {
|
|
debug_assert!(self.off <= limit && self.off <= end);
|
|
self.off = end;
|
|
self.end()
|
|
} else {
|
|
match container {
|
|
Container::Headline(beg) => {
|
|
debug_assert!(self.off >= beg);
|
|
if self.off == beg {
|
|
self.next_section_or_headline()
|
|
} else {
|
|
self.next_headline()
|
|
}
|
|
}
|
|
Container::DynBlock
|
|
| Container::CtrBlock
|
|
| Container::QteBlock
|
|
| Container::SplBlock
|
|
| Container::ListItem
|
|
| Container::Section => self.next_ele(&self.text[self.off..limit]),
|
|
Container::List(ident, _) => {
|
|
if self.list_more_item {
|
|
self.next_list_item(ident, &self.text[self.off..limit])
|
|
} else {
|
|
self.end()
|
|
}
|
|
}
|
|
Container::Paragraph
|
|
| Container::Bold
|
|
| Container::Underline
|
|
| Container::Italic
|
|
| Container::Strike => self.next_obj(&self.text[self.off..limit]),
|
|
}
|
|
})
|
|
} else if self.off < self.text.len() {
|
|
Some(self.next_section_or_headline())
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Checks the event stream produced for a small document with nested
/// headlines and one emphasis object per section.
#[test]
fn parse() {
    use self::Event::*;

    let expected = vec![
        HeadlineBeg(Headline {
            level: 1,
            priority: None,
            keyword: None,
            title: "Title 1",
            tags: None,
        }),
        SectionBeg,
        ParagraphBeg,
        Text("test "),
        BoldBeg,
        Text("Section 1"),
        BoldEnd,
        ParagraphEnd,
        SectionEnd,
        HeadlineBeg(Headline {
            level: 2,
            priority: None,
            keyword: None,
            title: "Title 2",
            tags: None,
        }),
        SectionBeg,
        ParagraphBeg,
        UnderlineBeg,
        Text("Section 2"),
        UnderlineEnd,
        ParagraphEnd,
        SectionEnd,
        HeadlineEnd,
        HeadlineEnd,
        HeadlineBeg(Headline {
            level: 1,
            priority: None,
            keyword: None,
            title: "Title 3",
            tags: None,
        }),
        SectionBeg,
        ParagraphBeg,
        ItalicBeg,
        Text("Section 3"),
        ItalicEnd,
        ParagraphEnd,
        SectionEnd,
        HeadlineEnd,
        HeadlineBeg(Headline {
            level: 1,
            priority: None,
            keyword: None,
            title: "Title 4",
            tags: None,
        }),
        SectionBeg,
        ParagraphBeg,
        Verbatim("Section 4"),
        ParagraphEnd,
        SectionEnd,
        HeadlineEnd,
    ];

    // NOTE(review): the input used to be an `#+OPTIONS` keyword line followed
    // by `* Definitions`, which cannot produce the events above. The document
    // below is reconstructed from the expected events — confirm that it is
    // the intended fixture.
    assert_eq!(
        Parser::new(
            "* Title 1\ntest *Section 1*\n** Title 2\n_Section 2_\n* Title 3\n/Section 3/\n* Title 4\n=Section 4="
        )
        .collect::<Vec<_>>(),
        expected
    );
}
|