660 lines
21 KiB
Rust
660 lines
21 KiB
Rust
//! Parser
|
|
|
|
use crate::{elements::*, headline::*, objects::*};
|
|
use jetscii::bytes;
|
|
use memchr::memchr_iter;
|
|
|
|
#[cfg_attr(test, derive(PartialEq))]
|
|
#[derive(Copy, Clone, Debug)]
|
|
enum Container {
|
|
Headline(usize),
|
|
Section(usize),
|
|
Drawer,
|
|
Paragraph,
|
|
CtrBlock,
|
|
QteBlock,
|
|
SplBlock,
|
|
DynBlock,
|
|
List(usize, bool),
|
|
ListItem,
|
|
Italic,
|
|
Strike,
|
|
Bold,
|
|
Underline,
|
|
}
|
|
|
|
#[cfg_attr(test, derive(PartialEq))]
|
|
#[derive(Debug)]
|
|
pub enum Event<'a> {
|
|
HeadlineBeg(Headline<'a>),
|
|
HeadlineEnd,
|
|
|
|
SectionBeg,
|
|
SectionEnd,
|
|
|
|
ParagraphBeg,
|
|
ParagraphEnd,
|
|
|
|
CtrBlockBeg,
|
|
CtrBlockEnd,
|
|
QteBlockBeg,
|
|
QteBlockEnd,
|
|
SplBlockBeg {
|
|
name: &'a str,
|
|
args: Option<&'a str>,
|
|
},
|
|
SplBlockEnd,
|
|
DynBlockBeg {
|
|
name: &'a str,
|
|
args: Option<&'a str>,
|
|
},
|
|
DynBlockEnd,
|
|
|
|
CommentBlock {
|
|
args: Option<&'a str>,
|
|
cont: &'a str,
|
|
},
|
|
ExampleBlock {
|
|
args: Option<&'a str>,
|
|
cont: &'a str,
|
|
},
|
|
ExportBlock {
|
|
args: Option<&'a str>,
|
|
cont: &'a str,
|
|
},
|
|
SrcBlock {
|
|
args: Option<&'a str>,
|
|
cont: &'a str,
|
|
},
|
|
VerseBlock {
|
|
args: Option<&'a str>,
|
|
cont: &'a str,
|
|
},
|
|
|
|
ListBeg {
|
|
ordered: bool,
|
|
},
|
|
ListEnd {
|
|
ordered: bool,
|
|
},
|
|
ListItemBeg {
|
|
bullet: &'a str,
|
|
},
|
|
ListItemEnd,
|
|
|
|
Call {
|
|
value: &'a str,
|
|
},
|
|
|
|
Clock(Clock<'a>),
|
|
|
|
Comment(&'a str),
|
|
FixedWidth(&'a str),
|
|
|
|
Planning(Planning<'a>),
|
|
|
|
DrawerBeg(&'a str),
|
|
DrawerEnd,
|
|
|
|
TableStart,
|
|
TableEnd,
|
|
TableCell,
|
|
|
|
LatexEnv,
|
|
FnDef {
|
|
label: &'a str,
|
|
cont: &'a str,
|
|
},
|
|
Keyword(Keyword<'a>),
|
|
Rule,
|
|
|
|
Timestamp(Timestamp<'a>),
|
|
Cookie(Cookie<'a>),
|
|
FnRef(FnRef<'a>),
|
|
InlineCall(InlineCall<'a>),
|
|
InlineSrc(InlineSrc<'a>),
|
|
Link(Link<'a>),
|
|
Macros(Macros<'a>),
|
|
RadioTarget {
|
|
target: &'a str,
|
|
},
|
|
Snippet(Snippet<'a>),
|
|
Target {
|
|
target: &'a str,
|
|
},
|
|
|
|
BoldBeg,
|
|
BoldEnd,
|
|
ItalicBeg,
|
|
ItalicEnd,
|
|
StrikeBeg,
|
|
StrikeEnd,
|
|
UnderlineBeg,
|
|
UnderlineEnd,
|
|
|
|
Verbatim(&'a str),
|
|
Code(&'a str),
|
|
Text(&'a str),
|
|
}
|
|
|
|
pub struct Parser<'a> {
|
|
text: &'a str,
|
|
stack: Vec<(Container, usize, usize)>,
|
|
next_item: Vec<Option<&'a str>>,
|
|
off: usize,
|
|
ele_buf: Option<(Event<'a>, usize, usize, usize)>,
|
|
obj_buf: Option<(Event<'a>, usize, usize, usize)>,
|
|
todo_keywords: &'a [&'a str],
|
|
}
|
|
|
|
impl<'a> Parser<'a> {
|
|
/// creates a new parser from string
|
|
pub fn new(text: &'a str) -> Parser<'a> {
|
|
Parser {
|
|
text,
|
|
stack: Vec::new(),
|
|
next_item: Vec::new(),
|
|
off: 0,
|
|
ele_buf: None,
|
|
obj_buf: None,
|
|
todo_keywords: DEFAULT_TODO_KEYWORDS,
|
|
}
|
|
}
|
|
|
|
/// creates a new parser from string, with the specified keywords
|
|
pub fn with_todo_keywrods(text: &'a str, todo_keywords: &'a [&'a str]) -> Parser<'a> {
|
|
Parser {
|
|
text,
|
|
stack: Vec::new(),
|
|
next_item: Vec::new(),
|
|
off: 0,
|
|
ele_buf: None,
|
|
obj_buf: None,
|
|
todo_keywords,
|
|
}
|
|
}
|
|
|
|
/// returns current offset
|
|
pub fn offset(&self) -> usize {
|
|
self.off
|
|
}
|
|
|
|
/// returns current stack depth
|
|
pub fn stack_depth(&self) -> usize {
|
|
self.stack.len()
|
|
}
|
|
|
|
/// set todo keywords
|
|
pub fn set_todo_keywords(&mut self, todo_keywords: &'a [&'a str]) {
|
|
self.todo_keywords = todo_keywords;
|
|
}
|
|
|
|
/// set text
|
|
pub fn set_text(&mut self, text: &'a str) {
|
|
self.off = 0;
|
|
self.stack.clear();
|
|
self.next_item.clear();
|
|
self.ele_buf = None;
|
|
self.obj_buf = None;
|
|
self.text = text;
|
|
}
|
|
|
|
fn next_section_or_headline(&mut self, text: &'a str) -> Event<'a> {
|
|
let end = Headline::find_level(text, std::usize::MAX);
|
|
if end != 0 {
|
|
self.push_stack(Container::Section(self.off), end, end);
|
|
Event::SectionBeg
|
|
} else {
|
|
self.next_headline(text)
|
|
}
|
|
}
|
|
|
|
fn next_headline(&mut self, text: &'a str) -> Event<'a> {
|
|
let (hdl, off, end) = Headline::parse(text, self.todo_keywords);
|
|
self.push_stack(Container::Headline(self.off + off), end, end);
|
|
self.off += off;
|
|
Event::HeadlineBeg(hdl)
|
|
}
|
|
|
|
fn next_ele(&mut self, text: &'a str) -> Event<'a> {
|
|
fn skip_empty_lines(text: &str) -> usize {
|
|
let mut i = 0;
|
|
for pos in memchr_iter(b'\n', text.as_bytes()) {
|
|
if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) {
|
|
i = pos + 1;
|
|
} else {
|
|
return i;
|
|
}
|
|
}
|
|
if text.as_bytes()[i..].iter().all(u8::is_ascii_whitespace) {
|
|
text.len()
|
|
} else {
|
|
i
|
|
}
|
|
}
|
|
|
|
let start = skip_empty_lines(text);
|
|
if start == text.len() {
|
|
self.off += text.len();
|
|
return self.end();
|
|
};
|
|
let tail = &text[start..];
|
|
|
|
let (ele, off, limit, end) = self
|
|
.ele_buf
|
|
.take()
|
|
.or_else(|| self.real_next_ele(tail))
|
|
.unwrap_or_else(|| {
|
|
let mut pos = 0;
|
|
for i in memchr_iter(b'\n', tail.as_bytes()) {
|
|
if tail.as_bytes()[pos..i].iter().all(u8::is_ascii_whitespace) {
|
|
return (Event::ParagraphBeg, 0, pos - 1 + start, i + 1 + start);
|
|
} else if let Some(buf) = self.real_next_ele(&tail[pos..]) {
|
|
self.ele_buf = Some(buf);
|
|
return (Event::ParagraphBeg, 0, pos - 1 + start, pos + start);
|
|
}
|
|
pos = i + 1;
|
|
}
|
|
let len = text.len();
|
|
(
|
|
Event::ParagraphBeg,
|
|
0,
|
|
if text.ends_with('\n') { len - 1 } else { len },
|
|
len,
|
|
)
|
|
});
|
|
|
|
debug_assert!(
|
|
(limit == 0 && end == 0) || (off <= limit && limit <= end && end <= text.len()),
|
|
"{} <= {} <= {} <= {}",
|
|
off,
|
|
limit,
|
|
end,
|
|
text.len()
|
|
);
|
|
|
|
match ele {
|
|
Event::DrawerBeg(_) => self.push_stack(Container::Drawer, limit, end),
|
|
Event::ParagraphBeg => self.push_stack(Container::Paragraph, limit, end),
|
|
Event::QteBlockBeg => self.push_stack(Container::QteBlock, limit, end),
|
|
Event::CtrBlockBeg => self.push_stack(Container::CtrBlock, limit, end),
|
|
Event::SplBlockBeg { .. } => self.push_stack(Container::SplBlock, limit, end),
|
|
Event::DynBlockBeg { .. } => self.push_stack(Container::DynBlock, limit, end),
|
|
Event::ListBeg { ordered, .. } => {
|
|
self.push_stack(Container::List(limit, ordered), end, end)
|
|
}
|
|
_ => (),
|
|
}
|
|
|
|
self.off += off + start;
|
|
|
|
ele
|
|
}
|
|
|
|
// returns (event, offset, container limit, container end)
|
|
fn real_next_ele(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
|
debug_assert!(!text.starts_with('\n'));
|
|
|
|
if text.starts_with("[fn:") {
|
|
if let Some((label, cont, off)) = fn_def::parse(text) {
|
|
return Some((Event::FnDef { label, cont }, off + 1, 0, 0));
|
|
}
|
|
}
|
|
|
|
let (tail, line_begin) = text
|
|
.find(|c| c != ' ')
|
|
.map(|off| (&text[off..], off))
|
|
.unwrap_or((text, 0));
|
|
|
|
if let Some((ordered, bullet)) = list::is_item(tail) {
|
|
self.next_item.push(Some(bullet));
|
|
return Some((Event::ListBeg { ordered }, 0, line_begin, text.len()));
|
|
}
|
|
|
|
if tail.starts_with("CLOCK:") {
|
|
if let Some((clock, off)) = Clock::parse(tail) {
|
|
return Some((Event::Clock(clock), off + line_begin, 0, 0));
|
|
}
|
|
}
|
|
|
|
// TODO: LaTeX environment
|
|
if tail.starts_with("\\begin{") {}
|
|
|
|
// rule
|
|
if tail.starts_with("-----") {
|
|
let off = rule::parse(tail);
|
|
if off != 0 {
|
|
return Some((Event::Rule, off + line_begin, 0, 0));
|
|
}
|
|
}
|
|
|
|
if tail.starts_with(':') {
|
|
if let Some((name, off, limit, end)) = drawer::parse(tail) {
|
|
return Some((
|
|
Event::DrawerBeg(name),
|
|
off + line_begin,
|
|
limit + line_begin,
|
|
end + line_begin,
|
|
));
|
|
}
|
|
}
|
|
|
|
// fixed width
|
|
if tail.starts_with(": ") || tail.starts_with(":\n") {
|
|
// let end = line_ends
|
|
// .skip_while(|&i| {
|
|
// text[i + 1..].starts_with(": ") || text[i + 1..].starts_with(":\n")
|
|
// })
|
|
// .next()
|
|
// .map(|i| i + 1)
|
|
// .unwrap_or_else(|| text.len());
|
|
// let off = end - pos;
|
|
// brk!(Element::FixedWidth(&tail[0..off]), off);
|
|
}
|
|
|
|
// comment
|
|
if tail.starts_with("# ") || tail.starts_with("#\n") {
|
|
// let end = line_ends
|
|
// .skip_while(|&i| {
|
|
// text[i + 1..].starts_with("# ") || text[i + 1..].starts_with("#\n")
|
|
// })
|
|
// .next()
|
|
// .map(|i| i + 1)
|
|
// .unwrap_or_else(|| text.len());
|
|
// let off = end - pos;
|
|
// brk!(Element::Comment(&tail[0..off]), off);
|
|
}
|
|
|
|
if tail.starts_with("#+") {
|
|
block::parse(tail)
|
|
.map(|(name, args, begin, limit, end)| {
|
|
let cont = &tail[begin..limit];
|
|
match &*name.to_uppercase() {
|
|
"COMMENT" => (Event::CommentBlock { args, cont }, end + line_begin, 0, 0),
|
|
"EXAMPLE" => (Event::ExampleBlock { args, cont }, end + line_begin, 0, 0),
|
|
"EXPORT" => (Event::ExportBlock { args, cont }, end + line_begin, 0, 0),
|
|
"SRC" => (Event::SrcBlock { args, cont }, end + line_begin, 0, 0),
|
|
"VERSE" => (Event::VerseBlock { args, cont }, end + line_begin, 0, 0),
|
|
"CENTER" => (
|
|
Event::CtrBlockBeg,
|
|
begin + line_begin,
|
|
limit + line_begin,
|
|
end + line_begin,
|
|
),
|
|
"QUOTE" => (
|
|
Event::QteBlockBeg,
|
|
begin + line_begin,
|
|
limit + line_begin,
|
|
end + line_begin,
|
|
),
|
|
_ => (
|
|
Event::SplBlockBeg { name, args },
|
|
begin + line_begin,
|
|
limit + line_begin,
|
|
end + line_begin,
|
|
),
|
|
}
|
|
})
|
|
.or_else(|| {
|
|
dyn_block::parse(tail).map(|(name, args, begin, limit, end)| {
|
|
(
|
|
Event::DynBlockBeg { name, args },
|
|
begin + line_begin,
|
|
limit + line_begin,
|
|
end + line_begin,
|
|
)
|
|
})
|
|
})
|
|
.or_else(|| {
|
|
Keyword::parse(tail).map(|(key, option, value, off)| {
|
|
(
|
|
if key.eq_ignore_ascii_case("CALL") {
|
|
Event::Call { value }
|
|
} else {
|
|
Event::Keyword(Keyword::new(key, option, value))
|
|
},
|
|
off + line_begin,
|
|
0,
|
|
0,
|
|
)
|
|
})
|
|
})
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
fn next_obj(&mut self, text: &'a str) -> Event<'a> {
|
|
let (obj, off, limit, end) = self
|
|
.obj_buf
|
|
.take()
|
|
.or_else(|| self.real_next_obj(text))
|
|
.unwrap_or_else(|| {
|
|
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
|
|
let bytes = text.as_bytes();
|
|
let mut pos = 0;
|
|
|
|
while let Some(off) = bs.find(&bytes[pos..]) {
|
|
pos += off + 1;
|
|
|
|
if let Some(buf) = self.real_next_obj(&text[pos..]) {
|
|
self.obj_buf = Some(buf);
|
|
return (Event::Text(&text[0..pos]), pos, 0, 0);
|
|
}
|
|
}
|
|
|
|
(Event::Text(text), text.len(), 0, 0)
|
|
});
|
|
|
|
debug_assert!(
|
|
off <= text.len() && limit <= text.len() && end <= text.len(),
|
|
"{} <= {} <= {} <= {}",
|
|
off,
|
|
limit,
|
|
end,
|
|
text.len()
|
|
);
|
|
|
|
self.off += off;
|
|
|
|
match obj {
|
|
Event::UnderlineBeg => self.push_stack(Container::Underline, limit, end),
|
|
Event::StrikeBeg => self.push_stack(Container::Strike, limit, end),
|
|
Event::ItalicBeg => self.push_stack(Container::Italic, limit, end),
|
|
Event::BoldBeg => self.push_stack(Container::Bold, limit, end),
|
|
_ => (),
|
|
}
|
|
|
|
obj
|
|
}
|
|
|
|
fn real_next_obj(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
|
if text.len() < 3 {
|
|
return None;
|
|
}
|
|
|
|
let bytes = text.as_bytes();
|
|
match bytes[0] {
|
|
b'@' if bytes[1] == b'@' => {
|
|
Snippet::parse(text).map(|(snippet, off)| (Event::Snippet(snippet), off, 0, 0))
|
|
}
|
|
b'{' if bytes[1] == b'{' && bytes[2] == b'{' => {
|
|
Macros::parse(text).map(|(macros, off)| (Event::Macros(macros), off, 0, 0))
|
|
}
|
|
b'<' if bytes[1] == b'<' => {
|
|
if bytes[2] == b'<' {
|
|
radio_target::parse(text)
|
|
.map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0))
|
|
} else {
|
|
target::parse(text).map(|(target, off)| (Event::Target { target }, off, 0, 0))
|
|
}
|
|
}
|
|
b'<' => Timestamp::parse_active(text)
|
|
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
|
|
.or_else(|| {
|
|
Timestamp::parse_diary(text)
|
|
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
|
|
}),
|
|
b'[' => {
|
|
if text[1..].starts_with("fn:") {
|
|
FnRef::parse(text).map(|(fn_ref, off)| (Event::FnRef(fn_ref), off, 0, 0))
|
|
} else if bytes[1] == b'[' {
|
|
Link::parse(text).map(|(link, off)| (Event::Link(link), off, 0, 0))
|
|
} else {
|
|
Cookie::parse(text)
|
|
.map(|(cookie, off)| (Event::Cookie(cookie), off, 0, 0))
|
|
.or_else(|| {
|
|
Timestamp::parse_inactive(text)
|
|
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
|
|
})
|
|
}
|
|
}
|
|
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => self.next_inline(&text[1..]),
|
|
_ => self.next_inline(text),
|
|
}
|
|
}
|
|
|
|
fn next_inline(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
|
match text.as_bytes()[0] {
|
|
b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)),
|
|
b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)),
|
|
b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)),
|
|
b'_' => emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end)),
|
|
b'=' => emphasis::parse(text, b'=')
|
|
.map(|end| (Event::Verbatim(&text[1..end]), end + 1, 0, 0)),
|
|
b'~' => {
|
|
emphasis::parse(text, b'~').map(|end| (Event::Code(&text[1..end]), end + 1, 0, 0))
|
|
}
|
|
b's' if text.starts_with("src_") => {
|
|
InlineSrc::parse(text).map(|(src, off)| (Event::InlineSrc(src), off, 0, 0))
|
|
}
|
|
b'c' if text.starts_with("call_") => {
|
|
InlineCall::parse(text).map(|(call, off)| (Event::InlineCall(call), off, 0, 0))
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn push_stack(&mut self, container: Container, limit: usize, end: usize) {
|
|
self.stack
|
|
.push((container, self.off + limit, self.off + end));
|
|
}
|
|
|
|
#[inline]
|
|
fn end(&mut self) -> Event<'a> {
|
|
let (container, _, _) = self.stack.pop().unwrap();
|
|
match container {
|
|
Container::Bold => Event::BoldEnd,
|
|
Container::Drawer => Event::DrawerEnd,
|
|
Container::CtrBlock => Event::CtrBlockEnd,
|
|
Container::DynBlock => Event::DynBlockEnd,
|
|
Container::Headline(_) => Event::HeadlineEnd,
|
|
Container::Italic => Event::ItalicEnd,
|
|
Container::List(_, ordered) => Event::ListEnd { ordered },
|
|
Container::ListItem => Event::ListItemEnd,
|
|
Container::Paragraph => Event::ParagraphEnd,
|
|
Container::QteBlock => Event::QteBlockEnd,
|
|
Container::Section(_) => Event::SectionEnd,
|
|
Container::SplBlock => Event::SplBlockEnd,
|
|
Container::Strike => Event::StrikeEnd,
|
|
Container::Underline => Event::UnderlineEnd,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> Iterator for Parser<'a> {
|
|
type Item = Event<'a>;
|
|
|
|
fn next(&mut self) -> Option<Event<'a>> {
|
|
if let Some(&(container, limit, end)) = self.stack.last() {
|
|
let tail = &self.text[self.off..limit];
|
|
|
|
// eprint!("{:1$}", ' ', self.stack_depth());
|
|
// eprintln!("{:?} {:?} {:?}", container, tail, self.next_item);
|
|
|
|
debug_assert!(
|
|
self.off <= limit && limit <= end && end <= self.text.len(),
|
|
"{} <= {} <= {} <= {}",
|
|
self.off,
|
|
limit,
|
|
end,
|
|
self.text.len()
|
|
);
|
|
|
|
Some(match container {
|
|
Container::Headline(beg) => {
|
|
if self.off >= limit {
|
|
self.off = end;
|
|
self.stack.pop();
|
|
Event::HeadlineEnd
|
|
} else if self.off == beg {
|
|
self.next_section_or_headline(tail)
|
|
} else {
|
|
self.next_headline(tail)
|
|
}
|
|
}
|
|
Container::Drawer
|
|
| Container::DynBlock
|
|
| Container::CtrBlock
|
|
| Container::QteBlock
|
|
| Container::SplBlock
|
|
| Container::ListItem => {
|
|
if self.off >= limit {
|
|
self.off = end;
|
|
self.end()
|
|
} else {
|
|
self.next_ele(tail)
|
|
}
|
|
}
|
|
Container::Section(beg) => {
|
|
// planning should be the first line of section
|
|
if self.off >= limit {
|
|
self.off = end;
|
|
self.stack.pop();
|
|
Event::SectionEnd
|
|
} else if self.off == beg {
|
|
if let Some((planning, off)) = Planning::parse(tail) {
|
|
self.off += off;
|
|
Event::Planning(planning)
|
|
} else {
|
|
self.next_ele(tail)
|
|
}
|
|
} else {
|
|
self.next_ele(tail)
|
|
}
|
|
}
|
|
Container::List(ident, ordered) => {
|
|
if let Some(bullet) = self.next_item.pop().unwrap() {
|
|
let off = bullet.len() + ident;
|
|
self.off += off;
|
|
let (limit, end, next) = list::parse(&tail[off..], ident);
|
|
self.push_stack(Container::ListItem, limit, end);
|
|
self.next_item.push(next);
|
|
Event::ListItemBeg { bullet }
|
|
} else {
|
|
self.off = end;
|
|
self.stack.pop();
|
|
Event::ListEnd { ordered }
|
|
}
|
|
}
|
|
Container::Paragraph
|
|
| Container::Bold
|
|
| Container::Underline
|
|
| Container::Italic
|
|
| Container::Strike => {
|
|
if self.off >= limit {
|
|
self.off = end;
|
|
self.end()
|
|
} else {
|
|
self.next_obj(tail)
|
|
}
|
|
}
|
|
})
|
|
} else if self.off < self.text.len() {
|
|
Some(self.next_section_or_headline(&self.text[self.off..]))
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|