orgize/src/org.rs
2019-07-28 11:44:14 +08:00

527 lines
18 KiB
Rust

use indextree::{Arena, NodeId};
use jetscii::bytes;
use memchr::{memchr, memchr_iter};
use std::io::{Error, Write};
use crate::config::ParseConfig;
use crate::elements::*;
use crate::export::*;
use crate::iter::Iter;
/// A parsed document: the arena that owns every `Element` node, plus the id of the
/// root node of the tree.
pub struct Org<'a> {
// Storage for all element nodes created while parsing.
pub(crate) arena: Arena<Element<'a>>,
// Root of the tree; created as `Element::Document` in `parse_with_config`.
pub(crate) document: NodeId,
}
/// A pending unit of work on the parser's stack: a slice of the input that still has
/// to be parsed, together with the node that will receive the resulting children.
/// The variant selects which parsing routine `Org::parse_with_config` dispatches to.
enum Container<'a> {
// List: `content` is split into list items by `parse_list_items`, using `indent`.
List {
content: &'a str,
node: NodeId,
indent: usize,
},
// Block, List Item (and footnote definition): `content` is parsed as block-level
// elements by `parse_elements_children`.
Block {
content: &'a str,
node: NodeId,
},
// Paragraph, Inline Markup: `content` is parsed as inline objects by
// `parse_objects_children`.
Inline {
content: &'a str,
node: NodeId,
},
// Headline, Document: `content` is split into an optional leading section
// followed by headlines.
Headline {
content: &'a str,
node: NodeId,
},
// Section: an optional planning line is stripped, the rest is parsed as
// block-level elements.
Section {
content: &'a str,
node: NodeId,
},
}
impl<'a> Org<'a> {
pub fn parse(text: &'a str) -> Self {
Org::parse_with_config(text, ParseConfig::default())
}
pub fn parse_with_config(content: &'a str, config: ParseConfig<'_>) -> Self {
let mut arena = Arena::new();
let document = arena.new_node(Element::Document);
let mut containers = vec![Container::Headline {
content,
node: document,
}];
while let Some(container) = containers.pop() {
match container {
Container::Headline {
mut content,
node: parent,
} => {
if !content.is_empty() {
let off = Headline::find_level(content, std::usize::MAX);
if off != 0 {
let node = arena.new_node(Element::Section);
parent.append(node, &mut arena).unwrap();
containers.push(Container::Section {
content: &content[0..off],
node,
});
content = &content[off..];
}
}
while !content.is_empty() {
let (tail, headline, headline_content) = Headline::parse(content, &config);
let headline = Element::Headline(headline);
let node = arena.new_node(headline);
parent.append(node, &mut arena).unwrap();
containers.push(Container::Headline {
content: headline_content,
node,
});
content = tail;
}
}
Container::Section { content, node } => {
// TODO
if let Some((tail, _planning)) = Planning::parse(content) {
parse_elements_children(&mut arena, tail, node, &mut containers);
} else {
parse_elements_children(&mut arena, content, node, &mut containers);
}
}
Container::Block { content, node } => {
parse_elements_children(&mut arena, content, node, &mut containers);
}
Container::Inline { content, node } => {
parse_objects_children(&mut arena, content, node, &mut containers);
}
Container::List {
content,
node,
indent,
} => {
parse_list_items(&mut arena, content, indent, node, &mut containers);
}
}
}
Org { arena, document }
}
pub fn iter(&'a self) -> Iter<'a> {
Iter {
arena: &self.arena,
traverse: self.document.traverse(&self.arena),
}
}
pub fn html<W: Write>(&self, wrtier: W) -> Result<(), Error> {
self.html_with_handler(wrtier, DefaultHtmlHandler)
}
pub fn html_with_handler<W, H, E>(&self, mut writer: W, mut handler: H) -> Result<(), E>
where
W: Write,
E: From<Error>,
H: HtmlHandler<E>,
{
use crate::iter::Event::*;
for event in self.iter() {
match event {
Start(element) => handler.start(&mut writer, element)?,
End(element) => handler.end(&mut writer, element)?,
}
}
Ok(())
}
}
/// Parses the block-level elements found in `content` and appends them to `parent`.
///
/// Lines that `parse_element` does not recognize are gathered into paragraphs. A
/// paragraph ends at a blank line, at the next recognized element, or at the end of
/// `content`; its text is queued as a `Container::Inline` so the inline objects are
/// parsed later.
fn parse_elements_children<'a>(
    arena: &mut Arena<Element<'a>>,
    content: &'a str,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) {
    let mut tail = skip_empty_lines(content);

    if let Some((new_tail, element)) = parse_element(content, arena, containers) {
        parent.append(element, arena).unwrap();
        tail = skip_empty_lines(new_tail);
    }

    // `text` is where the paragraph being collected starts and `pos` is the number
    // of its bytes scanned so far, i.e. `&text[pos..]` is always `tail`.
    let mut text = tail;
    let mut pos = 0;

    while !tail.is_empty() {
        // Length of the current line, including its newline if there is one.
        let i = memchr(b'\n', tail.as_bytes())
            .map(|i| i + 1)
            .unwrap_or_else(|| tail.len());

        if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) {
            // A blank line closes the pending paragraph. Nothing is pending when the
            // input merely ends in an unterminated whitespace-only line, and no
            // empty paragraph must be produced for that.
            if pos != 0 {
                append_paragraph(arena, parent, containers, &text[0..pos]);
            }
            tail = skip_empty_lines(&tail[i..]);
            text = tail;
            pos = 0;
        } else if let Some((new_tail, element)) = parse_element(tail, arena, containers) {
            // An element interrupts the pending paragraph, which has to be attached
            // first to keep the children in document order.
            if pos != 0 {
                append_paragraph(arena, parent, containers, &text[0..pos]);
                pos = 0;
            }
            parent.append(element, arena).unwrap();
            tail = skip_empty_lines(new_tail);
            text = tail;
        } else {
            // An ordinary line: it becomes part of the pending paragraph.
            tail = &tail[i..];
            pos += i;
        }
    }

    if !text.is_empty() {
        append_paragraph(arena, parent, containers, &text[0..pos]);
    }
}

/// Appends a new `Element::Paragraph` to `parent` and queues `content`, stripped of
/// trailing newlines, for inline parsing.
fn append_paragraph<'a>(
    arena: &mut Arena<Element<'a>>,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
    content: &'a str,
) {
    let node = arena.new_node(Element::Paragraph);
    parent.append(node, arena).unwrap();
    containers.push(Container::Inline {
        content: content.trim_end_matches('\n'),
        node,
    });
}
/// Tries to parse a single block-level element at the start of `contents`.
///
/// On success, returns the unparsed remainder and the id of the newly created node.
/// The node is not attached to any parent here; that is left to the caller. Elements
/// that have children of their own (footnote definitions, lists, blocks and dynamic
/// blocks) additionally push a `Container` so that their content is parsed later.
///
/// Returns `None` when `contents` does not start with a recognized element.
fn parse_element<'a>(
    contents: &'a str,
    arena: &mut Arena<Element<'a>>,
    containers: &mut Vec<Container<'a>>,
) -> Option<(&'a str, NodeId)> {
    // Footnote definitions and lists are matched against the untrimmed input.
    if let Some((tail, fn_def, content)) = FnDef::parse(contents) {
        let node = arena.new_node(Element::FnDef(fn_def));
        containers.push(Container::Block { content, node });
        return Some((tail, node));
    } else if let Some((tail, list, content)) = List::parse(contents) {
        let indent = list.indent;
        let node = arena.new_node(Element::List(list));
        containers.push(Container::List {
            content,
            node,
            indent,
        });
        return Some((tail, node));
    }

    let tail = contents.trim_start();

    if let Some((tail, clock)) = Clock::parse(tail) {
        return Some((tail, arena.new_node(clock)));
    }

    // TODO: LaTeX environment (lines starting with `\begin{`)

    if tail.starts_with('-') {
        if let Ok((tail, rule)) = Rule::parse(tail) {
            return Some((tail, arena.new_node(rule)));
        }
    }

    if tail.starts_with(':') {
        // NOTE(review): the drawer's content is not queued for parsing here, unlike
        // the content of blocks below - confirm this is intended.
        if let Some((tail, drawer, _content)) = Drawer::parse(tail) {
            return Some((tail, arena.new_node(drawer)));
        }
    }

    // FixedWidth
    if starts_marked_line(tail, b':') {
        let (value, rest) = split_marked_lines(contents, b':');
        return Some((rest, arena.new_node(Element::FixedWidth { value })));
    }

    // Comment
    if starts_marked_line(tail, b'#') {
        let (value, rest) = split_marked_lines(contents, b'#');
        return Some((rest, arena.new_node(Element::Comment { value })));
    }

    if tail.starts_with("#+") {
        if let Some((tail, block, content)) = Block::parse(tail) {
            let node = arena.new_node(block);
            containers.push(Container::Block { content, node });
            Some((tail, node))
        } else if let Some((tail, dyn_block, content)) = DynBlock::parse(tail) {
            let node = arena.new_node(dyn_block);
            containers.push(Container::Block { content, node });
            Some((tail, node))
        } else {
            Keyword::parse(tail)
                .ok()
                .map(|(tail, kw)| (tail, arena.new_node(kw)))
        }
    } else {
        None
    }
}

/// Returns `true` if `line` begins with `marker` standing on its own: either the
/// marker is the entire input, or it is immediately followed by a space or a newline.
fn starts_marked_line(line: &str, marker: u8) -> bool {
    let bytes = line.as_bytes();
    bytes.first() == Some(&marker)
        && (bytes.len() == 1 || bytes[1] == b' ' || bytes[1] == b'\n')
}

/// Splits `contents` behind its leading run of `marker` lines, returning the run and
/// the remainder.
///
/// The first line is taken on trust (the caller has already checked it, ignoring
/// leading whitespace). Following lines are only accepted when the marker sits in
/// the first column.
fn split_marked_lines(contents: &str, marker: u8) -> (&str, &str) {
    for i in memchr_iter(b'\n', contents.as_bytes()) {
        // `i + 1` directly follows an ASCII newline, so it is a char boundary.
        if !starts_marked_line(&contents[i + 1..], marker) {
            return contents.split_at(i + 1);
        }
    }

    // Every line carries the marker, including a last line without a trailing
    // newline, so the whole input belongs to the element.
    contents.split_at(contents.len())
}
/// Parses the inline objects found in `content` and appends them to `parent`.
///
/// The text between two objects becomes an `Element::Text` node, so the children of
/// `parent` cover `content` in order without gaps.
fn parse_objects_children<'a>(
    arena: &mut Arena<Element<'a>>,
    content: &'a str,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) {
    let mut tail = content;

    if let Some((new_tail, obj)) = parse_object(tail, arena, containers) {
        parent.append(obj, arena).unwrap();
        tail = new_tail;
    }

    // `text` is where the pending plain text starts and `pos` is the number of its
    // bytes scanned so far, i.e. `&text[pos..]` is always `tail`.
    let mut text = tail;
    let mut pos = 0;

    let bs = bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n');

    while let Some(off) = bs.find(tail.as_bytes()) {
        // Depending on the byte that was found, an object may start at that byte,
        // right behind it, or (for `{`) at either position. `parsed` holds the
        // offset into `tail` at which an object was recognized, if any.
        let parsed = match tail.as_bytes()[off] {
            b'{' => match parse_object(&tail[off..], arena, containers) {
                Some(found) => Some((off, found)),
                None => parse_object(&tail[off + 1..], arena, containers)
                    .map(|found| (off + 1, found)),
            },
            b' ' | b'(' | b'\'' | b'"' | b'\n' => {
                parse_object(&tail[off + 1..], arena, containers).map(|found| (off + 1, found))
            }
            _ => parse_object(&tail[off..], arena, containers).map(|found| (off, found)),
        };

        if let Some((start, (new_tail, obj))) = parsed {
            // Flush the plain text in front of the object. This has to take `start`
            // into account as well: with `pos == 0` the text found in this very
            // iteration would otherwise be lost.
            if pos + start != 0 {
                let node = arena.new_node(Element::Text {
                    value: &text[0..pos + start],
                });
                parent.append(node, arena).unwrap();
            }
            parent.append(obj, arena).unwrap();
            tail = new_tail;
            text = new_tail;
            pos = 0;
        } else {
            // No object here; the byte stays part of the pending text.
            tail = &tail[off + 1..];
            pos += off + 1;
        }
    }

    if !text.is_empty() {
        let node = arena.new_node(Element::Text { value: text });
        parent.append(node, arena).unwrap();
    }
}
/// Tries to parse a single inline object at the very start of `contents`.
///
/// The first byte selects which kinds of object are attempted. On success, returns
/// the unparsed remainder and the id of the new, still unattached node. Bold, strike,
/// italic and underline markup also queue their inner text as `Container::Inline`.
/// Inputs shorter than three bytes never contain an object.
fn parse_object<'a>(
    contents: &'a str,
    arena: &mut Arena<Element<'a>>,
    containers: &mut Vec<Container<'a>>,
) -> Option<(&'a str, NodeId)> {
    if contents.len() < 3 {
        return None;
    }

    let bytes = contents.as_bytes();
    let marker = bytes[0];

    match marker {
        b'@' => Snippet::parse(contents)
            .ok()
            .map(|(rest, snippet)| (rest, arena.new_node(snippet))),
        b'{' => Macros::parse(contents)
            .ok()
            .map(|(rest, macros)| (rest, arena.new_node(macros))),
        // Radio target, target, active timestamp, diary timestamp - first match wins.
        b'<' => RadioTarget::parse(contents)
            .map(|(rest, (radio, _content))| (rest, radio))
            .or_else(|_| Target::parse(contents))
            .or_else(|_| Timestamp::parse_active(contents).map(|(rest, ts)| (rest, ts.into())))
            .or_else(|_| Timestamp::parse_diary(contents).map(|(rest, ts)| (rest, ts.into())))
            .ok()
            .map(|(rest, element)| (rest, arena.new_node(element))),
        b'[' => {
            if bytes[1..].starts_with(b"fn:") {
                FnRef::parse(contents)
                    .map(|(rest, fn_ref)| (rest, fn_ref.into()))
                    .map(|(rest, element)| (rest, arena.new_node(element)))
            } else if bytes[1] == b'[' {
                Link::parse(contents)
                    .ok()
                    .map(|(rest, link)| (rest, arena.new_node(link)))
            } else {
                // Statistics cookie first, inactive timestamp as the fallback.
                Cookie::parse(contents)
                    .map(|(rest, cookie)| (rest, cookie.into()))
                    .or_else(|| {
                        Timestamp::parse_inactive(contents)
                            .map(|(rest, ts)| (rest, ts.into()))
                            .ok()
                    })
                    .map(|(rest, element)| (rest, arena.new_node(element)))
            }
        }
        // Markup whose inner text may contain further objects.
        b'*' | b'+' | b'/' | b'_' => {
            let (rest, content) = parse_emphasis(contents, marker)?;
            let element = match marker {
                b'*' => Element::Bold,
                b'+' => Element::Strike,
                b'/' => Element::Italic,
                _ => Element::Underline,
            };
            let node = arena.new_node(element);
            containers.push(Container::Inline { content, node });
            Some((rest, node))
        }
        // Markup whose inner text is kept as-is.
        b'=' => {
            let (rest, value) = parse_emphasis(contents, b'=')?;
            Some((rest, arena.new_node(Element::Verbatim { value })))
        }
        b'~' => {
            let (rest, value) = parse_emphasis(contents, b'~')?;
            Some((rest, arena.new_node(Element::Code { value })))
        }
        b's' => InlineSrc::parse(contents)
            .ok()
            .map(|(rest, inline_src)| (rest, arena.new_node(inline_src))),
        b'c' => InlineCall::parse(contents)
            .ok()
            .map(|(rest, inline_call)| (rest, arena.new_node(inline_call))),
        _ => None,
    }
}
/// Splits `contents` into list items and appends each of them to `parent`.
///
/// `ListItem::parse` is called repeatedly with `indent` until the input is used up;
/// the body of every item is queued as a `Container::Block` for later parsing.
fn parse_list_items<'a>(
    arena: &mut Arena<Element<'a>>,
    mut contents: &'a str,
    indent: usize,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) {
    loop {
        if contents.is_empty() {
            break;
        }

        let (rest, item, body) = ListItem::parse(contents, indent);

        let node = arena.new_node(Element::ListItem(item));
        parent.append(node, arena).unwrap();
        containers.push(Container::Block {
            content: body,
            node,
        });

        contents = rest;
    }
}
/// Strips leading blank lines from `contents`.
///
/// A line is blank if it consists of ASCII whitespace only. Only lines terminated by
/// a newline are stripped, so a trailing whitespace-only line without a newline is
/// returned unchanged, and so is the indentation of the first non-blank line.
fn skip_empty_lines(contents: &str) -> &str {
    let bytes = contents.as_bytes();
    // Start of the first line not yet known to be blank.
    let mut start = 0;

    for newline in memchr_iter(b'\n', bytes) {
        let is_blank = bytes[start..newline]
            .iter()
            .all(|byte| byte.is_ascii_whitespace());
        if !is_blank {
            break;
        }
        start = newline + 1;
    }

    &contents[start..]
}
#[test]
fn test_skip_empty_lines() {
    // (input, expected remainder)
    let cases = [
        ("foo", "foo"),
        (" foo", " foo"),
        (" \nfoo\n", "foo\n"),
        (" \n\n\nfoo\n", "foo\n"),
        (" \n \n\nfoo\n", "foo\n"),
        (" \n \n\n foo\n", " foo\n"),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(skip_empty_lines(input), expected);
    }
}