feat(elements): table parsing

This commit is contained in:
PoiScript 2019-08-06 14:03:16 +08:00
parent 470f90bfb5
commit 3e4772a896
6 changed files with 688 additions and 553 deletions

View file

@ -1,47 +1,16 @@
use indextree::{Arena, NodeEdge, NodeId};
use jetscii::bytes;
use memchr::{memchr, memchr2, memchr_iter};
use std::io::{Error, Write};
use crate::config::ParseConfig;
use crate::elements::*;
use crate::export::*;
use crate::parsers::skip_empty_lines;
use crate::parsers::*;
/// A parsed document, stored as a tree of [`Element`] nodes inside an
/// `indextree` arena.
pub struct Org<'a> {
// Arena holding every `Element` node of the tree.
pub(crate) arena: Arena<Element<'a>>,
// Id of the tree's root node (presumably the `Element::Document` node
// created at the start of parsing -- confirm against the constructor).
pub(crate) document: NodeId,
}
/// A pending unit of parsing work: a slice of source text (`content`) that
/// still has to be parsed, together with the arena node (`node`) that the
/// resulting children are appended to.
///
/// Containers are kept on a stack (`Vec<Container>`); the parser pops one,
/// dispatches on the variant, and the parse functions push further containers
/// for any nested content they discover.
enum Container<'a> {
// List: `content` holds the list's items, parsed by `parse_list_items`
// using `indent`.
List {
content: &'a str,
node: NodeId,
indent: usize,
},
// Block, List Item: `content` is parsed as a sequence of block-level
// elements by `parse_blocks`.
Block {
content: &'a str,
node: NodeId,
},
// Paragraph, Inline Markup: `content` is parsed as inline objects and
// plain text by `parse_inlines`.
Inline {
content: &'a str,
node: NodeId,
},
// Headline: `content` is the headline's raw text.
Headline {
content: &'a str,
node: NodeId,
},
// Document: `content` is the raw text of the whole document.
Document {
content: &'a str,
node: NodeId,
},
}
#[derive(Debug)]
pub enum Event<'a> {
Start(&'a Element<'a>),
@ -57,69 +26,32 @@ impl Org<'_> {
let mut arena = Arena::new();
let document = arena.new_node(Element::Document);
let mut containers = vec![Container::Document {
let containers = &mut vec![Container::Document {
content,
node: document,
}];
while let Some(container) = containers.pop() {
match container {
Container::Document {
content,
node: parent,
} => {
let mut tail = skip_empty_lines(content);
if let Some((new_tail, content)) = parse_section(tail) {
let node = arena.new_node(Element::Section);
parent.append(node, &mut arena).unwrap();
containers.push(Container::Block { content, node });
tail = new_tail;
}
while !tail.is_empty() {
let (new_tail, content) = parse_headline(tail);
let node = arena.new_node(Element::Headline);
parent.append(node, &mut arena).unwrap();
containers.push(Container::Headline { content, node });
tail = new_tail;
}
Container::Document { content, node } => {
parse_section_and_headlines(&mut arena, content, node, containers);
}
Container::Headline {
content,
node: parent,
} => {
let mut tail = content;
let (new_tail, title) = Title::parse(tail, config).unwrap();
let content = title.raw;
let node = arena.new_node(Element::Title(title));
parent.append(node, &mut arena).unwrap();
containers.push(Container::Inline { content, node });
tail = skip_empty_lines(new_tail);
if let Some((new_tail, content)) = parse_section(tail) {
let node = arena.new_node(Element::Section);
parent.append(node, &mut arena).unwrap();
containers.push(Container::Block { content, node });
tail = new_tail;
}
while !tail.is_empty() {
let (new_tail, content) = parse_headline(tail);
let node = arena.new_node(Element::Headline);
parent.append(node, &mut arena).unwrap();
containers.push(Container::Headline { content, node });
tail = new_tail;
}
Container::Headline { content, node } => {
let content = parse_title(&mut arena, content, node, containers, config);
parse_section_and_headlines(&mut arena, content, node, containers);
}
Container::Block { content, node } => {
parse_blocks(&mut arena, content, node, &mut containers);
parse_blocks(&mut arena, content, node, containers);
}
Container::Inline { content, node } => {
parse_inlines(&mut arena, content, node, &mut containers);
parse_inlines(&mut arena, content, node, containers);
}
Container::List {
content,
node,
indent,
} => {
parse_list_items(&mut arena, content, indent, node, &mut containers);
parse_list_items(&mut arena, content, indent, node, containers);
}
}
}
@ -176,467 +108,3 @@ impl Org<'_> {
Ok(())
}
}
/// Checks whether `text` begins with a headline marker and returns its level.
///
/// A headline marker is one or more `*` characters at the very start of
/// `text`, followed by a space, a newline, or the end of the input. The
/// returned value is the number of stars.
///
/// Only the leading stars (plus one byte) are inspected, so the common case of
/// a line that does not start with `*` is rejected after looking at a single
/// byte instead of scanning ahead to the first space or newline.
fn is_headline(text: &str) -> Option<usize> {
    let bytes = text.as_bytes();
    let stars = bytes.iter().take_while(|&&c| c == b'*').count();

    if stars == 0 {
        return None;
    }

    // The stars must be terminated by a space, a newline, or the end of text;
    // anything else (e.g. `*bold*`) is not a headline.
    match bytes.get(stars) {
        None | Some(&b' ') | Some(&b'\n') => Some(stars),
        Some(_) => None,
    }
}
/// Splits off the section (the text preceding the first headline) from `text`.
///
/// Returns `None` when `text` is empty or itself starts with a headline.
/// Otherwise returns `(tail, section)`, where `section` runs up to and
/// including the newline before the first headline line and `tail` starts at
/// that headline. When no headline follows, the whole text is the section and
/// `tail` is empty.
fn parse_section(text: &str) -> Option<(&str, &str)> {
    if text.is_empty() || is_headline(text).is_some() {
        return None;
    }

    // Start offset of the first line (after the first one) that is a headline.
    let first_headline = memchr_iter(b'\n', text.as_bytes())
        .map(|newline| newline + 1)
        .find(|&line_start| is_headline(&text[line_start..]).is_some());

    match first_headline {
        Some(at) => Some((&text[at..], &text[..at])),
        None => Some(("", text)),
    }
}
/// Splits one headline, together with everything nested beneath it, off the
/// front of `text`.
///
/// Returns `(tail, headline)`. The headline extends until the next line that
/// is a headline of the same or a shallower level (fewer or equal stars); if
/// there is none it covers all of `text` and `tail` is empty.
///
/// # Panics
///
/// Panics if `text` does not start with a headline marker.
fn parse_headline(text: &str) -> (&str, &str) {
    let level = is_headline(text).unwrap();

    // Start offset of the first following line that closes this headline.
    let boundary = memchr_iter(b'\n', text.as_bytes())
        .map(|newline| newline + 1)
        .find(|&line_start| match is_headline(&text[line_start..]) {
            Some(other_level) => other_level <= level,
            None => false,
        });

    match boundary {
        Some(at) => (&text[at..], &text[..at]),
        None => ("", text),
    }
}
/// Parses `content` as a sequence of block-level elements and appends them to
/// `parent`.
///
/// Each line start is first offered to `parse_block`. Lines that do not begin
/// a recognised block are accumulated into a pending paragraph, which is
/// closed by a blank (whitespace-only) line, by the next recognised block, or
/// by the end of the input. Every paragraph is appended as an
/// `Element::Paragraph` node and its text (with trailing newlines removed) is
/// queued on `containers` for inline parsing.
fn parse_blocks<'a>(
    arena: &mut Arena<Element<'a>>,
    content: &'a str,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) {
    /// Appends a paragraph node to `parent` and queues `raw` (minus trailing
    /// newlines) for inline parsing.
    fn push_paragraph<'s>(
        arena: &mut Arena<Element<'s>>,
        parent: NodeId,
        containers: &mut Vec<Container<'s>>,
        raw: &'s str,
    ) {
        let node = arena.new_node(Element::Paragraph);
        parent.append(node, arena).unwrap();
        containers.push(Container::Inline {
            content: raw.trim_end_matches('\n'),
            node,
        });
    }

    let mut rest = skip_empty_lines(content);

    // NOTE(review): this first attempt is made on `content`, not on the
    // blank-line-trimmed `rest`, unlike the attempts inside the loop below --
    // confirm that this is intended.
    if let Some((after, element)) = parse_block(content, arena, containers) {
        parent.append(element, arena).unwrap();
        rest = skip_empty_lines(after);
    }

    // `paragraph[..paragraph_len]` is the text of the paragraph currently
    // being accumulated; `rest` is the input that is still unread.
    let mut paragraph = rest;
    let mut paragraph_len = 0;

    while !rest.is_empty() {
        // Length of the current line, including its newline if there is one.
        let line_len = match memchr(b'\n', rest.as_bytes()) {
            Some(newline) => newline + 1,
            None => rest.len(),
        };
        let is_blank = rest.as_bytes()[..line_len]
            .iter()
            .all(u8::is_ascii_whitespace);

        if is_blank {
            // A blank line terminates the pending paragraph.
            rest = skip_empty_lines(&rest[line_len..]);
            push_paragraph(arena, parent, containers, &paragraph[..paragraph_len]);
            paragraph = rest;
            paragraph_len = 0;
            continue;
        }

        match parse_block(rest, arena, containers) {
            Some((after, element)) => {
                // A block starts here: flush the pending paragraph (if any)
                // ahead of it so document order is preserved.
                if paragraph_len != 0 {
                    push_paragraph(arena, parent, containers, &paragraph[..paragraph_len]);
                    paragraph_len = 0;
                }
                parent.append(element, arena).unwrap();
                rest = skip_empty_lines(after);
                paragraph = rest;
            }
            None => {
                // Ordinary text line: extend the pending paragraph.
                rest = &rest[line_len..];
                paragraph_len += line_len;
            }
        }
    }

    if !paragraph.is_empty() {
        push_paragraph(arena, parent, containers, &paragraph[..paragraph_len]);
    }
}
/// Tries to parse a single block-level element at the start of `contents`.
///
/// On success a node for the element is created in `arena` (it is *not*
/// attached to a parent here) and `(tail, node)` is returned, where `tail` is
/// the input remaining after the element. Elements whose body needs further
/// parsing (footnote definitions, lists, drawers, center/quote/verse/special
/// blocks, dynamic blocks) additionally push a work item onto `containers`.
/// Returns `None` when no block element starts here.
///
/// Footnote definitions and lists are tried on the raw `contents`; every
/// other element is matched after leading whitespace has been trimmed.
///
/// Fixed-width areas (`: ` lines) and comments (`# ` lines) span consecutive
/// marker lines. The stored `value` starts at the beginning of `contents`
/// (including any leading whitespace) and ends after the last marker line. A
/// final marker line that is not terminated by a newline is included in full;
/// previously only the first byte of `contents` (or the lines up to the last
/// newline) was consumed in that case, which truncated the element and could
/// slice in the middle of a multi-byte whitespace character.
fn parse_block<'a>(
    contents: &'a str,
    arena: &mut Arena<Element<'a>>,
    containers: &mut Vec<Container<'a>>,
) -> Option<(&'a str, NodeId)> {
    /// Returns `true` if `line` starts with `marker` followed by a space, a
    /// newline, or the end of the text.
    fn opens_marker_line(line: &str, marker: char) -> bool {
        let mut chars = line.chars();
        chars.next() == Some(marker)
            && match chars.next() {
                None | Some(' ') | Some('\n') => true,
                Some(_) => false,
            }
    }

    /// Returns the byte offset just past the leading run of `marker` lines in
    /// `contents`. The caller has already verified the first line.
    fn marker_lines_end(contents: &str, marker: char) -> usize {
        memchr_iter(b'\n', contents.as_bytes())
            .map(|newline| newline + 1)
            .find(|&next| !opens_marker_line(contents[next..].trim_start(), marker))
            // Every line matched and the last one has no trailing newline:
            // the run extends to the end of the input.
            .unwrap_or_else(|| contents.len())
    }

    if let Some((tail, fn_def, content)) = FnDef::parse(contents) {
        let node = arena.new_node(Element::FnDef(fn_def));
        containers.push(Container::Block { content, node });
        return Some((tail, node));
    }

    if let Some((tail, list, content)) = List::parse(contents) {
        let indent = list.indent;
        let node = arena.new_node(Element::List(list));
        containers.push(Container::List {
            content,
            node,
            indent,
        });
        return Some((tail, node));
    }

    let tail = contents.trim_start();

    if let Ok((tail, clock)) = Clock::parse(tail) {
        return Some((tail, arena.new_node(clock)));
    }

    // TODO: LaTeX environment (`\begin{...}`)

    if tail.starts_with('-') {
        if let Ok((tail, rule)) = Rule::parse(tail) {
            return Some((tail, arena.new_node(rule)));
        }
    }

    if tail.starts_with(':') {
        if let Ok((tail, (drawer, content))) = Drawer::parse(tail) {
            let node = arena.new_node(drawer.into());
            containers.push(Container::Block { content, node });
            return Some((tail, node));
        }
    }

    // FixedWidth
    if opens_marker_line(tail, ':') {
        let end = marker_lines_end(contents, ':');
        let fixed_width = arena.new_node(Element::FixedWidth {
            value: &contents[..end],
        });
        return Some((&contents[end..], fixed_width));
    }

    // Comment
    if opens_marker_line(tail, '#') {
        let end = marker_lines_end(contents, '#');
        let comment = arena.new_node(Element::Comment {
            value: &contents[..end],
        });
        return Some((&contents[end..], comment));
    }

    // Everything below is introduced by `#+`: blocks, dynamic blocks, keywords.
    if !tail.starts_with("#+") {
        return None;
    }

    if let Ok((tail, (block, content))) = Block::parse(tail) {
        // Block names are matched case-insensitively.
        match &*block.name.to_uppercase() {
            "CENTER" => {
                let node = arena.new_node(Element::CenterBlock(CenterBlock {
                    parameters: block.args,
                }));
                containers.push(Container::Block { content, node });
                Some((tail, node))
            }
            "QUOTE" => {
                let node = arena.new_node(Element::QuoteBlock(QuoteBlock {
                    parameters: block.args,
                }));
                containers.push(Container::Block { content, node });
                Some((tail, node))
            }
            // The bodies of comment, example, export and source blocks are
            // stored verbatim and are not parsed any further.
            "COMMENT" => {
                let node = arena.new_node(Element::CommentBlock(CommentBlock {
                    data: block.args,
                    contents: content,
                }));
                Some((tail, node))
            }
            "EXAMPLE" => {
                let node = arena.new_node(Element::ExampleBlock(ExampleBlock {
                    data: block.args,
                    contents: content,
                }));
                Some((tail, node))
            }
            "EXPORT" => {
                let node = arena.new_node(Element::ExportBlock(ExportBlock {
                    data: block.args.unwrap_or(""),
                    contents: content,
                }));
                Some((tail, node))
            }
            "SRC" => {
                // The first space separates the language from the remaining
                // arguments (which keep their leading space).
                let (language, arguments) = block
                    .args
                    .map(|args| args.split_at(args.find(' ').unwrap_or_else(|| args.len())))
                    .unwrap_or(("", ""));
                let node = arena.new_node(Element::SourceBlock(SourceBlock {
                    arguments,
                    language,
                    contents: content,
                }));
                Some((tail, node))
            }
            "VERSE" => {
                let node = arena.new_node(Element::VerseBlock(VerseBlock {
                    parameters: block.args,
                }));
                containers.push(Container::Block { content, node });
                Some((tail, node))
            }
            _ => {
                let node = arena.new_node(Element::SpecialBlock(SpecialBlock {
                    parameters: block.args,
                    name: block.name,
                }));
                containers.push(Container::Block { content, node });
                Some((tail, node))
            }
        }
    } else if let Ok((tail, (dyn_block, content))) = DynBlock::parse(tail) {
        let node = arena.new_node(dyn_block);
        containers.push(Container::Block { content, node });
        Some((tail, node))
    } else {
        Keyword::parse(tail)
            .ok()
            .map(|(tail, kw)| (tail, arena.new_node(kw)))
    }
}
/// Parses `content` as a run of inline objects interleaved with plain text and
/// appends the resulting nodes to `parent` in document order.
///
/// The text is scanned for bytes that can start an object (`@`, `<`, `[`,
/// `{`) or that can precede one (space, `(`, `'`, `"`, newline; an object is
/// then attempted right after that byte). For `{` both positions are tried.
/// Text between recognised objects is emitted as `Element::Text` nodes.
///
/// Any text preceding a recognised object is always flushed before the object
/// is appended. Previously, when the object was found at the first search hit
/// at a non-zero offset (for example `abc<<target>>`), the leading text was
/// silently dropped.
fn parse_inlines<'a>(
    arena: &mut Arena<Element<'a>>,
    content: &'a str,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) {
    let mut tail = content;

    // An object may start at the very beginning of the content.
    if let Some((new_tail, element)) = parse_inline(tail, arena, containers) {
        parent.append(element, arena).unwrap();
        tail = new_tail;
    }

    // `text[..pos]` is plain text that has been scanned but not yet emitted;
    // `tail` is the part of the input still to be searched.
    let mut text = tail;
    let mut pos = 0;

    let bs = bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n');

    while let Some(off) = bs.find(tail.as_bytes()) {
        // Offsets (relative to `tail`) at which an object may start.
        let (first_try, second_try) = match tail.as_bytes()[off] {
            b'{' => (off, Some(off + 1)),
            b' ' | b'(' | b'\'' | b'"' | b'\n' => (off + 1, None),
            _ => (off, None),
        };

        let mut start = first_try;
        let mut parsed = parse_inline(&tail[start..], arena, containers);
        if parsed.is_none() {
            if let Some(next) = second_try {
                start = next;
                parsed = parse_inline(&tail[start..], arena, containers);
            }
        }

        if let Some((new_tail, element)) = parsed {
            // Flush everything between the previous object and this one.
            let text_len = pos + start;
            if text_len != 0 {
                let node = arena.new_node(Element::Text {
                    value: &text[0..text_len],
                });
                parent.append(node, arena).unwrap();
            }
            parent.append(element, arena).unwrap();
            tail = new_tail;
            text = new_tail;
            pos = 0;
            continue;
        }

        // Nothing starts here; keep the byte as plain text and search on.
        tail = &tail[off + 1..];
        pos += off + 1;
    }

    if !text.is_empty() {
        let node = arena.new_node(Element::Text { value: text });
        parent.append(node, arena).unwrap();
    }
}
/// Tries to parse a single inline object at the very start of `contents`.
///
/// The first byte selects which parsers are attempted. On success a node is
/// created in `arena` (not yet attached to a parent) and `(tail, node)` is
/// returned, `tail` being the input after the object. Bold, strike-through,
/// italic and underline markup additionally queue their inner text on
/// `containers` for nested inline parsing. Inputs shorter than three bytes
/// never contain an object.
fn parse_inline<'a>(
    contents: &'a str,
    arena: &mut Arena<Element<'a>>,
    containers: &mut Vec<Container<'a>>,
) -> Option<(&'a str, NodeId)> {
    if contents.len() < 3 {
        return None;
    }

    let bytes = contents.as_bytes();
    let marker = bytes[0];

    // Objects without nested content are produced as plain elements here and
    // turned into arena nodes in one place at the end of the function.
    let leaf: Option<(&'a str, Element<'a>)> = match marker {
        // Emphasis with nested inline content: the node is created right away
        // because the container needs its id.
        b'*' | b'+' | b'/' | b'_' => {
            let (tail, content) = parse_emphasis(contents, marker)?;
            let element = match marker {
                b'*' => Element::Bold,
                b'+' => Element::Strike,
                b'/' => Element::Italic,
                _ => Element::Underline,
            };
            let node = arena.new_node(element);
            containers.push(Container::Inline { content, node });
            return Some((tail, node));
        }
        b'=' => {
            parse_emphasis(contents, b'=').map(|(tail, value)| (tail, Element::Verbatim { value }))
        }
        b'~' => parse_emphasis(contents, b'~').map(|(tail, value)| (tail, Element::Code { value })),
        b'@' => Snippet::parse(contents).ok(),
        b'{' => Macros::parse(contents).ok(),
        // Radio target, then target, then active and diary timestamps.
        b'<' => RadioTarget::parse(contents)
            .map(|(tail, (radio, _content))| (tail, radio))
            .or_else(|_| Target::parse(contents))
            .or_else(|_| {
                Timestamp::parse_active(contents).map(|(tail, timestamp)| (tail, timestamp.into()))
            })
            .or_else(|_| {
                Timestamp::parse_diary(contents).map(|(tail, timestamp)| (tail, timestamp.into()))
            })
            .ok(),
        b'[' => {
            if contents[1..].starts_with("fn:") {
                FnRef::parse(contents)
                    .ok()
                    .map(|(tail, fn_ref)| (tail, fn_ref.into()))
            } else if bytes[1] == b'[' {
                Link::parse(contents).ok()
            } else {
                // Statistics cookie first, then an inactive timestamp.
                Cookie::parse(contents)
                    .map(|(tail, cookie)| (tail, cookie.into()))
                    .or_else(|_| {
                        Timestamp::parse_inactive(contents)
                            .map(|(tail, timestamp)| (tail, timestamp.into()))
                    })
                    .ok()
            }
        }
        b's' => InlineSrc::parse(contents).ok(),
        b'c' => InlineCall::parse(contents).ok(),
        _ => None,
    };

    leaf.map(|(tail, element)| (tail, arena.new_node(element)))
}
/// Splits `contents` into list items and appends one `Element::ListItem` node
/// per item to `parent`.
///
/// Items are parsed one after another with `ListItem::parse` (using the
/// list's `indent`) until the input is exhausted. The body of every item is
/// queued on `containers` to be parsed as block-level content.
fn parse_list_items<'a>(
    arena: &mut Arena<Element<'a>>,
    mut contents: &'a str,
    indent: usize,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) {
    loop {
        if contents.is_empty() {
            break;
        }

        let (rest, item, body) = ListItem::parse(contents, indent);
        let node = arena.new_node(Element::ListItem(item));
        parent.append(node, arena).unwrap();
        containers.push(Container::Block {
            content: body,
            node,
        });

        contents = rest;
    }
}