feat(parser): drawer parsing

This commit is contained in:
PoiScript 2019-04-07 20:10:43 +08:00
parent fe591d2143
commit 6fa43f7571
8 changed files with 202 additions and 85 deletions

View file

@ -1,37 +1,42 @@
use crate::lines::Lines;
use memchr::memchr2;
use memchr::{memchr, memchr_iter};
// return (name, args, contents-begin, contents-end, end)
#[inline]
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(src.starts_with("#+"));
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(text.starts_with("#+"));
if src.len() <= 8 || src[2..8].to_uppercase() != "BEGIN_" {
if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" {
return None;
}
let name = memchr2(b' ', b'\n', src.as_bytes())
.filter(|&i| src.as_bytes()[8..i].iter().all(u8::is_ascii_alphabetic))?;
let mut lines = Lines::new(src);
let (pre_limit, begin, _) = lines.next()?;
let args = if pre_limit == name {
None
} else {
Some(&src[name..pre_limit])
};
let name = &src[8..name];
let end_line = format!(r"#+END_{}", name.to_uppercase());
let mut pre_end = begin;
let bytes = text.as_bytes();
let mut lines = memchr_iter(b'\n', text.as_bytes());
for (_, end, line) in lines {
if line.trim() == end_line {
return Some((name, args, begin, pre_end, end));
} else {
pre_end = end;
let (name, para, off) = lines
.next()
.map(|i| {
memchr(b' ', &bytes[8..i])
.map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1))
.unwrap_or((&text[8..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
let mut pos = off;
let end = format!(r"#+END_{}", name.to_uppercase());
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(&end) {
return Some((name, para, off, pos, i + 1));
}
pos = i + 1;
}
None
if text[pos..].trim().eq_ignore_ascii_case(&end) {
Some((name, para, off, pos, text.len()))
} else {
None
}
}
#[cfg(test)]
@ -42,19 +47,23 @@ mod tests {
assert_eq!(
parse("#+BEGIN_SRC\n#+END_SRC"),
Some(("SRC", None, 12, 12, 21))
Some((
"SRC",
None,
"#+BEGIN_SRC\n".len(),
"#+BEGIN_SRC\n".len(),
"#+BEGIN_SRC\n#+END_SRC".len()
))
);
assert_eq!(
parse(
r#"#+BEGIN_SRC rust
fn main() {
// print "Hello World!" to the console
println!("Hello World!");
}
#+END_SRC
"#
),
Some(("SRC", Some(" rust"), 17, 104, 114))
parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"),
Some((
"SRC",
Some("javascript"),
"#+BEGIN_SRC javascript \n".len(),
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n".len(),
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n".len()
))
);
// TODO: more testing
}

51
src/elements/drawer.rs Normal file
View file

@ -0,0 +1,51 @@
use memchr::memchr_iter;
/// Parses a drawer located at the very start of `text`.
///
/// A drawer looks like this:
///
/// ```text
/// :NAME:
/// contents
/// :END:
/// ```
///
/// `NAME` must be a non-empty run of ASCII letters, `-` or `_`. Trailing
/// whitespace after the header is ignored. The closing line is matched
/// case-insensitively and may be surrounded by whitespace.
///
/// Returns `(name, offset, limit, end)`, all byte indices into `text`:
///
/// * `name`   - the drawer name without the surrounding colons,
/// * `offset` - start of the contents (first byte after the header line),
/// * `limit`  - end of the contents (first byte of the `:END:` line),
/// * `end`    - first byte after the `:END:` line (or `text.len()` when the
///   closing line is the last one and has no trailing newline).
///
/// Returns `None` when the header is malformed, when the name is empty, or
/// when no closing `:END:` line is found.
pub(crate) fn parse<'a>(text: &'a str) -> Option<(&'a str, usize, usize, usize)> {
    debug_assert!(text.starts_with(':'));

    // Byte positions of every line feed in `text`.
    let mut lines = memchr_iter(b'\n', text.as_bytes());

    // The header must be terminated by a newline; a lone `:NAME:` is not a drawer.
    let header_eol = lines.next()?;
    let off = header_eol + 1;

    // Checked slicing: if the precondition above is violated in a release
    // build this yields `None` instead of panicking on a bad range.
    let header = text.get(1..header_eol)?.trim_end();
    if !header.ends_with(':') {
        return None;
    }
    // `header` ends with an ASCII `:`, so `len() - 1` is a char boundary.
    let name = &header[..header.len() - 1];

    // `all` is vacuously true on an empty name, so reject that explicitly:
    // `::` is not a drawer.
    let valid_name = !name.is_empty()
        && name
            .bytes()
            .all(|c| c.is_ascii_alphabetic() || c == b'-' || c == b'_');
    if !valid_name {
        return None;
    }

    let is_end = |line: &str| line.trim().eq_ignore_ascii_case(":END:");

    // Walk the remaining newline-terminated lines looking for the closing line.
    let mut pos = off;
    for eol in lines {
        if is_end(&text[pos..eol]) {
            return Some((name, off, pos, eol + 1));
        }
        pos = eol + 1;
    }

    // The last line may lack a trailing newline.
    if is_end(&text[pos..]) {
        Some((name, off, pos, text.len()))
    } else {
        None
    }
}
#[cfg(test)]
mod tests {
    // Expected indices are spelled as `"<consumed prefix>".len()` so they can
    // be checked by eye against the input instead of counting bytes.

    /// A well-formed drawer whose closing line has no trailing newline.
    #[test]
    fn parse() {
        use super::parse;

        assert_eq!(
            parse(":PROPERTIES:\n  :CUSTOM_ID: id\n  :END:"),
            Some((
                "PROPERTIES",
                ":PROPERTIES:\n".len(),
                ":PROPERTIES:\n  :CUSTOM_ID: id\n".len(),
                ":PROPERTIES:\n  :CUSTOM_ID: id\n  :END:".len()
            ))
        );
    }

    /// The closing line is matched case-insensitively, and `end` stops right
    /// after its newline even when more text follows.
    #[test]
    fn parse_end_line_variants() {
        assert_eq!(
            super::parse(":LOGBOOK:\n:end:\n"),
            Some((
                "LOGBOOK",
                ":LOGBOOK:\n".len(),
                ":LOGBOOK:\n".len(),
                ":LOGBOOK:\n:end:\n".len()
            ))
        );
        assert_eq!(
            super::parse(":A:\nx\n:END:\nrest"),
            Some((
                "A",
                ":A:\n".len(),
                ":A:\nx\n".len(),
                ":A:\nx\n:END:\n".len()
            ))
        );
    }

    /// Inputs that must not be recognised as a drawer.
    #[test]
    fn parse_rejects_non_drawers() {
        // Header without a terminating newline.
        assert_eq!(super::parse(":PROPERTIES:"), None);
        // No closing `:END:` line.
        assert_eq!(super::parse(":PROPERTIES:\n  :CUSTOM_ID: id\n"), None);
        // Name contains characters other than letters, `-` and `_`.
        assert_eq!(super::parse(":NOT A DRAWER:\n:END:"), None);
        // Header is missing its trailing colon.
        assert_eq!(super::parse(":PROPERTIES\n:END:"), None);
    }
}

View file

@ -1,40 +1,41 @@
use crate::lines::Lines;
use memchr::{memchr, memchr2};
use memchr::{memchr, memchr_iter};
/// return (name, parameters, contents-begin, contents-end, end)
// return (name, parameters, offset, limit, end)
#[inline]
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(src.starts_with("#+"));
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(text.starts_with("#+"));
if src.len() <= 9 || !src[2..9].eq_ignore_ascii_case("BEGIN: ") {
if text.len() <= 9 || !text[2..9].eq_ignore_ascii_case("BEGIN: ") {
return None;
}
let mut lines = Lines::new(src);
let (mut pre_limit, _, _) = lines.next()?;
let bytes = text.as_bytes();
let mut lines = memchr_iter(b'\n', bytes);
for (limit, end, line) in lines {
if line.trim().eq_ignore_ascii_case("#+END:") {
let bytes = src.as_bytes();
let (name, para, off) = lines
.next()
.map(|i| {
memchr(b' ', &bytes[9..i])
.map(|x| (&text[9..9 + x], Some(text[9 + x..i].trim()), i + 1))
.unwrap_or((&text[9..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
let i = memchr2(b' ', b'\n', &bytes[9..])
.map(|i| i + 9)
.filter(|&i| bytes[9..i].iter().all(|&c| c.is_ascii_alphabetic()))?;
let name = &src[8..i].trim();
let mut pos = off;
return Some(if bytes[i] == b'\n' {
(name, None, i, pre_limit, end)
} else {
let begin = memchr(b'\n', bytes)
.map(|i| i + 1)
.unwrap_or_else(|| src.len());
(name, Some(&src[i..begin].trim()), begin, pre_limit, end)
});
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case("#+END:") {
return Some((name, para, off, pos, i + 1));
}
pre_limit = limit;
pos = i + 1;
}
None
if text[pos..].trim().eq_ignore_ascii_case("#+END:") {
Some((name, para, off, pos, text.len()))
} else {
None
}
}
#[cfg(test)]
@ -45,13 +46,14 @@ mod tests {
// TODO: testing
assert_eq!(
parse(
r"#+BEGIN: clocktable :scope file
CONTENTS
#+END:
"
),
Some(("clocktable", Some(":scope file"), 32, 40, 48))
parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"),
Some((
"clocktable",
Some(":scope file"),
"#+BEGIN: clocktable :scope file\n".len(),
"#+BEGIN: clocktable :scope file\nCONTENTS\n".len(),
"#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n".len(),
))
);
}
}

View file

@ -1,4 +1,5 @@
use crate::lines::Lines;
use memchr::memchr;
#[inline]
pub fn is_item(text: &str) -> Option<bool> {
@ -33,7 +34,7 @@ pub fn is_item(text: &str) -> Option<bool> {
}
}
// returns (bullets, contents begin, contents end, end, has more)
// return (bullets, offset, limit, end, has more)
#[inline]
pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
debug_assert!(
@ -50,7 +51,7 @@ pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
let mut lines = Lines::new(src);
let (mut pre_limit, mut pre_end, first_line) = lines.next().unwrap();
let begin = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) {
let begin = match memchr(b' ', &first_line.as_bytes()[ident..]) {
Some(i) => i + ident + 1,
None => {
let len = first_line.len();

View file

@ -1,5 +1,6 @@
pub(crate) mod block;
pub(crate) mod clock;
pub(crate) mod drawer;
pub(crate) mod dyn_block;
pub(crate) mod fn_def;
pub(crate) mod keyword;