feat(parser): drawer parsing
This commit is contained in:
parent
fe591d2143
commit
6fa43f7571
8 changed files with 202 additions and 85 deletions
|
|
@ -1,37 +1,42 @@
|
|||
use crate::lines::Lines;
|
||||
use memchr::memchr2;
|
||||
use memchr::{memchr, memchr_iter};
|
||||
|
||||
// return (name, args, contents-begin, contents-end, end)
|
||||
#[inline]
|
||||
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
|
||||
debug_assert!(src.starts_with("#+"));
|
||||
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
|
||||
debug_assert!(text.starts_with("#+"));
|
||||
|
||||
if src.len() <= 8 || src[2..8].to_uppercase() != "BEGIN_" {
|
||||
if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let name = memchr2(b' ', b'\n', src.as_bytes())
|
||||
.filter(|&i| src.as_bytes()[8..i].iter().all(u8::is_ascii_alphabetic))?;
|
||||
let mut lines = Lines::new(src);
|
||||
let (pre_limit, begin, _) = lines.next()?;
|
||||
let args = if pre_limit == name {
|
||||
None
|
||||
} else {
|
||||
Some(&src[name..pre_limit])
|
||||
};
|
||||
let name = &src[8..name];
|
||||
let end_line = format!(r"#+END_{}", name.to_uppercase());
|
||||
let mut pre_end = begin;
|
||||
let bytes = text.as_bytes();
|
||||
let mut lines = memchr_iter(b'\n', text.as_bytes());
|
||||
|
||||
for (_, end, line) in lines {
|
||||
if line.trim() == end_line {
|
||||
return Some((name, args, begin, pre_end, end));
|
||||
} else {
|
||||
pre_end = end;
|
||||
let (name, para, off) = lines
|
||||
.next()
|
||||
.map(|i| {
|
||||
memchr(b' ', &bytes[8..i])
|
||||
.map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1))
|
||||
.unwrap_or((&text[8..i], None, i + 1))
|
||||
})
|
||||
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
|
||||
|
||||
let mut pos = off;
|
||||
let end = format!(r"#+END_{}", name.to_uppercase());
|
||||
|
||||
for i in lines {
|
||||
if text[pos..i].trim().eq_ignore_ascii_case(&end) {
|
||||
return Some((name, para, off, pos, i + 1));
|
||||
}
|
||||
|
||||
pos = i + 1;
|
||||
}
|
||||
|
||||
None
|
||||
if text[pos..].trim().eq_ignore_ascii_case(&end) {
|
||||
Some((name, para, off, pos, text.len()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -42,19 +47,23 @@ mod tests {
|
|||
|
||||
assert_eq!(
|
||||
parse("#+BEGIN_SRC\n#+END_SRC"),
|
||||
Some(("SRC", None, 12, 12, 21))
|
||||
Some((
|
||||
"SRC",
|
||||
None,
|
||||
"#+BEGIN_SRC\n".len(),
|
||||
"#+BEGIN_SRC\n".len(),
|
||||
"#+BEGIN_SRC\n#+END_SRC".len()
|
||||
))
|
||||
);
|
||||
assert_eq!(
|
||||
parse(
|
||||
r#"#+BEGIN_SRC rust
|
||||
fn main() {
|
||||
// print "Hello World!" to the console
|
||||
println!("Hello World!");
|
||||
}
|
||||
#+END_SRC
|
||||
"#
|
||||
),
|
||||
Some(("SRC", Some(" rust"), 17, 104, 114))
|
||||
parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"),
|
||||
Some((
|
||||
"SRC",
|
||||
Some("javascript"),
|
||||
"#+BEGIN_SRC javascript \n".len(),
|
||||
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n".len(),
|
||||
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n".len()
|
||||
))
|
||||
);
|
||||
// TODO: more testing
|
||||
}
|
||||
|
|
|
|||
51
src/elements/drawer.rs
Normal file
51
src/elements/drawer.rs
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
use memchr::memchr_iter;
|
||||
|
||||
// return (name, offset, limit, end)
|
||||
pub(crate) fn parse<'a>(text: &'a str) -> Option<(&'a str, usize, usize, usize)> {
|
||||
debug_assert!(text.starts_with(':'));
|
||||
|
||||
let mut lines = memchr_iter(b'\n', text.as_bytes());
|
||||
|
||||
let (name, off) = lines
|
||||
.next()
|
||||
.map(|i| (text[1..i].trim_end(), i + 1))
|
||||
.filter(|(name, _)| {
|
||||
name.ends_with(':')
|
||||
&& name[0..name.len() - 1]
|
||||
.as_bytes()
|
||||
.iter()
|
||||
.all(|&c| c.is_ascii_alphabetic() || c == b'-' || c == b'_')
|
||||
})?;
|
||||
|
||||
let mut pos = off;
|
||||
for i in lines {
|
||||
if text[pos..i].trim().eq_ignore_ascii_case(":END:") {
|
||||
return Some((&name[0..name.len() - 1], off, pos, i + 1));
|
||||
}
|
||||
pos = i + 1;
|
||||
}
|
||||
|
||||
if text[pos..].trim().eq_ignore_ascii_case(":END:") {
|
||||
Some((&name[0..name.len() - 1], off, pos, text.len()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    /// A property drawer whose closing `:END:` line has no trailing newline.
    #[test]
    fn parse() {
        use super::parse;

        // Build the input from its three lines so the expected offsets can be
        // derived from the pieces instead of being spelled out as literals.
        let header = ":PROPERTIES:\n";
        let body = " :CUSTOM_ID: id\n";
        let footer = " :END:";
        let text = [header, body, footer].concat();

        let offset = header.len();
        let limit = offset + body.len();
        let end = text.len();

        assert_eq!(parse(&text), Some(("PROPERTIES", offset, limit, end)));
    }
}
|
||||
|
|
@ -1,40 +1,41 @@
|
|||
use crate::lines::Lines;
|
||||
use memchr::{memchr, memchr2};
|
||||
use memchr::{memchr, memchr_iter};
|
||||
|
||||
/// return (name, parameters, contents-begin, contents-end, end)
|
||||
// return (name, parameters, offset, limit, end)
|
||||
#[inline]
|
||||
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
|
||||
debug_assert!(src.starts_with("#+"));
|
||||
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
|
||||
debug_assert!(text.starts_with("#+"));
|
||||
|
||||
if src.len() <= 9 || !src[2..9].eq_ignore_ascii_case("BEGIN: ") {
|
||||
if text.len() <= 9 || !text[2..9].eq_ignore_ascii_case("BEGIN: ") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut lines = Lines::new(src);
|
||||
let (mut pre_limit, _, _) = lines.next()?;
|
||||
let bytes = text.as_bytes();
|
||||
let mut lines = memchr_iter(b'\n', bytes);
|
||||
|
||||
for (limit, end, line) in lines {
|
||||
if line.trim().eq_ignore_ascii_case("#+END:") {
|
||||
let bytes = src.as_bytes();
|
||||
let (name, para, off) = lines
|
||||
.next()
|
||||
.map(|i| {
|
||||
memchr(b' ', &bytes[9..i])
|
||||
.map(|x| (&text[9..9 + x], Some(text[9 + x..i].trim()), i + 1))
|
||||
.unwrap_or((&text[9..i], None, i + 1))
|
||||
})
|
||||
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
|
||||
|
||||
let i = memchr2(b' ', b'\n', &bytes[9..])
|
||||
.map(|i| i + 9)
|
||||
.filter(|&i| bytes[9..i].iter().all(|&c| c.is_ascii_alphabetic()))?;
|
||||
let name = &src[8..i].trim();
|
||||
let mut pos = off;
|
||||
|
||||
return Some(if bytes[i] == b'\n' {
|
||||
(name, None, i, pre_limit, end)
|
||||
} else {
|
||||
let begin = memchr(b'\n', bytes)
|
||||
.map(|i| i + 1)
|
||||
.unwrap_or_else(|| src.len());
|
||||
(name, Some(&src[i..begin].trim()), begin, pre_limit, end)
|
||||
});
|
||||
for i in lines {
|
||||
if text[pos..i].trim().eq_ignore_ascii_case("#+END:") {
|
||||
return Some((name, para, off, pos, i + 1));
|
||||
}
|
||||
pre_limit = limit;
|
||||
|
||||
pos = i + 1;
|
||||
}
|
||||
|
||||
None
|
||||
if text[pos..].trim().eq_ignore_ascii_case("#+END:") {
|
||||
Some((name, para, off, pos, text.len()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -45,13 +46,14 @@ mod tests {
|
|||
|
||||
// TODO: testing
|
||||
assert_eq!(
|
||||
parse(
|
||||
r"#+BEGIN: clocktable :scope file
|
||||
CONTENTS
|
||||
#+END:
|
||||
"
|
||||
),
|
||||
Some(("clocktable", Some(":scope file"), 32, 40, 48))
|
||||
parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"),
|
||||
Some((
|
||||
"clocktable",
|
||||
Some(":scope file"),
|
||||
"#+BEGIN: clocktable :scope file\n".len(),
|
||||
"#+BEGIN: clocktable :scope file\nCONTENTS\n".len(),
|
||||
"#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n".len(),
|
||||
))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use crate::lines::Lines;
|
||||
use memchr::memchr;
|
||||
|
||||
#[inline]
|
||||
pub fn is_item(text: &str) -> Option<bool> {
|
||||
|
|
@ -33,7 +34,7 @@ pub fn is_item(text: &str) -> Option<bool> {
|
|||
}
|
||||
}
|
||||
|
||||
// returns (bullets, contents begin, contents end, end, has more)
|
||||
// return (bullets, offset, limit, end, has more)
|
||||
#[inline]
|
||||
pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
|
||||
debug_assert!(
|
||||
|
|
@ -50,7 +51,7 @@ pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
|
|||
|
||||
let mut lines = Lines::new(src);
|
||||
let (mut pre_limit, mut pre_end, first_line) = lines.next().unwrap();
|
||||
let begin = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) {
|
||||
let begin = match memchr(b' ', &first_line.as_bytes()[ident..]) {
|
||||
Some(i) => i + ident + 1,
|
||||
None => {
|
||||
let len = first_line.len();
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
pub(crate) mod block;
|
||||
pub(crate) mod clock;
|
||||
pub(crate) mod drawer;
|
||||
pub(crate) mod dyn_block;
|
||||
pub(crate) mod fn_def;
|
||||
pub(crate) mod keyword;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue