feat: indextree-based org parser

This commit is contained in:
PoiScript 2019-06-26 21:53:08 +08:00
parent 3beabcedfa
commit f786233852
34 changed files with 1919 additions and 1282 deletions

View file

@ -1,69 +1,77 @@
use memchr::{memchr, memchr_iter};
// return (name, args, contents-begin, contents-end, end)
#[inline]
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(text.starts_with("#+"));
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Block<'a> {
pub name: &'a str,
pub args: Option<&'a str>,
}
if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" {
return None;
}
impl Block<'_> {
#[inline]
// return (block, contents-begin, contents-end, end)
pub fn parse(text: &str) -> Option<(Block<'_>, usize, usize, usize)> {
debug_assert!(text.starts_with("#+"));
let mut lines = memchr_iter(b'\n', text.as_bytes());
let (name, para, off) = lines
.next()
.map(|i| {
memchr(b' ', &text.as_bytes()[8..i])
.map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1))
.unwrap_or((&text[8..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
let mut pos = off;
let end = format!(r"#+END_{}", name.to_uppercase());
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(&end) {
return Some((name, para, off, pos, i + 1));
if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" {
return None;
}
pos = i + 1;
}
let mut lines = memchr_iter(b'\n', text.as_bytes());
if text[pos..].trim().eq_ignore_ascii_case(&end) {
Some((name, para, off, pos, text.len()))
} else {
None
let (name, args, off) = lines
.next()
.map(|i| {
memchr(b' ', &text.as_bytes()[8..i])
.map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1))
.unwrap_or((&text[8..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
let mut pos = off;
let end = format!(r"#+END_{}", name.to_uppercase());
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(&end) {
return Some((Block { name, args }, off, pos, i + 1));
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case(&end) {
Some((Block { name, args }, off, pos, text.len()))
} else {
None
}
}
}
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
parse("#+BEGIN_SRC\n#+END_SRC"),
Some((
"SRC",
None,
"#+BEGIN_SRC\n".len(),
"#+BEGIN_SRC\n".len(),
"#+BEGIN_SRC\n#+END_SRC".len()
))
);
assert_eq!(
parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"),
Some((
"SRC",
Some("javascript"),
"#+BEGIN_SRC javascript \n".len(),
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n".len(),
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n".len()
))
);
// TODO: more testing
}
#[test]
fn parse() {
assert_eq!(
Block::parse("#+BEGIN_SRC\n#+END_SRC"),
Some((
Block {
name: "SRC",
args: None,
},
"#+BEGIN_SRC\n".len(),
"#+BEGIN_SRC\n".len(),
"#+BEGIN_SRC\n#+END_SRC".len()
))
);
assert_eq!(
Block::parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"),
Some((
Block {
name: "SRC",
args: Some("javascript"),
},
"#+BEGIN_SRC javascript \n".len(),
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n".len(),
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n".len()
))
);
// TODO: more testing
}

View file

@ -1,4 +1,4 @@
use crate::objects::timestamp::{Datetime, Timestamp};
use crate::elements::{Datetime, Timestamp};
use memchr::memchr;
/// clock elements
@ -23,8 +23,8 @@ pub enum Clock<'a> {
},
}
impl<'a> Clock<'a> {
pub(crate) fn parse(text: &'a str) -> Option<(Clock<'a>, usize)> {
impl Clock<'_> {
pub(crate) fn parse(text: &str) -> Option<(Clock<'_>, usize)> {
let (text, eol) = memchr(b'\n', text.as_bytes())
.map(|i| (text[..i].trim(), i + 1))
.unwrap_or_else(|| (text.trim(), text.len()));
@ -104,7 +104,7 @@ impl<'a> Clock<'a> {
}
/// returns `Some` if the clock is closed, `None` if running
pub fn duration(&self) -> Option<&'a str> {
pub fn duration(&self) -> Option<&str> {
match self {
Clock::Closed { duration, .. } => Some(duration),
Clock::Running { .. } => None,

66
src/elements/cookie.rs Normal file
View file

@ -0,0 +1,66 @@
use memchr::{memchr, memchr2};
/// A cookie of the form `[N%]` or `[N/M]`.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Cookie<'a> {
    /// `[N%]`: the digits in front of the `%` (may be empty).
    Percent(&'a str),
    /// `[N/M]`: the digits before and after the `/` (either may be empty).
    Slash(&'a str, &'a str),
}

impl Cookie<'_> {
    /// Parses a cookie at the very start of `src`.
    ///
    /// `src` must start with `[`; this is only checked by a `debug_assert!`.
    ///
    /// Returns the cookie and the number of bytes it occupies (both brackets
    /// included), or `None` when `src` does not start with a well-formed
    /// cookie.
    #[inline]
    pub(crate) fn parse(src: &str) -> Option<(Cookie<'_>, usize)> {
        debug_assert!(src.starts_with('['));

        let bytes = src.as_bytes();

        // first `%` or `/`; everything between `[` and it has to be a digit
        let sep = memchr2(b'%', b'/', bytes)
            .filter(|&i| bytes[1..i].iter().all(u8::is_ascii_digit))?;

        if bytes[sep] == b'%' {
            // A percent cookie closes right after the `%`. Anything else
            // (`[10%5]`, `[10% ]`, `[10%%]`) is not a cookie and must not be
            // reinterpreted as a slash cookie.
            if *bytes.get(sep + 1)? == b']' {
                Some((Cookie::Percent(&src[1..sep]), sep + 2))
            } else {
                None
            }
        } else {
            // slash cookie: only digits are allowed between `/` and `]`
            let close = memchr(b']', bytes)
                .filter(|&i| bytes[sep + 1..i].iter().all(u8::is_ascii_digit))?;

            Some((
                Cookie::Slash(&src[1..sep], &src[sep + 1..close]),
                close + 1,
            ))
        }
    }
}
#[test]
fn parse() {
    // well-formed cookies; each one spans the whole input
    let accepted = vec![
        ("[1/10]", Cookie::Slash("1", "10")),
        ("[1/1000]", Cookie::Slash("1", "1000")),
        ("[10%]", Cookie::Percent("10")),
        ("[%]", Cookie::Percent("")),
        ("[/]", Cookie::Slash("", "")),
        ("[100/]", Cookie::Slash("100", "")),
        ("[/100]", Cookie::Slash("", "100")),
    ];
    for (src, cookie) in accepted {
        assert_eq!(Cookie::parse(src), Some((cookie, src.len())));
    }

    // malformed cookies
    let rejected = ["[10% ]", "[1//100]", "[1\\100]", "[10%%]"];
    for src in rejected.iter() {
        assert_eq!(Cookie::parse(src), None);
    }
}

View file

@ -1,51 +1,69 @@
use memchr::memchr_iter;
// return (name, offset, limit, end)
pub(crate) fn parse(text: &str) -> Option<(&str, usize, usize, usize)> {
debug_assert!(text.starts_with(':'));
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Drawer<'a> {
pub name: &'a str,
}
let mut lines = memchr_iter(b'\n', text.as_bytes());
impl<'a> Drawer<'a> {
#[inline]
// return (drawer, contents-begin, contents-end , end)
pub(crate) fn parse(text: &'a str) -> Option<(Drawer<'a>, usize, usize, usize)> {
debug_assert!(text.starts_with(':'));
let (name, off) = lines
.next()
.map(|i| (text[1..i].trim_end(), i + 1))
.filter(|(name, _)| {
name.ends_with(':')
&& name[0..name.len() - 1]
.as_bytes()
.iter()
.all(|&c| c.is_ascii_alphabetic() || c == b'-' || c == b'_')
})?;
let mut lines = memchr_iter(b'\n', text.as_bytes());
let mut pos = off;
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(":END:") {
return Some((&name[0..name.len() - 1], off, pos, i + 1));
let (name, off) = lines
.next()
.map(|i| (text[1..i].trim_end(), i + 1))
.filter(|(name, _)| {
name.ends_with(':')
&& name[0..name.len() - 1]
.as_bytes()
.iter()
.all(|&c| c.is_ascii_alphabetic() || c == b'-' || c == b'_')
})?;
let mut pos = off;
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(":END:") {
return Some((
Drawer {
name: &name[0..name.len() - 1],
},
off,
pos,
i + 1,
));
}
pos = i + 1;
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case(":END:") {
Some((&name[0..name.len() - 1], off, pos, text.len()))
} else {
None
}
}
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"),
if text[pos..].trim().eq_ignore_ascii_case(":END:") {
Some((
"PROPERTIES",
":PROPERTIES:\n".len(),
":PROPERTIES:\n :CUSTOM_ID: id\n".len(),
":PROPERTIES:\n :CUSTOM_ID: id\n :END:".len()
Drawer {
name: &name[0..name.len() - 1],
},
off,
pos,
text.len(),
))
)
} else {
None
}
}
}
#[test]
fn parse() {
assert_eq!(
Drawer::parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"),
Some((
Drawer { name: "PROPERTIES" },
":PROPERTIES:\n".len(),
":PROPERTIES:\n :CUSTOM_ID: id\n".len(),
":PROPERTIES:\n :CUSTOM_ID: id\n :END:".len()
))
)
}

View file

@ -1,65 +1,87 @@
use memchr::{memchr, memchr_iter};
// return (name, parameters, offset, limit, end)
#[inline]
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(text.starts_with("#+"));
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct DynBlock<'a> {
pub block_name: &'a str,
pub arguments: Option<&'a str>,
}
if text.len() <= "#+BEGIN: ".len() || !text[2..9].eq_ignore_ascii_case("BEGIN: ") {
return None;
}
impl DynBlock<'_> {
#[inline]
// return (dyn_block, contents-begin, contents-end, end)
pub(crate) fn parse(text: &str) -> Option<(DynBlock<'_>, usize, usize, usize)> {
debug_assert!(text.starts_with("#+"));
let bytes = text.as_bytes();
let mut lines = memchr_iter(b'\n', bytes);
let (name, para, off) = lines
.next()
.map(|i| {
memchr(b' ', &bytes["#+BEGIN: ".len()..i])
.map(|x| {
(
&text["#+BEGIN: ".len().."#+BEGIN: ".len() + x],
Some(text["#+BEGIN: ".len() + x..i].trim()),
i + 1,
)
})
.unwrap_or((&text["#+BEGIN: ".len()..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
let mut pos = off;
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case("#+END:") {
return Some((name, para, off, pos, i + 1));
if text.len() <= "#+BEGIN: ".len() || !text[2..9].eq_ignore_ascii_case("BEGIN: ") {
return None;
}
pos = i + 1;
}
let bytes = text.as_bytes();
let mut lines = memchr_iter(b'\n', bytes);
if text[pos..].trim().eq_ignore_ascii_case("#+END:") {
Some((name, para, off, pos, text.len()))
} else {
None
}
}
let (name, para, off) = lines
.next()
.map(|i| {
memchr(b' ', &bytes["#+BEGIN: ".len()..i])
.map(|x| {
(
&text["#+BEGIN: ".len().."#+BEGIN: ".len() + x],
Some(text["#+BEGIN: ".len() + x..i].trim()),
i + 1,
)
})
.unwrap_or((&text["#+BEGIN: ".len()..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
let mut pos = off;
// TODO: testing
assert_eq!(
parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"),
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case("#+END:") {
return Some((
DynBlock {
block_name: name,
arguments: para,
},
off,
pos,
i + 1,
));
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case("#+END:") {
Some((
"clocktable",
Some(":scope file"),
"#+BEGIN: clocktable :scope file\n".len(),
"#+BEGIN: clocktable :scope file\nCONTENTS\n".len(),
"#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n".len(),
DynBlock {
block_name: name,
arguments: para,
},
off,
pos,
text.len(),
))
);
} else {
None
}
}
}
#[test]
fn parse() {
// TODO: testing
assert_eq!(
DynBlock::parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"),
Some((
DynBlock {
block_name: "clocktable",
arguments: Some(":scope file"),
},
"#+BEGIN: clocktable :scope file\n".len(),
"#+BEGIN: clocktable :scope file\nCONTENTS\n".len(),
"#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n".len(),
))
);
}

56
src/elements/emphasis.rs Normal file
View file

@ -0,0 +1,56 @@
use bytecount::count;
use memchr::memchr;
/// Finds the end of an emphasis span that opens at the start of `text`.
///
/// `text` must be at least three bytes long (checked by a `debug_assert!`);
/// its first byte is taken to be the opening marker. The span is accepted
/// when
///
/// * the byte after the opening marker is not whitespace,
/// * the next occurrence of `marker` is preceded by a non-whitespace byte,
/// * fewer than two newlines lie between the two markers, and
/// * the closing marker is followed by the end of `text` or by one of
///   `` ` ` - . , : ! ? ' \n ) } ``.
///
/// Returns the byte offset just past the closing marker.
#[inline]
pub fn parse(text: &str, marker: u8) -> Option<usize> {
    debug_assert!(text.len() >= 3);

    let bytes = text.as_bytes();

    if bytes[1].is_ascii_whitespace() {
        return None;
    }

    // `last` is relative to `bytes[1..]`, so in `bytes` it is the index of the
    // byte right in front of the closing marker
    let last = memchr(marker, &bytes[1..])?;

    if count(&bytes[1..=last], b'\n') >= 2 {
        return None;
    }

    if bytes[last].is_ascii_whitespace() {
        return None;
    }

    let end = last + 2;
    let post_ok = match bytes.get(end) {
        None => true,
        Some(&post) => match post {
            b' ' | b'-' | b'.' | b',' | b':' | b'!' | b'?' | b'\'' | b'\n' | b')' | b'}' => true,
            _ => false,
        },
    };

    if post_ok {
        Some(end)
    } else {
        None
    }
}
#[cfg(test)]
mod tests {
    #[test]
    fn parse() {
        use super::parse;

        // (input, marker, expected offset past the closing marker)
        let cases: [(&str, u8, Option<usize>); 7] = [
            ("*bold*", b'*', Some("*bold*".len())),
            ("*bo\nld*", b'*', Some("*bo\nld*".len())),
            ("*bold*a", b'*', None),
            ("*bold*", b'/', None),
            ("*bold *", b'*', None),
            ("* bold*", b'*', None),
            ("*b\nol\nd*", b'*', None),
        ];

        for &(text, marker, expected) in cases.iter() {
            assert_eq!(parse(text, marker), expected);
        }
    }
}

View file

@ -1,60 +1,68 @@
use memchr::memchr;
#[inline]
pub fn parse(text: &str) -> Option<(&str, &str, usize)> {
if text.starts_with("[fn:") {
let (label, off) = memchr(b']', text.as_bytes())
.filter(|&i| {
i != 4
&& text.as_bytes()["[fn:".len()..i]
.iter()
.all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_')
})
.map(|i| (&text["[fn:".len()..i], i + 1))?;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct FnDef<'a> {
pub label: &'a str,
}
let (content, off) = memchr(b'\n', text.as_bytes())
.map(|i| (&text[off..i], i))
.unwrap_or_else(|| (&text[off..], text.len()));
impl FnDef<'_> {
#[inline]
pub fn parse(text: &str) -> Option<(FnDef<'_>, usize, usize)> {
if text.starts_with("[fn:") {
let (label, off) = memchr(b']', text.as_bytes())
.filter(|&i| {
i != 4
&& text.as_bytes()["[fn:".len()..i]
.iter()
.all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_')
})
.map(|i| (&text["[fn:".len()..i], i + 1))?;
Some((label, content, off))
} else {
None
let end = memchr(b'\n', text.as_bytes()).unwrap_or_else(|| text.len());
Some((FnDef { label }, off, end))
} else {
None
}
}
}
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
parse("[fn:1] https://orgmode.org"),
Some((
"1",
" https://orgmode.org",
"[fn:1] https://orgmode.org".len()
))
);
assert_eq!(
parse("[fn:word_1] https://orgmode.org"),
Some((
"word_1",
" https://orgmode.org",
"[fn:word_1] https://orgmode.org".len()
))
);
assert_eq!(
parse("[fn:WORD-1] https://orgmode.org"),
Some((
"WORD-1",
" https://orgmode.org",
"[fn:WORD-1] https://orgmode.org".len()
))
);
assert_eq!(parse("[fn:WORD]"), Some(("WORD", "", "[fn:WORD]".len())));
assert_eq!(parse("[fn:] https://orgmode.org"), None);
assert_eq!(parse("[fn:wor d] https://orgmode.org"), None);
assert_eq!(parse("[fn:WORD https://orgmode.org"), None);
}
#[test]
fn parse() {
assert_eq!(
FnDef::parse("[fn:1] https://orgmode.org"),
Some((
FnDef { label: "1" },
"[fn:1]".len(),
"[fn:1] https://orgmode.org".len()
))
);
assert_eq!(
FnDef::parse("[fn:word_1] https://orgmode.org"),
Some((
FnDef { label: "word_1" },
"[fn:word_1]".len(),
"[fn:word_1] https://orgmode.org".len()
))
);
assert_eq!(
FnDef::parse("[fn:WORD-1] https://orgmode.org"),
Some((
FnDef { label: "WORD-1" },
"[fn:WORD-1]".len(),
"[fn:WORD-1] https://orgmode.org".len()
))
);
assert_eq!(
FnDef::parse("[fn:WORD]"),
Some((
FnDef { label: "WORD" },
"[fn:WORD]".len(),
"[fn:WORD]".len()
))
);
assert_eq!(FnDef::parse("[fn:] https://orgmode.org"), None);
assert_eq!(FnDef::parse("[fn:wor d] https://orgmode.org"), None);
assert_eq!(FnDef::parse("[fn:WORD https://orgmode.org"), None);
}

97
src/elements/fn_ref.rs Normal file
View file

@ -0,0 +1,97 @@
use memchr::{memchr2, memchr2_iter};
/// A footnote reference: `[fn:LABEL]`, `[fn:LABEL:DEFINITION]` or
/// `[fn::DEFINITION]`.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct FnRef<'a> {
    /// text between `[fn:` and the following `:` or `]`; `None` when empty
    pub label: Option<&'a str>,
    /// text after the second `:` up to the matching `]`; `None` when there is
    /// no second `:`
    pub definition: Option<&'a str>,
}

impl FnRef<'_> {
    /// Parses a footnote reference at the start of `text`.
    ///
    /// `text` must start with `[fn:` (checked by a `debug_assert!`). The label
    /// may only contain ASCII alphanumerics, `-` and `_`. Square brackets
    /// inside the definition must be balanced.
    ///
    /// Returns the reference and the byte offset just past its closing `]`.
    #[inline]
    pub fn parse(text: &str) -> Option<(FnRef<'_>, usize)> {
        debug_assert!(text.starts_with("[fn:"));

        let prefix = "[fn:".len();
        let bytes = text.as_bytes();

        // the label runs up to the first `]` or `:`
        let label_len = memchr2(b']', b':', &bytes[prefix..])?;
        let label_end = prefix + label_len;

        let is_label_byte = |c: &u8| c.is_ascii_alphanumeric() || *c == b'-' || *c == b'_';
        if !bytes[prefix..label_end].iter().all(is_label_byte) {
            return None;
        }

        let label = if label_len == 0 {
            None
        } else {
            Some(&text[prefix..label_end])
        };

        if bytes[label_end] != b':' {
            // plain `[fn:LABEL]`
            return Some((
                FnRef {
                    label,
                    definition: None,
                },
                label_end + 1,
            ));
        }

        // Inline definition: walk the brackets after the `:` until the `[`
        // that opened the reference is closed again.
        let mut depth = 1;
        for i in memchr2_iter(b'[', b']', &bytes[label_end..]) {
            if bytes[label_end + i] == b'[' {
                depth += 1;
            } else {
                depth -= 1;
            }

            if depth == 0 {
                return Some((
                    FnRef {
                        label,
                        definition: Some(&text[label_end + 1..label_end + i]),
                    },
                    label_end + i + 1,
                ));
            }
        }

        None
    }
}
#[test]
fn parse() {
    // expected result for a reference that spans all of `src`
    fn whole<'a>(
        src: &'a str,
        label: Option<&'a str>,
        definition: Option<&'a str>,
    ) -> Option<(FnRef<'a>, usize)> {
        Some((FnRef { label, definition }, src.len()))
    }

    assert_eq!(FnRef::parse("[fn:1]"), whole("[fn:1]", Some("1"), None));
    assert_eq!(
        FnRef::parse("[fn:1:2]"),
        whole("[fn:1:2]", Some("1"), Some("2"))
    );
    assert_eq!(FnRef::parse("[fn::2]"), whole("[fn::2]", None, Some("2")));
    assert_eq!(
        FnRef::parse("[fn::[]]"),
        whole("[fn::[]]", None, Some("[]"))
    );

    // unbalanced brackets in the definition
    assert_eq!(FnRef::parse("[fn::[]"), None);
}

11
src/elements/fragment.rs Normal file
View file

@ -0,0 +1,11 @@
/// A fragment wrapping a single borrowed string.
// NOTE(review): nothing visible here constructs or reads `value`; this looks
// like a placeholder type — confirm before relying on it.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Fragment<'a> {
value: &'a str,
}
impl<'a> Fragment<'a> {
/// Stub: ignores `src` and always returns `None`.
pub fn parse(src: &'a str) -> Option<(Fragment<'a>, usize)> {
None
}
}

278
src/elements/headline.rs Normal file
View file

@ -0,0 +1,278 @@
//! Headline
use jetscii::ByteSubstring;
use memchr::{memchr, memchr2, memrchr};
/// Default list of TODO keywords.
// NOTE(review): presumably what callers hand to `Headline::parse` as
// `keywords` when nothing else is configured — confirm at the call sites.
pub(crate) const DEFAULT_TODO_KEYWORDS: &[&str] =
&["TODO", "DONE", "NEXT", "WAITING", "LATER", "CANCELLED"];
/// A parsed headline line.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Headline<'a> {
    /// headline level, number of stars
    pub level: usize,
    /// priority cookie
    pub priority: Option<char>,
    /// headline tags, without the separating colons
    pub tags: Vec<&'a str>,
    /// headline title
    pub title: &'a str,
    /// headline keyword
    pub keyword: Option<&'a str>,
}

impl Headline<'_> {
    /// Parses the headline at the start of `text`.
    ///
    /// `text` must start with one or more `*` followed by a space, a newline
    /// or the end of the text (checked by `debug_assert!`s). `keywords` lists
    /// the words accepted as the headline keyword; matching is exact.
    ///
    /// Returns `(headline, off, end)` where `off` is the offset just past the
    /// headline's own line and `end` is the offset of the next headline with
    /// the same or a smaller number of stars (or `text.len()` if there is
    /// none).
    pub(crate) fn parse<'a>(text: &'a str, keywords: &[&str]) -> (Headline<'a>, usize, usize) {
        let bytes = text.as_bytes();
        let level = memchr2(b'\n', b' ', bytes).unwrap_or_else(|| text.len());

        debug_assert!(level > 0);
        debug_assert!(bytes[0..level].iter().all(|&c| c == b'*'));

        // `find_level` copes with an empty remainder, so a headline on the
        // last line needs no special case here
        let (off, end) = memchr(b'\n', bytes)
            .map(|i| (i + 1, i + 1 + Headline::find_level(&text[i + 1..], level)))
            .unwrap_or_else(|| (text.len(), text.len()));

        // nothing but stars up to the end of the text
        if level == off {
            return (
                Headline {
                    level,
                    keyword: None,
                    priority: None,
                    title: "",
                    tags: Vec::new(),
                },
                off,
                end,
            );
        }

        let tail = text[level + 1..off].trim();

        // the keyword is the first word, if it is one of `keywords`
        let (keyword, tail) = {
            let (word, off) = memchr(b' ', tail.as_bytes())
                .map(|i| (&tail[0..i], i + 1))
                .unwrap_or_else(|| (tail, tail.len()));
            if keywords.contains(&word) {
                (Some(word), &tail[off..])
            } else {
                (None, tail)
            }
        };

        // priority cookie: `[#X]` with an uppercase ASCII letter, followed by
        // a space
        let (priority, tail) = {
            let bytes = tail.as_bytes();
            if bytes.len() > 4
                && bytes[0] == b'['
                && bytes[1] == b'#'
                && bytes[2].is_ascii_uppercase()
                && bytes[3] == b']'
                && bytes[4] == b' '
            {
                (Some(bytes[2] as char), tail[4..].trim_start())
            } else {
                (None, tail)
            }
        };

        // tags: the last space-separated word, if it is wrapped in colons
        let (title, tags) = if let Some(i) = memrchr(b' ', tail.as_bytes()) {
            let last = &tail[i + 1..];
            if last.len() > 2 && last.starts_with(':') && last.ends_with(':') {
                (tail[..i].trim(), last)
            } else {
                (tail, "")
            }
        } else {
            (tail, "")
        };

        (
            Headline {
                level,
                keyword,
                priority,
                title,
                tags: tags.split(':').filter(|s| !s.is_empty()).collect(),
            },
            off,
            end,
        )
    }

    /// Returns the offset of the first line in `text` that starts with
    /// between one and `level` stars followed by a space or a newline, or
    /// `text.len()` if there is no such line. An empty `text` yields `0`.
    pub(crate) fn find_level(text: &str, level: usize) -> usize {
        let bytes = text.as_bytes();

        // `first()` rather than `bytes[0]`: an empty `text` must not panic
        if bytes.first() == Some(&b'*') {
            if let Some(stars) = memchr2(b'\n', b' ', bytes) {
                if stars <= level && bytes[0..stars].iter().all(|&c| c == b'*') {
                    return 0;
                }
            }
        }

        // build the searcher once instead of once per iteration
        let line_start_star = ByteSubstring::new(b"\n*");
        let mut pos = 0;
        while let Some(off) = line_start_star.find(&bytes[pos..]) {
            // skip the newline; `pos` now points at the first star
            pos += off + 1;
            if let Some(stars) = memchr2(b'\n', b' ', &bytes[pos..]) {
                if stars > 0 && stars <= level && bytes[pos..pos + stars].iter().all(|&c| c == b'*')
                {
                    return pos;
                }
            }
        }

        text.len()
    }

    /// checks if this headline is "commented"
    pub fn is_commented(&self) -> bool {
        self.title.starts_with("COMMENT ")
    }

    /// checks if this headline is "archived"
    pub fn is_archived(&self) -> bool {
        self.tags.contains(&"ARCHIVE")
    }
}
#[test]
fn parse() {
    // parses `src` with "TODO" as the only accepted keyword
    fn headline(src: &str) -> Headline<'_> {
        Headline::parse(src, &["TODO"]).0
    }

    // every fixture below is a level-4 headline
    fn expected<'a>(
        keyword: Option<&'a str>,
        priority: Option<char>,
        title: &'a str,
        tags: Vec<&'a str>,
    ) -> Headline<'a> {
        Headline {
            level: 4,
            priority,
            tags,
            title,
            keyword,
        }
    }

    assert_eq!(
        headline("**** TODO [#A] COMMENT Title :tag:a2%:"),
        expected(Some("TODO"), Some('A'), "COMMENT Title", vec!["tag", "a2%"]),
    );

    // keyword matching is exact, and without a keyword the priority cookie
    // stays in the title
    assert_eq!(
        headline("**** ToDO [#A] COMMENT Title :tag:a2%:"),
        expected(None, None, "ToDO [#A] COMMENT Title", vec!["tag", "a2%"]),
    );
    assert_eq!(
        headline("**** T0DO [#A] COMMENT Title :tag:a2%:"),
        expected(None, None, "T0DO [#A] COMMENT Title", vec!["tag", "a2%"]),
    );

    // the priority has to be an uppercase letter
    assert_eq!(
        headline("**** TODO [#1] COMMENT Title :tag:a2%:"),
        expected(Some("TODO"), None, "[#1] COMMENT Title", vec!["tag", "a2%"]),
    );
    assert_eq!(
        headline("**** TODO [#a] COMMENT Title :tag:a2%:"),
        expected(Some("TODO"), None, "[#a] COMMENT Title", vec!["tag", "a2%"]),
    );

    // tags need a colon on both ends
    assert_eq!(
        headline("**** TODO [#A] COMMENT Title :tag:a2%"),
        expected(
            Some("TODO"),
            Some('A'),
            "COMMENT Title :tag:a2%",
            Vec::new()
        ),
    );
    assert_eq!(
        headline("**** TODO [#A] COMMENT Title tag:a2%:"),
        expected(
            Some("TODO"),
            Some('A'),
            "COMMENT Title tag:a2%:",
            Vec::new()
        ),
    );
    assert_eq!(
        headline("**** COMMENT Title tag:a2%:"),
        expected(None, None, "COMMENT Title tag:a2%:", Vec::new()),
    );
}
#[test]
fn parse_todo_keywords() {
    // with an empty keyword list "TODO" is just part of the title
    let (no_keywords, _, _) = Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%:", &[]);
    assert_eq!(
        no_keywords,
        Headline {
            level: 4,
            priority: None,
            keyword: None,
            title: "TODO [#A] COMMENT Title",
            tags: vec!["tag", "a2%"],
        },
    );

    // a custom keyword is picked up when it is listed
    let (custom_keyword, _, _) =
        Headline::parse("**** TASK [#A] COMMENT Title :tag:a2%:", &["TASK"]);
    assert_eq!(
        custom_keyword,
        Headline {
            level: 4,
            priority: Some('A'),
            keyword: Some("TASK"),
            title: "COMMENT Title",
            tags: vec!["tag", "a2%"],
        },
    );
}
#[test]
fn is_commented() {
    fn commented(src: &str) -> bool {
        Headline::parse(src, &[]).0.is_commented()
    }

    assert!(commented("* COMMENT Title"));

    // missing, misspelled or lower-case marker
    for src in ["* Title", "* C0MMENT Title", "* comment Title"].iter() {
        assert!(!commented(src));
    }
}
#[test]
fn is_archived() {
    // the ARCHIVE tag counts wherever it sits in the tag list
    assert!(Headline::parse("* Title :ARCHIVE:", &[]).0.is_archived());
    assert!(Headline::parse("* Title :t:ARCHIVE:", &[]).0.is_archived());
    assert!(Headline::parse("* Title :ARCHIVE:t:", &[]).0.is_archived());

    // no tags at all (this assertion used to call `is_commented()` by mistake)
    assert!(!Headline::parse("* Title", &[]).0.is_archived());

    // near misses: the tag has to match exactly, case included
    assert!(!Headline::parse("* Title :ARCHIVED:", &[]).0.is_archived());
    assert!(!Headline::parse("* Title :ARCHIVES:", &[]).0.is_archived());
    assert!(!Headline::parse("* Title :archive:", &[]).0.is_archived());
}
#[test]
fn find_level() {
    let text = "\n** Title\n* Title\n** Title\n";

    // the level-2 headline is deeper than the requested level 1, so the
    // search stops at the level-1 headline behind it
    let expected = "\n** Title\n".len();

    assert_eq!(Headline::find_level(text, 1), expected);
}

108
src/elements/inline_call.rs Normal file
View file

@ -0,0 +1,108 @@
use memchr::{memchr, memchr2};
/// An inline call: `call_NAME[INSIDE-HEADER](ARGS)[END-HEADER]`, where both
/// bracketed headers are optional.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct InlineCall<'a> {
    /// text between `call_` and the first `[` or `(`
    pub name: &'a str,
    /// contents of the brackets in front of the arguments, if present
    pub inside_header: Option<&'a str>,
    /// text between the parentheses
    pub args: &'a str,
    /// contents of the brackets right after the arguments, if present
    pub end_header: Option<&'a str>,
}

impl InlineCall<'_> {
    /// Parses an inline call at the start of `text`.
    ///
    /// `text` must start with `call_` (checked by a `debug_assert!`). The name
    /// may only contain ASCII graphic characters, and neither the headers nor
    /// the arguments may contain a newline.
    ///
    /// Returns the call and the byte offset just past it, or `None` when
    /// `text` does not start with a well-formed call.
    #[inline]
    pub(crate) fn parse(text: &str) -> Option<(InlineCall<'_>, usize)> {
        debug_assert!(text.starts_with("call_"));

        let bytes = text.as_bytes();

        let (name, off) = memchr2(b'[', b'(', bytes)
            .map(|i| (&text["call_".len()..i], i))
            .filter(|(name, _)| name.as_bytes().iter().all(u8::is_ascii_graphic))?;

        let (inside_header, off) = if bytes[off] == b'[' {
            memchr(b']', &bytes[off..])
                .filter(|&i| {
                    // `get` instead of indexing: the `]` may be the very last
                    // byte of `text`, in which case there are no arguments
                    bytes.get(off + i + 1) == Some(&b'(')
                        && bytes[off + 1..off + i].iter().all(|&c| c != b'\n')
                })
                .map(|i| (Some(&text[off + 1..off + i]), off + i + 1))?
        } else {
            (None, off)
        };

        // `bytes[off]` is the opening `(` at this point
        let (args, off) = memchr(b')', &bytes[off..])
            .map(|i| (&text[off + 1..off + i], off + i + 1))
            .filter(|(args, _)| args.as_bytes().iter().all(|&c| c != b'\n'))?;

        let (end_header, off) = if text.len() > off && bytes[off] == b'[' {
            memchr(b']', &bytes[off..])
                .filter(|&i| bytes[off + 1..off + i].iter().all(|&c| c != b'\n'))
                .map(|i| (Some(&text[off + 1..off + i]), off + i + 1))?
        } else {
            (None, off)
        };

        Some((
            InlineCall {
                name,
                args,
                inside_header,
                end_header,
            },
            off,
        ))
    }
}
#[test]
fn parse() {
    // expected result for a call to `square` with argument `4` that spans all
    // of `src`
    fn square<'a>(
        src: &'a str,
        inside_header: Option<&'a str>,
        end_header: Option<&'a str>,
    ) -> Option<(InlineCall<'a>, usize)> {
        Some((
            InlineCall {
                name: "square",
                args: "4",
                inside_header,
                end_header,
            },
            src.len(),
        ))
    }

    let plain = "call_square(4)";
    assert_eq!(InlineCall::parse(plain), square(plain, None, None));

    let inside = "call_square[:results output](4)";
    assert_eq!(
        InlineCall::parse(inside),
        square(inside, Some(":results output"), None)
    );

    let end = "call_square(4)[:results html]";
    assert_eq!(
        InlineCall::parse(end),
        square(end, None, Some(":results html"))
    );

    let both = "call_square[:results output](4)[:results html]";
    assert_eq!(
        InlineCall::parse(both),
        square(both, Some(":results output"), Some(":results html"))
    );
}

View file

@ -0,0 +1,71 @@
use memchr::{memchr, memchr2};
/// An inline source block: `src_LANG[OPTION]{BODY}`, where `[OPTION]` is
/// optional.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct InlineSrc<'a> {
    /// text between `src_` and the first `[` or `{`; never empty
    pub lang: &'a str,
    /// contents of the brackets, if present
    pub option: Option<&'a str>,
    /// text between the braces
    pub body: &'a str,
}

impl InlineSrc<'_> {
    /// Parses an inline source block at the start of `text`.
    ///
    /// `text` must start with `src_` (checked by a `debug_assert!`). The
    /// language must be non-empty and free of ASCII whitespace; neither the
    /// option nor the body may contain a newline.
    ///
    /// Returns the block and the byte offset just past its closing `}`, or
    /// `None` when `text` does not start with a well-formed block.
    #[inline]
    pub(crate) fn parse(text: &str) -> Option<(InlineSrc<'_>, usize)> {
        debug_assert!(text.starts_with("src_"));

        let bytes = text.as_bytes();

        let (lang, off) = memchr2(b'[', b'{', bytes)
            .map(|i| (&text["src_".len()..i], i))
            .filter(|(lang, off)| {
                *off != 4 && lang.as_bytes().iter().all(|c| !c.is_ascii_whitespace())
            })?;

        let (option, off) = if bytes[off] == b'[' {
            memchr(b']', &bytes[off..])
                .filter(|&i| bytes[off..off + i].iter().all(|&c| c != b'\n'))
                .map(|i| (Some(&text[off + 1..off + i]), off + i + 1))?
        } else {
            (None, off)
        };

        // The body has to open right here. Without this check the byte at
        // `off` was skipped blindly, which accepted `src_c[x] y}` and panicked
        // on `src_c[x]}`.
        if bytes.get(off) != Some(&b'{') {
            return None;
        }

        let (body, off) = memchr(b'}', &bytes[off..])
            .map(|i| (&text[off + 1..off + i], off + i + 1))
            .filter(|(body, _)| body.as_bytes().iter().all(|c| *c != b'\n'))?;

        Some((InlineSrc { lang, option, body }, off))
    }
}
#[test]
fn parse() {
    assert_eq!(
        InlineSrc::parse("src_C{int a = 0;}"),
        Some((
            InlineSrc {
                lang: "C",
                option: None,
                body: "int a = 0;"
            },
            "src_C{int a = 0;}".len()
        ))
    );
    assert_eq!(
        InlineSrc::parse("src_xml[:exports code]{<tag>text</tag>}"),
        Some((
            InlineSrc {
                lang: "xml",
                option: Some(":exports code"),
                body: "<tag>text</tag>",
            },
            "src_xml[:exports code]{<tag>text</tag>}".len()
        ))
    );

    // missing closing brace
    assert_eq!(
        InlineSrc::parse("src_xml[:exports code]{<tag>text</tag>"),
        None
    );
    // missing language
    assert_eq!(
        InlineSrc::parse("src_[:exports code]{<tag>text</tag>}"),
        None
    );
    // missing body (was commented out and still called the old free `parse`)
    assert_eq!(InlineSrc::parse("src_xml[:exports code]"), None);
}

View file

@ -1,59 +1,16 @@
use memchr::{memchr, memchr2};
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Key<'a> {
// Affiliated Keywords
// Only "CAPTION" and "RESULTS" keywords can have an optional value.
Caption { option: Option<&'a str> },
Header,
Name,
Plot,
Results { option: Option<&'a str> },
Attr { backend: &'a str },
// Keywords
Author,
Date,
Title,
Custom(&'a str),
}
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Keyword<'a> {
pub key: Key<'a>,
pub key: &'a str,
pub option: Option<&'a str>,
pub value: &'a str,
}
impl<'a> Keyword<'a> {
#[inline]
pub(crate) fn new(key: &'a str, option: Option<&'a str>, value: &'a str) -> Keyword<'a> {
Keyword {
key: match &*key.to_uppercase() {
"AUTHOR" => Key::Author,
"DATE" => Key::Date,
"HEADER" => Key::Header,
"NAME" => Key::Name,
"PLOT" => Key::Plot,
"TITLE" => Key::Title,
"RESULTS" => Key::Results { option },
"CAPTION" => Key::Caption { option },
k => {
if k.starts_with("ATTR_") {
Key::Attr {
backend: &key["ATTR_".len()..],
}
} else {
Key::Custom(key)
}
}
},
value,
}
}
impl Keyword<'_> {
#[inline]
// return (key, option, value, offset)
pub(crate) fn parse(text: &str) -> Option<(&str, Option<&str>, &str, usize)> {
debug_assert!(text.starts_with("#+"));
@ -79,11 +36,11 @@ impl<'a> Keyword<'a> {
(None, off)
};
let (value, off) = memchr(b'\n', bytes)
.map(|i| (&text[off..i], i + 1))
.unwrap_or_else(|| (&text[off..], text.len()));
let end = memchr(b'\n', bytes)
.map(|i| i + 1)
.unwrap_or_else(|| text.len());
Some((key, option, value.trim(), off))
Some((key, option, &text[off..end].trim(), end))
}
}

68
src/elements/link.rs Normal file
View file

@ -0,0 +1,68 @@
use jetscii::Substring;
use memchr::memchr;
/// A bracket link: `[[PATH]]` or `[[PATH][DESC]]`.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Link<'a> {
    /// text between `[[` and the first `]`
    pub path: &'a str,
    /// text of the second bracket pair, if present
    pub desc: Option<&'a str>,
}

impl Link<'_> {
    /// Parses a bracket link at the start of `text`.
    ///
    /// `text` must start with `[[` (checked by a `debug_assert!`). The path
    /// may not contain `<`, `>` or a newline; the description may not contain
    /// `[` or `]`.
    ///
    /// Returns the link and the byte offset just past its closing `]]`.
    #[inline]
    pub(crate) fn parse(text: &str) -> Option<(Link<'_>, usize)> {
        debug_assert!(text.starts_with("[["));

        let bytes = text.as_bytes();

        let path_end = memchr(b']', bytes)?;
        let path = &text["[[".len()..path_end];
        if path.bytes().any(|c| c == b'<' || c == b'>' || c == b'\n') {
            return None;
        }

        // what follows the `]` that ends the path decides the link's shape
        match *bytes.get(path_end + 1)? {
            b']' => Some((Link { path, desc: None }, path_end + 2)),
            b'[' => {
                // index of the closing `]]` within `text`
                let close = path_end + 1 + Substring::new("]]").find(&text[path_end + 1..])?;
                let desc = &text[path_end + 2..close];
                if desc.bytes().any(|c| c == b'[' || c == b']') {
                    return None;
                }

                Some((
                    Link {
                        path,
                        desc: Some(desc),
                    },
                    close + "]]".len(),
                ))
            }
            _ => None,
        }
    }
}
#[test]
fn parse() {
    // expected result for a link to `#id` that spans all of `src`
    fn id_link<'a>(src: &'a str, desc: Option<&'a str>) -> Option<(Link<'a>, usize)> {
        Some((Link { path: "#id", desc }, src.len()))
    }

    let bare = "[[#id]]";
    assert_eq!(Link::parse(bare), id_link(bare, None));

    let described = "[[#id][desc]]";
    assert_eq!(Link::parse(described), id_link(described, Some("desc")));

    // the description is never closed
    assert_eq!(Link::parse("[[#id][desc]"), None);
}

View file

@ -1,54 +1,104 @@
use memchr::memchr_iter;
use std::iter::once;
// (indentation, ordered, limit, end)
#[inline]
pub fn parse(text: &str) -> Option<(usize, bool, usize, usize)> {
let (indent, tail) = text
.find(|c| c != ' ')
.map(|off| (off, &text[off..]))
.unwrap_or((0, text));
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct List {
pub indent: usize,
pub ordered: bool,
}
let ordered = is_item(tail)?;
let bytes = text.as_bytes();
let mut lines = memchr_iter(b'\n', bytes)
.map(|i| i + 1)
.chain(once(text.len()));
let mut pos = lines.next()?;
impl List {
#[inline]
// return (list, begin, end)
pub(crate) fn parse(text: &str) -> Option<(List, usize, usize)> {
let (indent, tail) = text
.find(|c| c != ' ')
.map(|off| (off, &text[off..]))
.unwrap_or((0, text));
while let Some(i) = lines.next() {
let line = &text[pos..i];
return if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) {
// this line is no empty
if line_indent < indent
|| (line_indent == indent && is_item(&line[line_indent..]).is_none())
{
Some((indent, ordered, pos, pos))
} else {
pos = i;
continue;
}
} else if let Some(next_i) = lines.next() {
// this line is empty
let line = &text[i..next_i];
if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) {
let ordered = is_item(tail)?;
let bytes = text.as_bytes();
let mut lines = memchr_iter(b'\n', bytes)
.map(|i| i + 1)
.chain(once(text.len()));
let mut pos = lines.next()?;
while let Some(i) = lines.next() {
let line = &text[pos..i];
return if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) {
// this line is no empty
if line_indent < indent
|| (line_indent == indent && is_item(&line[line_indent..]).is_none())
{
Some((indent, ordered, pos, pos))
Some((List { indent, ordered }, pos, pos))
} else {
pos = next_i;
pos = i;
continue;
}
} else if let Some(next_i) = lines.next() {
// this line is empty
let line = &text[i..next_i];
if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) {
if line_indent < indent
|| (line_indent == indent && is_item(&line[line_indent..]).is_none())
{
Some((List { indent, ordered }, pos, pos))
} else {
pos = next_i;
continue;
}
} else {
Some((List { indent, ordered }, pos, next_i))
}
} else {
Some((indent, ordered, pos, next_i))
}
} else {
Some((indent, ordered, pos, i))
};
}
Some((List { indent, ordered }, pos, i))
};
}
Some((indent, ordered, pos, pos))
Some((List { indent, ordered }, pos, pos))
}
}
/// A single item of a list.
pub struct ListItem<'a> {
    /// the text from the item's indentation up to and including the first
    /// space after it
    pub bullet: &'a str,
}

impl ListItem<'_> {
    /// Splits the first list item off `text`.
    ///
    /// `text[..indent]` must be blank (checked by a `debug_assert!`), and the
    /// text after the indentation must contain a space; otherwise this
    /// panics.
    ///
    /// Returns `(item, off, end)`: `off` is the offset just past the bullet,
    /// `end` the start of the first later line whose first non-whitespace
    /// character sits at exactly `indent`, or `text.len()` if there is none.
    pub fn parse(text: &str, indent: usize) -> (ListItem<'_>, usize, usize) {
        debug_assert!(text[..indent].trim().is_empty());

        let off = indent + text[indent..].find(' ').unwrap() + 1;
        let item = ListItem {
            bullet: &text[indent..off],
        };

        // offsets at which each following line starts, plus the end of text
        let mut boundaries = memchr_iter(b'\n', text.as_bytes())
            .map(|i| i + 1)
            .chain(once(text.len()));

        let mut line_start = boundaries.next().unwrap();
        for line_end in boundaries {
            let first_visible = text[line_start..line_end].find(|c: char| !c.is_whitespace());
            if first_visible == Some(indent) {
                return (item, off, line_start);
            }
            line_start = line_end;
        }

        (item, off, text.len())
    }
}
#[inline]
@ -97,60 +147,91 @@ fn test_is_item() {
}
#[test]
fn test_parse() {
fn list_parse() {
assert_eq!(
parse("+ item1\n+ item2"),
Some((0, false, "+ item1\n+ item2".len(), "+ item1\n+ item2".len()))
List::parse("+ item1\n+ item2"),
Some((
List {
indent: 0,
ordered: false,
},
"+ item1\n+ item2".len(),
"+ item1\n+ item2".len()
))
);
assert_eq!(
parse("* item1\n \n* item2"),
List::parse("* item1\n \n* item2"),
Some((
0,
false,
List {
indent: 0,
ordered: false
},
"* item1\n \n* item2".len(),
"* item1\n \n* item2".len()
))
);
assert_eq!(
parse("* item1\n \n \n* item2"),
Some((0, false, "* item1\n".len(), "* item1\n \n \n".len()))
);
assert_eq!(
parse("* item1\n \n "),
Some((0, false, "+ item1\n".len(), "* item1\n \n ".len()))
);
assert_eq!(
parse("+ item1\n + item2\n "),
List::parse("* item1\n \n \n* item2"),
Some((
0,
false,
List {
indent: 0,
ordered: false,
},
"* item1\n".len(),
"* item1\n \n \n".len()
))
);
assert_eq!(
List::parse("* item1\n \n "),
Some((
List {
indent: 0,
ordered: false,
},
"+ item1\n".len(),
"* item1\n \n ".len()
))
);
assert_eq!(
List::parse("+ item1\n + item2\n "),
Some((
List {
indent: 0,
ordered: false,
},
"+ item1\n + item2\n".len(),
"+ item1\n + item2\n ".len()
))
);
assert_eq!(
parse("+ item1\n \n + item2\n \n+ item 3"),
List::parse("+ item1\n \n + item2\n \n+ item 3"),
Some((
0,
false,
List {
indent: 0,
ordered: false,
},
"+ item1\n \n + item2\n \n+ item 3".len(),
"+ item1\n \n + item2\n \n+ item 3".len()
))
);
assert_eq!(
parse(" + item1\n \n + item2"),
List::parse(" + item1\n \n + item2"),
Some((
2,
false,
List {
indent: 2,
ordered: false,
},
" + item1\n \n + item2".len(),
" + item1\n \n + item2".len()
))
);
assert_eq!(
parse("+ 1\n\n - 2\n\n - 3\n\n+ 4"),
List::parse("+ 1\n\n - 2\n\n - 3\n\n+ 4"),
Some((
0,
false,
List {
indent: 0,
ordered: false,
},
"+ 1\n\n - 2\n\n - 3\n\n+ 4".len(),
"+ 1\n\n - 2\n\n - 3\n\n+ 4".len()
))

80
src/elements/macros.rs Normal file
View file

@ -0,0 +1,80 @@
use jetscii::Substring;
use memchr::memchr2;
/// A macro object, written `{{{name}}}` or `{{{name(arguments)}}}`.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Macros<'a> {
/// Macro name: the text between `{{{` and the first `(` or `}`.
pub name: &'a str,
/// Raw text between the `(` that follows the name and the closing `)}}}`;
/// `None` when the macro has no argument list.
pub arguments: Option<&'a str>,
}
impl Macros<'_> {
    /// Parses a macro at the start of `text`, which must begin with `{{{`.
    ///
    /// Returns the macro together with the offset just past its closing
    /// `}}}`, or `None` when the text is not a well-formed macro.
    #[inline]
    pub(crate) fn parse(text: &str) -> Option<(Macros<'_>, usize)> {
        debug_assert!(text.starts_with("{{{"));

        let bytes = text.as_bytes();

        // The name has to open with an ASCII letter.
        match bytes.get(3) {
            Some(first) if first.is_ascii_alphabetic() => {}
            _ => return None,
        }

        // The name runs up to the first `}` or `(`.
        let name_end = memchr2(b'}', b'(', bytes)?;
        let is_name_byte = |c: &u8| c.is_ascii_alphanumeric() || *c == b'-' || *c == b'_';
        if !bytes[3..name_end].iter().all(is_name_byte) {
            return None;
        }
        let name = &text[3..name_end];

        if bytes[name_end] == b'}' {
            // No argument list: the name must be followed directly by `}}}`.
            if !bytes[name_end..].starts_with(b"}}}") {
                return None;
            }
            return Some((
                Macros {
                    name,
                    arguments: None,
                },
                name_end + "}}}".len(),
            ));
        }

        // Argument list: everything between `(` and the first `)}}}`.
        let close = Substring::new(")}}}").find(&text[name_end..])?;
        Some((
            Macros {
                name,
                arguments: Some(&text[name_end + 1..name_end + close]),
            },
            name_end + close + ")}}}".len(),
        ))
    }
}
#[test]
fn parse() {
    // Well-formed macros: (input, expected name, expected arguments).
    let accepted = [
        ("{{{poem(red,blue)}}}", "poem", Some("red,blue")),
        ("{{{poem())}}}", "poem", Some(")")),
        ("{{{author}}}", "author", None),
    ];
    for &(input, name, arguments) in accepted.iter() {
        assert_eq!(
            Macros::parse(input),
            Some((Macros { name, arguments }, input.len()))
        );
    }

    // Malformed macros must be rejected.
    let rejected = [
        "{{{0uthor}}}",
        "{{{author}}",
        "{{{poem(}}}",
        "{{{poem)}}}",
    ];
    for &input in rejected.iter() {
        assert_eq!(Macros::parse(input), None);
    }
}

View file

@ -1,16 +1,224 @@
/// Elements
///
/// Elements are the syntactic parts that sit at the same level as a paragraph.
pub(crate) mod block;
pub(crate) mod clock;
pub(crate) mod drawer;
pub(crate) mod dyn_block;
pub(crate) mod fn_def;
pub(crate) mod keyword;
pub(crate) mod list;
pub(crate) mod planning;
pub(crate) mod rule;
mod block;
mod clock;
mod cookie;
mod drawer;
mod dyn_block;
mod fn_def;
mod fn_ref;
mod fragment;
mod headline;
mod inline_call;
mod inline_src;
mod keyword;
mod link;
mod list;
mod macros;
mod planning;
mod radio_target;
mod rule;
mod snippet;
mod target;
mod timestamp;
pub use self::clock::Clock;
pub use self::keyword::{Key, Keyword};
pub use self::planning::Planning;
pub mod emphasis;
pub use self::{
block::Block,
clock::Clock,
cookie::Cookie,
drawer::Drawer,
dyn_block::DynBlock,
fn_def::FnDef,
fn_ref::FnRef,
headline::Headline,
inline_call::InlineCall,
inline_src::InlineSrc,
keyword::Keyword,
link::Link,
list::{List, ListItem},
macros::Macros,
planning::Planning,
radio_target::RadioTarget,
rule::Rule,
snippet::Snippet,
target::Target,
timestamp::*,
};
/// A node of the parsed document: one variant per kind of element or object.
///
/// Most variants carry `begin`/`end` positions; some additionally carry
/// `contents_begin`/`contents_end`.
/// NOTE(review): the positions are presumably byte offsets into the source
/// text — confirm against the parser that fills them in.
#[derive(Debug)]
pub enum Element<'a> {
/// A block, with its parsed `Block` data.
Block {
block: Block<'a>,
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
/// A babel call, keeping its raw `value` text.
BabelCall {
value: &'a str,
begin: usize,
end: usize,
},
/// A section; carries positions only.
Section {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
/// A clock, with its parsed `Clock` data.
Clock {
clock: Clock<'a>,
begin: usize,
end: usize,
},
/// A cookie, with its parsed `Cookie` data.
Cookie {
cookie: Cookie<'a>,
begin: usize,
end: usize,
},
/// A radio target, with its parsed `RadioTarget` data.
RadioTarget {
radio_target: RadioTarget<'a>,
begin: usize,
end: usize,
},
/// A drawer, with its parsed `Drawer` data.
Drawer {
drawer: Drawer<'a>,
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
/// The document itself; carries positions only.
Document {
begin: usize,
end: usize,
},
/// A dynamic block, with its parsed `DynBlock` data.
DynBlock {
dyn_block: DynBlock<'a>,
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
/// A footnote definition, with its parsed `FnDef` data.
FnDef {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
fn_def: FnDef<'a>,
},
/// A footnote reference, with its parsed `FnRef` data.
FnRef {
fn_ref: FnRef<'a>,
begin: usize,
end: usize,
},
/// A headline, with its parsed `Headline` data.
Headline {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
headline: Headline<'a>,
},
/// An inline call, with its parsed `InlineCall` data.
InlineCall {
inline_call: InlineCall<'a>,
begin: usize,
end: usize,
},
/// An inline source block, with its parsed `InlineSrc` data.
InlineSrc {
inline_src: InlineSrc<'a>,
begin: usize,
end: usize,
},
/// A keyword, with its parsed `Keyword` data.
Keyword {
keyword: Keyword<'a>,
begin: usize,
end: usize,
},
/// A link, with its parsed `Link` data.
Link {
link: Link<'a>,
begin: usize,
end: usize,
},
/// A list, with its parsed `List` data.
List {
list: List,
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
/// A list item, with its parsed `ListItem` data.
ListItem {
list_item: ListItem<'a>,
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
/// A macro, with its parsed `Macros` data.
Macros {
macros: Macros<'a>,
begin: usize,
end: usize,
},
/// Planning information. This is the only tuple variant and the only one
/// without positions.
Planning(Planning<'a>),
/// A snippet, with its parsed `Snippet` data.
Snippet {
begin: usize,
end: usize,
snippet: Snippet<'a>,
},
/// Plain text, keeping its raw `value`.
Text {
value: &'a str,
begin: usize,
end: usize,
},
/// A paragraph; carries positions only.
Paragraph {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
/// A rule; carries positions only.
Rule {
begin: usize,
end: usize,
},
/// A timestamp, with its parsed `Timestamp` data.
Timestamp {
begin: usize,
end: usize,
timestamp: Timestamp<'a>,
},
/// A target, with its parsed `Target` data.
Target {
target: Target<'a>,
begin: usize,
end: usize,
},
/// Bold markup; carries positions only.
Bold {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
/// Strike-through markup; carries positions only.
Strike {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
/// Italic markup; carries positions only.
Italic {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
/// Underline markup; carries positions only.
Underline {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
/// Verbatim markup, keeping its raw `value`.
Verbatim {
begin: usize,
end: usize,
value: &'a str,
},
/// Code markup, keeping its raw `value`.
Code {
begin: usize,
end: usize,
value: &'a str,
},
}

View file

@ -1,4 +1,4 @@
use crate::objects::Timestamp;
use crate::elements::Timestamp;
use memchr::memchr;
/// planning elements
@ -13,8 +13,9 @@ pub struct Planning<'a> {
pub closed: Option<Timestamp<'a>>,
}
impl<'a> Planning<'a> {
pub(crate) fn parse(text: &'a str) -> Option<(Planning<'a>, usize)> {
impl Planning<'_> {
#[inline]
pub(crate) fn parse(text: &str) -> Option<(Planning<'_>, usize)> {
let (mut deadline, mut scheduled, mut closed) = (None, None, None);
let (mut tail, off) = memchr(b'\n', text.as_bytes())
.map(|i| (text[..i].trim(), i + 1))
@ -60,7 +61,7 @@ impl<'a> Planning<'a> {
#[test]
fn prase() {
use crate::objects::Datetime;
use crate::elements::Datetime;
assert_eq!(
Planning::parse("SCHEDULED: <2019-04-08 Mon>\n"),

View file

@ -0,0 +1,53 @@
use jetscii::Substring;
// TODO: text-markup, entities, latex-fragments, subscript and superscript
/// A radio target, written `<<<contents>>>`.
///
/// NOTE(review): `contents` is private, unlike the `pub` fields on the
/// sibling element structs (e.g. `Target::target`) — confirm whether that is
/// intentional.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct RadioTarget<'a> {
/// The text between `<<<` and `>>>`.
contents: &'a str,
}
impl RadioTarget<'_> {
    /// Parses a radio target at the start of `src`, which must begin with
    /// `<<<`.
    ///
    /// The contents run up to the first `>>>`. They must be non-empty, must
    /// not start or end with a space, and must not contain `<`, `>` or a
    /// newline. Returns the radio target and the offset just past the closing
    /// `>>>`, or `None` when the text is not a valid radio target.
    #[inline]
    // return (radio_target, offset)
    pub(crate) fn parse(src: &str) -> Option<(RadioTarget<'_>, usize)> {
        debug_assert!(src.starts_with("<<<"));
        let bytes = src.as_bytes();
        let (contents, off) = Substring::new(">>>")
            .find(src)
            .filter(|&i| {
                // `i > 3` rejects the empty target `<<<>>>`; without it the
                // space checks below would inspect the delimiters themselves
                // and let the empty contents through.
                i > "<<<".len()
                    && bytes[3] != b' '
                    && bytes[i - 1] != b' '
                    && bytes[3..i]
                        .iter()
                        .all(|&c| c != b'<' && c != b'\n' && c != b'>')
            })
            .map(|i| (&src[3..i], i + ">>>".len()))?;
        Some((RadioTarget { contents }, off))
    }
}
#[test]
fn parse() {
    // Well-formed radio targets: (input, expected contents).
    let accepted = [("<<<target>>>", "target"), ("<<<tar get>>>", "tar get")];
    for &(input, contents) in accepted.iter() {
        assert_eq!(
            RadioTarget::parse(input),
            Some((RadioTarget { contents }, input.len()))
        );
    }

    // Malformed radio targets must be rejected.
    let rejected = [
        "<<<target >>>",
        "<<< target>>>",
        "<<<ta<get>>>",
        "<<<ta>get>>>",
        "<<<ta\nget>>>",
        "<<<target>>",
    ];
    for &input in rejected.iter() {
        assert_eq!(RadioTarget::parse(input), None);
    }
}

View file

@ -1,37 +1,37 @@
#[inline]
pub fn parse(text: &str) -> usize {
let (text, off) = memchr::memchr(b'\n', text.as_bytes())
.map(|i| (text[..i].trim(), i + 1))
.unwrap_or_else(|| (text.trim(), text.len()));
pub struct Rule;
if text.len() >= 5 && text.as_bytes().iter().all(|&c| c == b'-') {
off
} else {
0
impl Rule {
#[inline]
// return offset
pub(crate) fn parse(text: &str) -> Option<usize> {
let (text, off) = memchr::memchr(b'\n', text.as_bytes())
.map(|i| (text[..i].trim(), i + 1))
.unwrap_or_else(|| (text.trim(), text.len()));
if text.len() >= 5 && text.as_bytes().iter().all(|&c| c == b'-') {
Some(off)
} else {
None
}
}
}
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(parse("-----"), "-----".len());
assert_eq!(parse("--------"), "--------".len());
assert_eq!(parse(" -----"), " -----".len());
assert_eq!(parse("\t\t-----"), "\t\t-----".len());
assert_eq!(parse("\t\t-----\n"), "\t\t-----\n".len());
assert_eq!(parse("\t\t----- \n"), "\t\t----- \n".len());
assert_eq!(parse(""), 0);
assert_eq!(parse("----"), 0);
assert_eq!(parse(" ----"), 0);
assert_eq!(parse(" 0----"), 0);
assert_eq!(parse("0 ----"), 0);
assert_eq!(parse("0------"), 0);
assert_eq!(parse("----0----"), 0);
assert_eq!(parse("\t\t----"), 0);
assert_eq!(parse("------0"), 0);
assert_eq!(parse("----- 0"), 0);
}
#[test]
fn parse() {
assert_eq!(Rule::parse("-----"), Some("-----".len()));
assert_eq!(Rule::parse("--------"), Some("--------".len()));
assert_eq!(Rule::parse(" -----"), Some(" -----".len()));
assert_eq!(Rule::parse("\t\t-----"), Some("\t\t-----".len()));
assert_eq!(Rule::parse("\t\t-----\n"), Some("\t\t-----\n".len()));
assert_eq!(Rule::parse("\t\t----- \n"), Some("\t\t----- \n".len()));
assert_eq!(Rule::parse(""), None);
assert_eq!(Rule::parse("----"), None);
assert_eq!(Rule::parse(" ----"), None);
assert_eq!(Rule::parse(" None----"), None);
assert_eq!(Rule::parse("None ----"), None);
assert_eq!(Rule::parse("None------"), None);
assert_eq!(Rule::parse("----None----"), None);
assert_eq!(Rule::parse("\t\t----"), None);
assert_eq!(Rule::parse("------None"), None);
assert_eq!(Rule::parse("----- None"), None);
}

69
src/elements/snippet.rs Normal file
View file

@ -0,0 +1,69 @@
use jetscii::Substring;
use memchr::memchr;
/// An export snippet, written `@@name:value@@`.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Snippet<'a> {
/// The text between the opening `@@` and the first `:`.
pub name: &'a str,
/// The text between that `:` and the next `@@`; may be empty.
pub value: &'a str,
}
impl Snippet<'_> {
    /// Parses an export snippet at the start of `text`, which must begin
    /// with `@@`.
    ///
    /// The name is the non-empty run of ASCII alphanumerics and `-` before
    /// the first `:`; the value is everything up to the next `@@`.
    #[inline]
    // return (snippet, offset)
    pub(crate) fn parse(text: &str) -> Option<(Snippet<'_>, usize)> {
        debug_assert!(text.starts_with("@@"));

        let bytes = text.as_bytes();
        let colon = memchr(b':', bytes)?;
        // An empty name (`@@:`) is not a snippet.
        if colon == 2 {
            return None;
        }
        let name_is_valid = bytes[2..colon]
            .iter()
            .all(|c| c.is_ascii_alphanumeric() || *c == b'-');
        if !name_is_valid {
            return None;
        }

        let value_begin = colon + 1;
        let value_end = value_begin + Substring::new("@@").find(&text[value_begin..])?;

        Some((
            Snippet {
                name: &text[2..colon],
                value: &text[value_begin..value_end],
            },
            value_end + "@@".len(),
        ))
    }
}
#[test]
fn parse() {
    // Well-formed snippets: (input, expected name, expected value).
    let accepted = [
        ("@@html:<b>@@", "html", "<b>"),
        (
            "@@latex:any arbitrary LaTeX code@@",
            "latex",
            "any arbitrary LaTeX code",
        ),
        ("@@html:@@", "html", ""),
    ];
    for &(input, name, value) in accepted.iter() {
        assert_eq!(
            Snippet::parse(input),
            Some((Snippet { name, value }, input.len()))
        );
    }

    // Malformed snippets must be rejected.
    let rejected = ["@@html:<b>@", "@@html<b>@@", "@@:<b>@@"];
    for &input in rejected.iter() {
        assert_eq!(Snippet::parse(input), None);
    }
}

53
src/elements/target.rs Normal file
View file

@ -0,0 +1,53 @@
use jetscii::Substring;
/// A target, written `<<target>>`.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Target<'a> {
/// The text between `<<` and `>>`.
pub target: &'a str,
}
impl Target<'_> {
    /// Parses a target at the start of `text`, which must begin with `<<`.
    ///
    /// The target name runs up to the first `>>`. It must be non-empty, must
    /// not start or end with a space, and must not contain `<`, `>` or a
    /// newline. Returns the target and the offset just past the closing `>>`,
    /// or `None` when the text is not a valid target.
    #[inline]
    // return (target, offset)
    pub(crate) fn parse(text: &str) -> Option<(Target<'_>, usize)> {
        debug_assert!(text.starts_with("<<"));
        let bytes = text.as_bytes();
        Substring::new(">>")
            .find(text)
            .filter(|&i| {
                // `i > 2` rejects the empty target `<<>>`; without it the
                // space checks below would inspect the delimiters themselves
                // and let the empty name through.
                i > "<<".len()
                    && bytes[2] != b' '
                    && bytes[i - 1] != b' '
                    && bytes[2..i]
                        .iter()
                        .all(|&c| c != b'<' && c != b'\n' && c != b'>')
            })
            .map(|i| {
                (
                    Target {
                        target: &text[2..i],
                    },
                    i + ">>".len(),
                )
            })
    }
}
#[test]
fn parse() {
    // Well-formed targets: (input, expected target name).
    let accepted = [("<<target>>", "target"), ("<<tar get>>", "tar get")];
    for &(input, target) in accepted.iter() {
        assert_eq!(
            Target::parse(input),
            Some((Target { target }, input.len()))
        );
    }

    // Malformed targets must be rejected.
    let rejected = [
        "<<target >>",
        "<< target>>",
        "<<ta<get>>",
        "<<ta>get>>",
        "<<ta\nget>>",
        "<<target>",
    ];
    for &input in rejected.iter() {
        assert_eq!(Target::parse(input), None);
    }
}

527
src/elements/timestamp.rs Normal file
View file

@ -0,0 +1,527 @@
use memchr::memchr;
use std::str::FromStr;
/// The date, optional time and day name of a timestamp, kept as slices of
/// the source text.
///
/// The accessors assume the shapes checked by the timestamp parser: `date`
/// is `YYYY-MM-DD` and `time` is `H:MM` or `HH:MM`. They panic on anything
/// else.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug, Clone, Copy)]
pub struct Datetime<'a> {
    pub(crate) date: &'a str,
    pub(crate) time: Option<&'a str>,
    pub(crate) dayname: &'a str,
}

impl Datetime<'_> {
    /// The year, read from the first four bytes of `date`.
    pub fn year(&self) -> u32 {
        let digits = &self.date[0..4];
        u32::from_str(digits).unwrap()
    }

    /// The month, read from bytes 5..7 of `date`.
    pub fn month(&self) -> u32 {
        let digits = &self.date[5..7];
        u32::from_str(digits).unwrap()
    }

    /// The day of the month, read from bytes 8..10 of `date`.
    pub fn day(&self) -> u32 {
        let digits = &self.date[8..10];
        u32::from_str(digits).unwrap()
    }

    /// The hour, or `None` when the timestamp has no time part.
    pub fn hour(&self) -> Option<u32> {
        let time = self.time?;
        // A four-byte time is `H:MM`; otherwise it is treated as `HH:MM`.
        let digits = match time.len() {
            4 => &time[0..1],
            _ => &time[0..2],
        };
        Some(u32::from_str(digits).unwrap())
    }

    /// The minute, or `None` when the timestamp has no time part.
    pub fn minute(&self) -> Option<u32> {
        let time = self.time?;
        // A four-byte time is `H:MM`; otherwise it is treated as `HH:MM`.
        let digits = match time.len() {
            4 => &time[2..4],
            _ => &time[3..5],
        };
        Some(u32::from_str(digits).unwrap())
    }

    /// The day name exactly as written in the source.
    pub fn dayname(&self) -> &str {
        self.dayname
    }
}
/// Conversions from `Datetime` into `chrono` types; only compiled when the
/// `chrono` feature is enabled.
#[cfg(feature = "chrono")]
mod chrono {
    use super::Datetime;
    use chrono::*;

    impl Datetime<'_> {
        /// The calendar date of this timestamp.
        pub fn naive_date(&self) -> NaiveDate {
            let year = self.year() as i32;
            let month = self.month();
            let day = self.day();
            NaiveDate::from_ymd(year, month, day)
        }

        /// The time of day; a missing hour or minute is taken as zero, and
        /// seconds are always zero.
        pub fn naive_time(&self) -> NaiveTime {
            let hour = self.hour().unwrap_or(0);
            let minute = self.minute().unwrap_or(0);
            NaiveTime::from_hms(hour, minute, 0)
        }

        /// The date and time combined, without any time zone.
        pub fn naive_date_time(&self) -> NaiveDateTime {
            let date = self.naive_date();
            let time = self.naive_time();
            NaiveDateTime::new(date, time)
        }

        /// The date and time passed to `DateTime::from_utc` with `offset`.
        pub fn date_time<Tz: TimeZone>(&self, offset: Tz::Offset) -> DateTime<Tz> {
            let naive = self.naive_date_time();
            DateTime::from_utc(naive, offset)
        }

        /// The date passed to `Date::from_utc` with `offset`.
        pub fn date<Tz: TimeZone>(&self, offset: Tz::Offset) -> Date<Tz> {
            let naive = self.naive_date();
            Date::from_utc(naive, offset)
        }
    }
}
/// The kind of a timestamp repeater.
///
/// NOTE(review): presumably these map to Org's `+`, `++` and `.+` repeater
/// marks — nothing in this file parses them yet, so confirm before relying
/// on it.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug, Clone, Copy)]
pub enum RepeaterType {
Cumulate,
CatchUp,
Restart,
}
/// The kind of a timestamp warning delay.
///
/// NOTE(review): presumably these map to Org's `-` and `--` delay marks —
/// nothing in this file parses them yet, so confirm before relying on it.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug, Copy, Clone)]
pub enum DelayType {
All,
First,
}
/// The unit of a `Repeater` or `Delay` value.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug, Copy, Clone)]
pub enum TimeUnit {
Hour,
Day,
Week,
Month,
Year,
}
/// A timestamp repeater: a kind, a numeric value and the unit of that value.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug, Copy, Clone)]
pub struct Repeater {
/// Which kind of repeater this is.
pub ty: RepeaterType,
/// The numeric amount, counted in `unit`.
pub value: usize,
/// The unit `value` is counted in.
pub unit: TimeUnit,
}
/// A timestamp warning delay: a kind, a numeric value and the unit of that
/// value.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug, Copy, Clone)]
pub struct Delay {
/// Which kind of delay this is.
pub ty: DelayType,
/// The numeric amount, counted in `unit`.
pub value: usize,
/// The unit `value` is counted in.
pub unit: TimeUnit,
}
/// timestamp objects
///
/// NOTE(review): `repeater` and `delay` are raw `&str` slices here rather
/// than the `Repeater`/`Delay` structs, and the parser in this file always
/// sets them to `None` (see its "repeater and delay" TODO).
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Timestamp<'a> {
/// A single timestamp delimited by `<` and `>`.
Active {
start: Datetime<'a>,
repeater: Option<&'a str>,
delay: Option<&'a str>,
},
/// A single timestamp delimited by `[` and `]`.
Inactive {
start: Datetime<'a>,
repeater: Option<&'a str>,
delay: Option<&'a str>,
},
/// A `<...>` timestamp with both a start and an end.
ActiveRange {
start: Datetime<'a>,
end: Datetime<'a>,
repeater: Option<&'a str>,
delay: Option<&'a str>,
},
/// A `[...]` timestamp with both a start and an end.
InactiveRange {
start: Datetime<'a>,
end: Datetime<'a>,
repeater: Option<&'a str>,
delay: Option<&'a str>,
},
/// A diary timestamp `<%%(...)>`, holding the text between `<%%(` and `)>`.
Diary(&'a str),
}
impl Timestamp<'_> {
    /// Parses a timestamp at the start of `text`.
    ///
    /// Text starting with `<` is tried as an active timestamp and then as a
    /// diary timestamp; text starting with `[` is tried as an inactive
    /// timestamp. Anything else yields `None`. On success returns the
    /// timestamp and the offset just past its closing delimiter.
    pub(crate) fn parse(text: &str) -> Option<(Timestamp<'_>, usize)> {
        if text.starts_with('<') {
            Timestamp::parse_active(text).or_else(|| Timestamp::parse_diary(text))
        } else if text.starts_with('[') {
            Timestamp::parse_inactive(text)
        } else {
            None
        }
    }

    /// Parses an active timestamp (`<...>`) or active range
    /// (`<...>--<...>` or `<... H:MM-H:MM>`) at the start of `text`, which
    /// must begin with `<`.
    ///
    /// Returns the timestamp and the offset just past the last `>` consumed.
    pub(crate) fn parse_active(text: &str) -> Option<(Timestamp<'_>, usize)> {
        debug_assert!(text.starts_with('<'));
        let bytes = text.as_bytes();
        let mut off = memchr(b'>', bytes)?;
        let (start, mut end) = Self::parse_datetime(&text[1..off])?;
        // No time range inside the brackets: look for a `--<...>` second half.
        if end.is_none()
            && off + "--<YYYY-MM-DD >".len() <= text.len()
            && text[off + 1..].starts_with("--<")
        {
            if let Some(new_off) = memchr(b'>', &bytes[off + 1..]) {
                if let Some((start, _)) = Self::parse_datetime(&text[off + 4..off + 1 + new_off]) {
                    end = Some(start);
                    off += new_off + 1;
                }
            }
        }
        Some((
            if let Some(end) = end {
                Timestamp::ActiveRange {
                    start,
                    end,
                    repeater: None,
                    delay: None,
                }
            } else {
                Timestamp::Active {
                    start,
                    repeater: None,
                    delay: None,
                }
            },
            off + 1,
        ))
    }

    /// Parses an inactive timestamp (`[...]`) or inactive range
    /// (`[...]--[...]` or `[... H:MM-H:MM]`) at the start of `text`, which
    /// must begin with `[`.
    ///
    /// Returns the timestamp and the offset just past the last `]` consumed.
    pub(crate) fn parse_inactive(text: &str) -> Option<(Timestamp<'_>, usize)> {
        debug_assert!(text.starts_with('['));
        let bytes = text.as_bytes();
        let mut off = memchr(b']', bytes)?;
        let (start, mut end) = Self::parse_datetime(&text[1..off])?;
        // No time range inside the brackets: look for a `--[...]` second half.
        if end.is_none()
            && off + "--[YYYY-MM-DD ]".len() <= text.len()
            && text[off + 1..].starts_with("--[")
        {
            if let Some(new_off) = memchr(b']', &bytes[off + 1..]) {
                if let Some((start, _)) = Self::parse_datetime(&text[off + 4..off + 1 + new_off]) {
                    end = Some(start);
                    off += new_off + 1;
                }
            }
        }
        Some((
            if let Some(end) = end {
                Timestamp::InactiveRange {
                    start,
                    end,
                    repeater: None,
                    delay: None,
                }
            } else {
                Timestamp::Inactive {
                    start,
                    repeater: None,
                    delay: None,
                }
            },
            off + 1,
        ))
    }

    /// Parses the inside of a timestamp: `YYYY-MM-DD DAYNAME`, optionally
    /// followed by a time (`H:MM` / `HH:MM`) or a time range
    /// (`H:MM-H:MM`, with one or two hour digits on either side).
    ///
    /// Returns the start datetime and, for a time range, an end datetime on
    /// the same date. Any further word (for example a repeater or delay)
    /// makes the whole parse fail.
    fn parse_datetime(text: &str) -> Option<(Datetime<'_>, Option<Datetime<'_>>)> {
        if text.is_empty()
            || !text.starts_with(|c: char| c.is_ascii_digit())
            || !text.ends_with(|c: char| c.is_ascii_alphanumeric())
        {
            return None;
        }
        let mut words = text.split_ascii_whitespace();
        let date = words.next().filter(|word| {
            let word = word.as_bytes();
            // YYYY-MM-DD
            word.len() == 10
                && word[0..4].iter().all(u8::is_ascii_digit)
                && word[4] == b'-'
                && word[5..7].iter().all(u8::is_ascii_digit)
                && word[7] == b'-'
                && word[8..10].iter().all(u8::is_ascii_digit)
        })?;
        // The day name may be anything without digits, signs, closing
        // brackets or newlines.
        let dayname = words.next().filter(|word| {
            word.as_bytes().iter().all(|&c| {
                !(c == b'+'
                    || c == b'-'
                    || c == b']'
                    || c == b'>'
                    || c.is_ascii_digit()
                    || c == b'\n')
            })
        })?;
        let (start, end) = if let Some(word) = words.next() {
            let time = word.as_bytes();
            if (time.len() == "H:MM".len()
                && time[0].is_ascii_digit()
                && time[1] == b':'
                && time[2..4].iter().all(u8::is_ascii_digit))
                || (time.len() == "HH:MM".len()
                    && time[0..2].iter().all(u8::is_ascii_digit)
                    && time[2] == b':'
                    && time[3..5].iter().all(u8::is_ascii_digit))
            {
                (
                    Datetime {
                        date,
                        dayname,
                        time: Some(word),
                    },
                    None,
                )
            } else if time.len() == "H:MM-H:MM".len()
                && time[0].is_ascii_digit()
                && time[1] == b':'
                && time[2..4].iter().all(u8::is_ascii_digit)
                && time[4] == b'-'
                && time[5].is_ascii_digit()
                && time[6] == b':'
                && time[7..9].iter().all(u8::is_ascii_digit)
            {
                (
                    Datetime {
                        date,
                        dayname,
                        time: Some(&word[0..4]),
                    },
                    Some(Datetime {
                        date,
                        dayname,
                        time: Some(&word[5..9]),
                    }),
                )
            } else if time.len() == "H:MM-HH:MM".len()
                && time[0].is_ascii_digit()
                && time[1] == b':'
                && time[2..4].iter().all(u8::is_ascii_digit)
                && time[4] == b'-'
                && time[5..7].iter().all(u8::is_ascii_digit)
                && time[7] == b':'
                && time[8..10].iter().all(u8::is_ascii_digit)
            {
                (
                    Datetime {
                        date,
                        dayname,
                        time: Some(&word[0..4]),
                    },
                    Some(Datetime {
                        date,
                        dayname,
                        time: Some(&word[5..10]),
                    }),
                )
            } else if time.len() == "HH:MM-H:MM".len()
                && time[0..2].iter().all(u8::is_ascii_digit)
                && time[2] == b':'
                && time[3..5].iter().all(u8::is_ascii_digit)
                && time[5] == b'-'
                && time[6].is_ascii_digit()
                && time[7] == b':'
                && time[8..10].iter().all(u8::is_ascii_digit)
            {
                (
                    Datetime {
                        date,
                        dayname,
                        time: Some(&word[0..5]),
                    },
                    Some(Datetime {
                        date,
                        dayname,
                        time: Some(&word[6..10]),
                    }),
                )
            } else if time.len() == "HH:MM-HH:MM".len()
                && time[0..2].iter().all(u8::is_ascii_digit)
                && time[2] == b':'
                && time[3..5].iter().all(u8::is_ascii_digit)
                && time[5] == b'-'
                && time[6..8].iter().all(u8::is_ascii_digit)
                && time[8] == b':'
                && time[9..11].iter().all(u8::is_ascii_digit)
            {
                (
                    Datetime {
                        date,
                        dayname,
                        time: Some(&word[0..5]),
                    },
                    Some(Datetime {
                        date,
                        dayname,
                        time: Some(&word[6..11]),
                    }),
                )
            } else {
                return None;
            }
        } else {
            (
                Datetime {
                    date,
                    dayname,
                    time: None,
                },
                None,
            )
        };
        // TODO: repeater and delay
        if words.next().is_some() {
            None
        } else {
            Some((start, end))
        }
    }

    /// Parses a diary timestamp `<%%(...)>` at the start of `text`, which
    /// must begin with `<`.
    ///
    /// The expression must end with `)` right before the first `>` and must
    /// not contain a newline. Returns the text between `<%%(` and `)>`
    /// together with the offset just past the closing `>`, matching what
    /// `parse_active` and `parse_inactive` return.
    pub(crate) fn parse_diary(text: &str) -> Option<(Timestamp<'_>, usize)> {
        debug_assert!(text.starts_with('<'));
        let bytes = text.as_bytes();
        // Compare bytes rather than slicing the `str`: `&text[1..4]` panics
        // when byte 4 falls inside a multi-byte character (e.g. `<abé>`).
        if text.len() <= "<%%()>".len() || !bytes[1..].starts_with(b"%%(") {
            return None;
        }
        memchr(b'>', bytes)
            .filter(|&i| {
                bytes[i - 1] == b')' && bytes["<%%(".len()..i - 1].iter().all(|&c| c != b'\n')
            })
            // `i` is the index of `>`; the offset handed back must point past it.
            .map(|i| (Timestamp::Diary(&text["<%%(".len()..i - 1]), i + 1))
    }
}
#[test]
fn parse_range() {
    use super::*;

    // Every fixture below falls on the same day; only the time differs.
    let tue = |time| Datetime {
        date: "2003-09-16",
        time,
        dayname: "Tue",
    };

    // A plain inactive timestamp.
    let input = "[2003-09-16 Tue]";
    assert_eq!(
        Timestamp::parse_inactive(input),
        Some((
            Timestamp::Inactive {
                start: tue(None),
                repeater: None,
                delay: None,
            },
            input.len()
        ))
    );

    // An inactive range written as two bracketed timestamps.
    let input = "[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]";
    assert_eq!(
        Timestamp::parse_inactive(input),
        Some((
            Timestamp::InactiveRange {
                start: tue(Some("09:39")),
                end: tue(Some("10:39")),
                repeater: None,
                delay: None
            },
            input.len()
        ))
    );

    // An active range written as a time range inside one timestamp.
    let input = "<2003-09-16 Tue 09:39-10:39>";
    assert_eq!(
        Timestamp::parse_active(input),
        Some((
            Timestamp::ActiveRange {
                start: tue(Some("09:39")),
                end: tue(Some("10:39")),
                repeater: None,
                delay: None
            },
            input.len()
        ))
    );
}
#[test]
fn parse_datetime() {
    use super::*;

    // Every accepted fixture shares the same date and day name.
    let tue = |time| Datetime {
        date: "2003-09-16",
        time,
        dayname: "Tue",
    };

    assert_eq!(
        Timestamp::parse_datetime("2003-09-16 Tue"),
        Some((tue(None), None))
    );
    assert_eq!(
        Timestamp::parse_datetime("2003-09-16 Tue 9:39"),
        Some((tue(Some("9:39")), None))
    );
    assert_eq!(
        Timestamp::parse_datetime("2003-09-16 Tue 09:39"),
        Some((tue(Some("09:39")), None))
    );
    assert_eq!(
        Timestamp::parse_datetime("2003-09-16 Tue 9:39-10:39"),
        Some((tue(Some("9:39")), Some(tue(Some("10:39")))))
    );

    // Malformed date, missing day name, and malformed time.
    let rejected = [
        "2003-9-16 Tue",
        "2003-09-16",
        "2003-09-16 09:39",
        "2003-09-16 Tue 0939",
    ];
    for &input in rejected.iter() {
        assert_eq!(Timestamp::parse_datetime(input), None);
    }
}