feat(parser): timestamp parsing

This commit is contained in:
PoiScript 2019-04-04 21:08:23 +08:00
parent 1f52e75d3d
commit 1bb5286dd3
12 changed files with 887 additions and 1023 deletions

View file

@ -6,9 +6,11 @@ use memchr::memchr2;
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize)> {
debug_assert!(text.starts_with("{{{"));
expect!(text, 3, |c: u8| c.is_ascii_alphabetic())?;
let bytes = text.as_bytes();
if text.len() <= 3 || !bytes[3].is_ascii_alphabetic() {
return None;
}
let (name, off) = memchr2(b'}', b'(', bytes)
.filter(|&i| {
bytes[3..i]
@ -18,8 +20,9 @@ pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize)> {
.map(|i| (&text[3..i], i))?;
let (args, off) = if bytes[off] == b'}' {
expect!(text, off + 1, b'}')?;
expect!(text, off + 2, b'}')?;
if text.len() <= off + 2 || bytes[off + 1] != b'}' || bytes[off + 2] != b'}' {
return None;
}
(None, off + 3 /* }}} */)
} else {
Substring::new(")}}}")

View file

@ -1,200 +1,14 @@
mod cookie;
mod emphasis;
mod fn_ref;
mod inline_call;
mod inline_src;
mod link;
mod macros;
mod radio_target;
mod snippet;
mod target;
pub(crate) mod cookie;
pub(crate) mod emphasis;
pub(crate) mod fn_ref;
pub(crate) mod inline_call;
pub(crate) mod inline_src;
pub(crate) mod link;
pub(crate) mod macros;
pub(crate) mod radio_target;
pub(crate) mod snippet;
pub(crate) mod target;
pub(crate) mod timestamp;
pub use self::cookie::Cookie;
use jetscii::bytes;
/// An inline object produced by `parse` in this module.
///
/// Every `&'a str` field is a slice borrowed from the text handed to the parser;
/// no variant owns its data.
#[cfg_attr(test, derive(PartialEq, Debug))]
pub enum Object<'a> {
/// Value returned by `cookie::parse`, tried on text starting with `[`.
Cookie(Cookie<'a>),
/// Footnote reference, parsed by `fn_ref::parse` from text starting with `[fn:`.
FnRef {
label: Option<&'a str>,
def: Option<&'a str>,
},
/// Inline call, parsed by `inline_call::parse` from text starting with `call_`.
InlineCall {
name: &'a str,
args: &'a str,
inside_header: Option<&'a str>,
end_header: Option<&'a str>,
},
/// Inline source block, parsed by `inline_src::parse` from text starting with `src_`.
InlineSrc {
lang: &'a str,
option: Option<&'a str>,
body: &'a str,
},
/// Link, parsed by `link::parse` from text starting with `[[`.
Link {
path: &'a str,
desc: Option<&'a str>,
},
/// Macro call, parsed by `macros::parse` from text starting with `{{{`.
Macros {
name: &'a str,
args: Option<&'a str>,
},
/// Radio target, parsed by `radio_target::parse` from text starting with `<<<`.
RadioTarget {
target: &'a str,
},
/// Snippet, parsed by `snippet::parse` from text starting with `@@`.
Snippet {
name: &'a str,
value: &'a str,
},
/// Target, parsed by `target::parse` from text starting with `<<`
/// (when the third byte is neither `<` nor a newline).
Target {
target: &'a str,
},
// `end` indicates the position of the second marker
/// Emphasis opened by `*`.
Bold {
end: usize,
},
/// Emphasis opened by `/`.
Italic {
end: usize,
},
/// Emphasis opened by `+`.
Strike {
end: usize,
},
/// Emphasis opened by `_`.
Underline {
end: usize,
},
/// Text found between a pair of `=` markers.
Verbatim(&'a str),
/// Text found between a pair of `~` markers.
Code(&'a str),
/// Plain text that was not recognised as any other object.
Text(&'a str),
}
/// Finds the first inline object in `src`.
///
/// The return value is `(object, offset, next)`:
///
/// * when an object starts at the very beginning of `src`, it is returned together with the
///   offset reported by its parser and `next` is `None`;
/// * when an object starts at some later position `pos`, the leading text is returned as
///   `Object::Text(&src[0..pos])` with offset `pos`, and the object that was found (with its own
///   offset) is handed back in `next`;
/// * when nothing is recognised, the whole input is returned as `Object::Text(src)` with offset
///   `src.len()`.
pub fn parse(src: &str) -> (Object<'_>, usize, Option<(Object<'_>, usize)>) {
let bytes = src.as_bytes();
// Bytes at which (or right after which) an object may start.
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
let mut pos = 0;
// The first iteration always inspects position 0; later iterations jump to the next
// candidate byte found by `bs`, whose offset is relative to `pos`.
while let Some(off) = if pos == 0 {
Some(0)
} else {
bs.find(&bytes[pos..])
} {
pos += off;
// Fewer than three bytes remain: the arms below index up to `pos + 2`, so give up and
// return everything as plain text.
if src.len() - pos < 3 {
return (Object::Text(src), src.len(), None);
}
// Returns from `parse`: the object alone when it starts at 0, otherwise the text that
// precedes it followed by the object.
macro_rules! brk {
($obj:expr, $off:expr, $pos:expr) => {
return if $pos == 0 {
($obj, $off, None)
} else {
(Object::Text(&src[0..$pos]), $pos, Some(($obj, $off)))
};
};
}
let tail = &src[pos..];
match bytes[pos] {
b'@' if bytes[pos + 1] == b'@' => {
if let Some((name, value, off)) = snippet::parse(tail) {
brk!(Object::Snippet { name, value }, off, pos);
}
}
b'{' if bytes[pos + 1] == b'{' && bytes[pos + 2] == b'{' => {
if let Some((name, args, off)) = macros::parse(tail) {
brk!(Object::Macros { name, args }, off, pos);
}
}
b'<' if bytes[pos + 1] == b'<' => {
// `<<<` is tried as a radio target, `<<` followed by anything but a newline as a target.
if bytes[pos + 2] == b'<' {
if let Some((target, off)) = radio_target::parse(tail) {
brk!(Object::RadioTarget { target }, off, pos);
}
} else if bytes[pos + 2] != b'\n' {
if let Some((target, off)) = target::parse(tail) {
brk!(Object::Target { target }, off, pos);
}
}
}
b'[' => {
// The three attempts are not exclusive: each one that fails falls through to the next.
if tail[1..].starts_with("fn:") {
if let Some((label, def, off)) = fn_ref::parse(tail) {
brk!(Object::FnRef { label, def }, off, pos);
}
}
if bytes[pos + 1] == b'[' {
if let Some((path, desc, off)) = link::parse(tail) {
brk!(Object::Link { path, desc }, off, pos);
}
}
if let Some((cookie, off)) = cookie::parse(tail) {
brk!(Object::Cookie(cookie), off, pos);
}
// TODO: Timestamp
}
// These bytes may precede markup, so the markup parser is run on the text that starts
// one byte later and the preceding text is extended to include the delimiter itself.
// NOTE(review): `b','` is not in the search set `bs`, so this arm can only see it at
// position 0 - confirm whether `bs` should contain it as well.
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
if let Some((obj, off)) = parse_text_markup(&tail[1..]) {
brk!(obj, off, pos + 1);
}
}
// Any other byte (including `@`/`<` whose guards above did not match): try markup that
// starts right here.
_ => {
if let Some((obj, off)) = parse_text_markup(tail) {
brk!(obj, off, pos);
}
}
}
// Nothing matched at `pos`; resume the search one byte further.
pos += 1;
}
(Object::Text(src), src.len(), None)
}
/// Tries to parse emphasis markup, an inline source block or an inline call that begins at the
/// first byte of `src`.
///
/// On success the object is returned together with an offset:
///
/// * `Bold`, `Strike`, `Italic` and `Underline` carry the `end` reported by `emphasis::parse`
///   and use an offset of `1`;
/// * `Verbatim` and `Code` carry the text between the two markers and use an offset of `end + 1`;
/// * `InlineSrc` and `InlineCall` use the offset reported by their own parser.
///
/// Returns `None` when the first byte does not introduce any of these, or when the dedicated
/// parser rejects the input. Indexes the first byte directly, so `src` must not be empty.
fn parse_text_markup(src: &str) -> Option<(Object<'_>, usize)> {
    let marker = src.as_bytes()[0];

    match marker {
        // Markers whose content is parsed further by the caller.
        b'*' | b'+' | b'/' | b'_' => {
            let end = emphasis::parse(src, marker)?;
            let object = match marker {
                b'*' => Object::Bold { end },
                b'+' => Object::Strike { end },
                b'/' => Object::Italic { end },
                _ => Object::Underline { end },
            };
            Some((object, 1))
        }
        // Markers whose content is taken literally.
        b'=' | b'~' => {
            let end = emphasis::parse(src, marker)?;
            let body = &src[1..end];
            let object = if marker == b'=' {
                Object::Verbatim(body)
            } else {
                Object::Code(body)
            };
            Some((object, end + 1))
        }
        b's' if src.starts_with("src_") => {
            let (lang, option, body, off) = inline_src::parse(src)?;
            Some((Object::InlineSrc { lang, option, body }, off))
        }
        b'c' if src.starts_with("call_") => {
            let (name, args, inside_header, end_header, off) = inline_call::parse(src)?;
            let object = Object::InlineCall {
                name,
                args,
                inside_header,
                end_header,
            };
            Some((object, off))
        }
        _ => None,
    }
}
// Unit tests for the object-level `parse` function of this module.
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::*;
// Markup at position 0: the object is returned alone, with offset 1 and `end` = 5.
assert_eq!(parse("*bold*"), (Object::Bold { end: 5 }, 1, None));
// Markup after leading text: the text comes first and the object follows as the third element.
assert_eq!(
parse("Normal =verbatim="),
(
Object::Text("Normal "),
"Normal ".len(),
Some((Object::Verbatim("verbatim"), "=verbatim=".len()))
)
);
// TODO: more tests
}
}
pub use self::timestamp::*;

View file

@ -1,9 +1,401 @@
use memchr::memchr;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Time<'a> {
pub date: &'a str,
/// A calendar date with an optional time of day, as produced by `parse_datetime`.
pub struct Datetime {
/// `(year, month, day)`, decoded from a `YYYY-MM-DD` word.
pub date: (u16, u8, u8),
/// `(hour, minute)`, decoded from an `H:MM` or `HH:MM` word; `None` when no time was given.
pub time: Option<(u8, u8)>,
}
pub enum Timestamp<'a> {
ActiveRange,
/// Kind of a timestamp repeater (see `Repeater`).
///
/// NOTE(review): presumably these map to Org's `+`, `++` and `.+` repeater marks - the parser in
/// this file does not produce repeaters yet, so confirm once that is implemented.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum RepeaterType {
Cumulate,
CatchUp,
Restart,
}
/// Kind of a timestamp delay (see `Delay`).
///
/// NOTE(review): presumably these map to Org's `-` and `--` warning-delay marks - the parser in
/// this file does not produce delays yet, so confirm once that is implemented.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum DelayType {
All,
First,
}
/// Unit in which the `value` of a `Repeater` or `Delay` is expressed.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum TimeUnit {
Hour,
Day,
Week,
Month,
Year,
}
/// Repeater attached to a timestamp: a kind plus an amount expressed as `value` times `unit`.
///
/// The timestamp parsers in this file currently always leave the repeater unset.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Repeater {
/// Kind of repeater.
pub ty: RepeaterType,
/// Number of `unit`s.
pub value: usize,
/// Unit that `value` counts.
pub unit: TimeUnit,
}
/// Delay attached to a timestamp: a kind plus an amount expressed as `value` times `unit`.
///
/// The timestamp parsers in this file currently always leave the delay unset.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Delay {
/// Kind of delay.
pub ty: DelayType,
/// Number of `unit`s.
pub value: usize,
/// Unit that `value` counts.
pub unit: TimeUnit,
}
/// A parsed timestamp.
///
/// `repeater` and `delay` are part of the data model but are always `None` in the values built
/// by `parse_active` and `parse_inactive` in this file.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Timestamp<'a> {
/// Single `<...>` timestamp, built by `parse_active`.
Active {
start: Datetime,
repeater: Option<Repeater>,
delay: Option<Delay>,
},
/// Single `[...]` timestamp, built by `parse_inactive`.
Inactive {
start: Datetime,
repeater: Option<Repeater>,
delay: Option<Delay>,
},
/// `<...>` timestamp with an end, given either as a time range inside the brackets or as a
/// second timestamp joined with `--`; built by `parse_active`.
ActiveRange {
start: Datetime,
end: Datetime,
repeater: Option<Repeater>,
delay: Option<Delay>,
},
/// `[...]` timestamp with an end, given either as a time range inside the brackets or as a
/// second timestamp joined with `--`; built by `parse_inactive`.
InactiveRange {
start: Datetime,
end: Datetime,
repeater: Option<Repeater>,
delay: Option<Delay>,
},
/// Text found between `<%%(` and `)>`, built by `parse_diary`.
Diary(&'a str),
}
/// Parses an active timestamp (`<...>`) at the start of `text`.
///
/// Yields `Timestamp::ActiveRange` when the timestamp carries a time range
/// (`<2003-09-16 Tue 09:39-10:39>`) or is directly followed by `--<...>` holding a second valid
/// timestamp, and `Timestamp::Active` otherwise. The `usize` is the number of bytes consumed,
/// i.e. the index just past the last closing `>` that was accepted.
///
/// Returns `None` when there is no `>` or when the text between the brackets is not accepted by
/// `parse_datetime`. `text` is expected to start with `<` (checked in debug builds only).
pub fn parse_active(text: &str) -> Option<(Timestamp<'_>, usize)> {
    debug_assert!(text.starts_with('<'));

    let bytes = text.as_bytes();
    let first_close = memchr(b'>', bytes)?;
    let (start, end) = parse_datetime(&bytes[1..first_close])?;

    // A `--<...>` tail is only considered when the first timestamp had no time range of its own.
    let second = if end.is_none()
        && first_close <= text.len() - 14 /* --<YYYY-MM-DD> */
        && text[first_close + 1..].starts_with("--<")
    {
        let rest = &bytes[first_close + 1..];
        memchr(b'>', rest).and_then(|rel_close| {
            // `rest` starts with `--<`, so the second datetime begins at index 3.
            parse_datetime(&rest[3..rel_close]).map(|(datetime, _)| (datetime, rel_close))
        })
    } else {
        None
    };

    let (end, last_close) = match second {
        Some((datetime, rel_close)) => (Some(datetime), first_close + rel_close + 1),
        None => (end, first_close),
    };

    let timestamp = match end {
        Some(end) => Timestamp::ActiveRange {
            start,
            end,
            repeater: None,
            delay: None,
        },
        None => Timestamp::Active {
            start,
            repeater: None,
            delay: None,
        },
    };

    Some((timestamp, last_close + 1))
}
/// Parses an inactive timestamp (`[...]`) at the start of `text`.
///
/// Yields `Timestamp::InactiveRange` when the timestamp carries a time range
/// (`[2003-09-16 Tue 09:39-10:39]`) or is directly followed by `--[...]` holding a second valid
/// timestamp, and `Timestamp::Inactive` otherwise. The `usize` is the number of bytes consumed,
/// i.e. the index just past the last closing `]` that was accepted.
///
/// Returns `None` when there is no `]` or when the text between the brackets is not accepted by
/// `parse_datetime`. `text` is expected to start with `[` (checked in debug builds only).
pub fn parse_inactive(text: &str) -> Option<(Timestamp<'_>, usize)> {
    debug_assert!(text.starts_with('['));

    let bytes = text.as_bytes();
    let first_close = memchr(b']', bytes)?;
    let (start, end) = parse_datetime(&bytes[1..first_close])?;

    // A `--[...]` tail is only considered when the first timestamp had no time range of its own.
    let second = if end.is_none()
        && first_close <= text.len() - 14 /* --[YYYY-MM-DD] */
        && text[first_close + 1..].starts_with("--[")
    {
        let rest = &bytes[first_close + 1..];
        memchr(b']', rest).and_then(|rel_close| {
            // `rest` starts with `--[`, so the second datetime begins at index 3.
            parse_datetime(&rest[3..rel_close]).map(|(datetime, _)| (datetime, rel_close))
        })
    } else {
        None
    };

    let (end, last_close) = match second {
        Some((datetime, rel_close)) => (Some(datetime), first_close + rel_close + 1),
        None => (end, first_close),
    };

    let timestamp = match end {
        Some(end) => Timestamp::InactiveRange {
            start,
            end,
            repeater: None,
            delay: None,
        },
        None => Timestamp::Inactive {
            start,
            repeater: None,
            delay: None,
        },
    };

    Some((timestamp, last_close + 1))
}
/// Parses the inside of a timestamp, i.e. the bytes between the brackets.
///
/// The accepted layout is whitespace-separated words:
///
/// 1. a date, exactly `YYYY-MM-DD`;
/// 2. a day name, any word free of digits, `+`, `-`, `]`, `>` and newlines (required);
/// 3. optionally a time (`H:MM` / `HH:MM`) or a time range (`<time>-<time>`).
///
/// Returns the start datetime and, when a time range was given, an end datetime sharing the same
/// date. Returns `None` for anything else, including empty input and trailing extra words
/// (repeaters and delays are not supported yet).
fn parse_datetime(bytes: &[u8]) -> Option<(Datetime, Option<Datetime>)> {
    /// Decodes a run of ASCII digits that is known to fit in a `u8` (at most two digits).
    fn small_number(digits: &[u8]) -> u8 {
        digits.iter().fold(0, |acc, digit| acc * 10 + (digit - b'0'))
    }

    /// Parses `YYYY-MM-DD` into `(year, month, day)`.
    fn parse_date(word: &[u8]) -> Option<(u16, u8, u8)> {
        if word.len() != 10 || word[4] != b'-' || word[7] != b'-' {
            return None;
        }
        let (year, month, day) = (&word[..4], &word[5..7], &word[8..]);
        if !year.iter().chain(month).chain(day).all(u8::is_ascii_digit) {
            return None;
        }
        let year = year
            .iter()
            .fold(0u16, |acc, &digit| acc * 10 + u16::from(digit - b'0'));
        Some((year, small_number(month), small_number(day)))
    }

    /// Parses `H:MM` or `HH:MM` into `(hour, minute)`.
    fn parse_clock(word: &[u8]) -> Option<(u8, u8)> {
        let (hour, minute) = match word.len() {
            4 if word[1] == b':' => (&word[..1], &word[2..]),
            5 if word[2] == b':' => (&word[..2], &word[3..]),
            _ => return None,
        };
        if !hour.iter().chain(minute).all(u8::is_ascii_digit) {
            return None;
        }
        Some((small_number(hour), small_number(minute)))
    }

    // `first()`/`last()` reject empty input instead of panicking on `bytes[0]`; callers pass an
    // empty slice for inputs such as `<>` or `[]`.
    let first = *bytes.first()?;
    let last = *bytes.last()?;
    if !first.is_ascii_digit() || !last.is_ascii_alphanumeric() {
        return None;
    }

    // similar to str::split_ascii_whitespace, but for &[u8]
    let mut words = bytes
        .split(u8::is_ascii_whitespace)
        .filter(|word| !word.is_empty());

    let date = words.next().and_then(parse_date)?;

    // The day name is mandatory, but its value is not interpreted.
    words.next().filter(|word| {
        !word
            .iter()
            .any(|&c| c.is_ascii_digit() || b"+-]>\n".contains(&c))
    })?;

    let at = |time: (u8, u8)| Datetime {
        date,
        time: Some(time),
    };

    let (start, end) = match words.next() {
        None => (Datetime { date, time: None }, None),
        // A clock value never contains `-`, so the first `-` (if any) separates the two ends of
        // a time range.
        Some(word) => match word.iter().position(|&c| c == b'-') {
            None => (at(parse_clock(word)?), None),
            Some(dash) => (
                at(parse_clock(&word[..dash])?),
                Some(at(parse_clock(&word[dash + 1..])?)),
            ),
        },
    };

    // TODO: repeater and delay
    if words.next().is_some() {
        None
    } else {
        Some((start, end))
    }
}
/// Parses a diary timestamp (`<%%(...)>`) at the start of `text`.
///
/// On success returns `Timestamp::Diary` holding the text between `<%%(` and `)>`, together with
/// the number of bytes consumed (the index just past the closing `>`, matching what
/// `parse_active` and `parse_inactive` report).
///
/// Returns `None` when `text` is not longer than the six bytes of `<%%()>`, does not continue
/// with `%%(`, has no `>`, has a `>` that is not directly preceded by `)`, or contains a newline
/// before the closing `)`. `text` is expected to start with `<` (checked in debug builds only).
pub fn parse_diary(text: &str) -> Option<(Timestamp<'_>, usize)> {
    debug_assert!(text.starts_with('<'));

    let bytes = text.as_bytes();

    // Compare bytes rather than slicing the `str`: `&text[1..4]` panics when byte 4 falls inside
    // a multi-byte character.
    if bytes.len() <= 6 /* <%%()> */ || !bytes[1..].starts_with(b"%%(") {
        return None;
    }

    // Bytes 1..4 are `%%(`, so `close >= 4`; when it is exactly 4 the byte before it is `(` and
    // the check below rejects it before `4..close - 1` is ever sliced.
    let close = memchr(b'>', bytes)?;
    if bytes[close - 1] != b')' || bytes[4..close - 1].contains(&b'\n') {
        return None;
    }

    // Both slice bounds sit next to ASCII bytes (`(` and `)`), so they are char boundaries.
    Some((Timestamp::Diary(&text[4..close - 1]), close + 1))
}
// Unit tests for the timestamp parsers of this module.
#[cfg(test)]
mod tests {
// Exercises the bracketed entry points: a plain timestamp, a range made of two timestamps
// joined by `--`, and a range expressed as a time range inside one timestamp. In every case
// the reported offset is the length of the whole input.
#[test]
fn parse_range() {
use super::*;
assert_eq!(
parse_inactive("[2003-09-16 Tue]"),
Some((
Timestamp::Inactive {
start: Datetime {
date: (2003, 9, 16),
time: None
},
repeater: None,
delay: None,
},
"[2003-09-16 Tue]".len()
))
);
assert_eq!(
parse_inactive("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]"),
Some((
Timestamp::InactiveRange {
start: Datetime {
date: (2003, 9, 16),
time: Some((9, 39))
},
end: Datetime {
date: (2003, 9, 16),
time: Some((10, 39))
},
repeater: None,
delay: None
},
"[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]".len()
))
);
assert_eq!(
parse_active("<2003-09-16 Tue 09:39-10:39>"),
Some((
Timestamp::ActiveRange {
start: Datetime {
date: (2003, 9, 16),
time: Some((9, 39))
},
end: Datetime {
date: (2003, 9, 16),
time: Some((10, 39))
},
repeater: None,
delay: None
},
"<2003-09-16 Tue 09:39-10:39>".len()
))
);
}
// Exercises the bracket-less helper directly: date only, one- and two-digit hours, a time
// range, and a handful of inputs that must be rejected.
#[test]
fn parse_datetime() {
use super::*;
assert_eq!(
parse_datetime(b"2003-09-16 Tue"),
Some((
Datetime {
date: (2003, 9, 16),
time: None
},
None
))
);
assert_eq!(
parse_datetime(b"2003-09-16 Tue 9:39"),
Some((
Datetime {
date: (2003, 9, 16),
time: Some((9, 39))
},
None
))
);
assert_eq!(
parse_datetime(b"2003-09-16 Tue 09:39"),
Some((
Datetime {
date: (2003, 9, 16),
time: Some((9, 39))
},
None
))
);
assert_eq!(
parse_datetime(b"2003-09-16 Tue 9:39-10:39"),
Some((
Datetime {
date: (2003, 9, 16),
time: Some((9, 39))
},
Some(Datetime {
date: (2003, 9, 16),
time: Some((10, 39))
}),
))
);
// Month without a leading zero.
assert_eq!(parse_datetime(b"2003-9-16 Tue"), None);
// Missing day name.
assert_eq!(parse_datetime(b"2003-09-16"), None);
// A time where the day name is expected.
assert_eq!(parse_datetime(b"2003-09-16 09:39"), None);
// Time without the `:` separator.
assert_eq!(parse_datetime(b"2003-09-16 Tue 0939"), None);
}
}