refactor: cleanup parser

This commit is contained in:
PoiScript 2019-04-05 22:54:50 +08:00
parent 1bb5286dd3
commit 8bb7ae41d3
6 changed files with 128 additions and 99 deletions

View file

@ -1,39 +1,46 @@
use crate::lines::Lines;
#[inline]
pub fn is_item(src: &str) -> (bool, bool) {
if src.is_empty() {
return (false, false);
pub fn is_item(text: &str) -> Option<bool> {
if text.is_empty() {
return None;
}
let bytes = src.as_bytes();
let (i, ordered) = match bytes[0] {
b'*' | b'-' | b'+' => (1, false),
let bytes = text.as_bytes();
match bytes[0] {
b'*' | b'-' | b'+' => {
if text.len() > 1 && (bytes[1] == b' ' || bytes[1] == b'\n') {
Some(false)
} else {
None
}
}
b'0'...b'9' => {
let i = bytes
.iter()
.position(|&c| !c.is_ascii_digit())
.unwrap_or_else(|| src.len() - 1);
let c = bytes[i];
if !(c == b'.' || c == b')') {
return (false, false);
.unwrap_or_else(|| text.len() - 1);
if (bytes[i] == b'.' || bytes[i] == b')')
&& i + 1 < text.len()
&& (bytes[i + 1] == b' ' || bytes[i + 1] == b'\n')
{
Some(true)
} else {
None
}
(i + 1, true)
}
_ => return (false, false),
};
if i < src.len() {
// bullet is followed by a space or line ending
(bytes[i] == b' ' || bytes[i] == b'\n', ordered)
} else {
(false, false)
_ => None,
}
}
// returns (bullets, contents begin, contents end, end, has more)
#[inline]
pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
debug_assert!(is_item(&src[ident..]).0);
debug_assert!(
is_item(&src[ident..]).is_some(),
"{:?} is not a list item",
src
);
debug_assert!(
src[..ident].chars().all(|c| c == ' ' || c == '\t'),
"{:?} doesn't starts with indentation {}",
@ -42,60 +49,60 @@ pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
);
let mut lines = Lines::new(src);
let (mut pre_cont_end, mut pre_end, first_line) = lines.next().unwrap();
let beg = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) {
let (mut pre_limit, mut pre_end, first_line) = lines.next().unwrap();
let begin = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) {
Some(i) => i + ident + 1,
None => {
let len = first_line.len();
return (
&first_line,
first_line,
len,
len,
len,
is_item(lines.next().unwrap().2).0,
is_item(lines.next().unwrap().2).is_some(),
);
}
};
let bullet = &src[0..beg];
let bullet = &src[0..begin];
while let Some((mut cont_end, mut end, mut line)) = lines.next() {
while let Some((mut limit, mut end, mut line)) = lines.next() {
// this line is empty
if line.is_empty() {
if let Some((next_cont_end, next_end, next_line)) = lines.next() {
if let Some((next_limit, next_end, next_line)) = lines.next() {
// next line is empty, too
if next_line.is_empty() {
return (bullet, beg, pre_cont_end, next_end, false);
return (bullet, begin, pre_limit, next_end, false);
} else {
// move to next line
pre_end = end;
cont_end = next_cont_end;
limit = next_limit;
end = next_end;
line = next_line;
}
} else {
return (bullet, beg, pre_cont_end, end, false);
return (bullet, begin, pre_limit, end, false);
}
}
let line_ident = count_ident(line);
if line_ident < ident {
return (bullet, beg, pre_cont_end, pre_end, false);
return (bullet, begin, pre_limit, pre_end, false);
} else if line_ident == ident {
return (
bullet,
beg,
pre_cont_end,
begin,
pre_limit,
pre_end,
is_item(&line[ident..]).0,
is_item(&line[ident..]).is_some(),
);
}
pre_end = end;
pre_cont_end = cont_end;
pre_limit = limit;
}
(bullet, beg, src.len(), src.len(), false)
(bullet, begin, src.len(), src.len(), false)
}
#[inline]
@ -112,18 +119,18 @@ mod tests {
fn is_item() {
use super::is_item;
assert_eq!(is_item("+ item"), (true, false));
assert_eq!(is_item("- item"), (true, false));
assert_eq!(is_item("10. item"), (true, true));
assert_eq!(is_item("10) item"), (true, true));
assert_eq!(is_item("1. item"), (true, true));
assert_eq!(is_item("1) item"), (true, true));
assert_eq!(is_item("10. "), (true, true));
assert_eq!(is_item("10.\n"), (true, true));
assert_eq!(is_item("10."), (false, false));
assert_eq!(is_item("+"), (false, false));
assert_eq!(is_item("-item"), (false, false));
assert_eq!(is_item("+item"), (false, false));
assert_eq!(is_item("+ item"), Some(false));
assert_eq!(is_item("- item"), Some(false));
assert_eq!(is_item("10. item"), Some(true));
assert_eq!(is_item("10) item"), Some(true));
assert_eq!(is_item("1. item"), Some(true));
assert_eq!(is_item("1) item"), Some(true));
assert_eq!(is_item("10. "), Some(true));
assert_eq!(is_item("10.\n"), Some(true));
assert_eq!(is_item("10."), None);
assert_eq!(is_item("+"), None);
assert_eq!(is_item("-item"), None);
assert_eq!(is_item("+item"), None);
}
#[test]