feat: list parsing

This commit is contained in:
PoiScript 2019-01-17 11:47:14 +08:00
parent 75362bd2a8
commit ad9f29bcb9
9 changed files with 512 additions and 122 deletions

View file

@ -1,3 +1,5 @@
use regex::Regex;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Block;
@ -12,9 +14,9 @@ impl Block {
let args = eol!(src);
let name = until_while!(src, 8, |c| c == b' ' || c == b'\n', |c: u8| c
.is_ascii_alphabetic())?;
// TODO: ignore case match
let content = src.find(&format!("\n#+END_{}", &src[8..name]))?;
let end = eol!(src, content + 1);
let end_re = format!(r"(?im)^[ \t]*#\+END_{}[ \t]*$", &src[8..name]);
let end_re = Regex::new(&end_re).unwrap();
let (content, end) = end_re.find(src).map(|m| (m.start(), m.end()))?;
Some((
&src[8..name],
@ -24,13 +26,34 @@ impl Block {
Some(&src[name..args])
},
args,
content + 1,
end + 1,
content,
// including the eol character
if end < src.len() && src.as_bytes()[end] == b'\n' {
end + 1
} else {
end
},
))
}
}
#[test]
// Exercises `Block::parse` on a block with no arguments and on a block with
// arguments and a body.
//
// Judging from the two cases below, the expected tuple is
// `(name, args, args_end, contents_begin, end)`, where `end` includes the
// line ending that follows the `#+END_` line when there is one.
// NOTE(review): the field meanings are inferred from the expected values in
// this test — confirm against `Block::parse`.
fn parse() {
// TODO: testing
// Empty block: the `#+END_SRC` line starts right after the first line ending
// and the input has no trailing newline, so `end` is the input length.
assert_eq!(
Block::parse("#+BEGIN_SRC\n#+END_SRC"),
Some(("SRC", None, 11, 12, 21))
);
// Block with arguments and a body.
// NOTE(review): the expected offsets 104 / 114 are larger than the literal
// as shown here allows (the `#+END_SRC` line would start at byte 96), so the
// body lines presumably carry indentation that this view does not preserve —
// confirm against the repository before editing the literal.
assert_eq!(
Block::parse(
r#"#+BEGIN_SRC rust
fn main() {
// print "Hello World!" to the console
println!("Hello World!");
}
#+END_SRC
"#
),
Some(("SRC", Some(" rust"), 16, 104, 114))
);
// TODO: more testing
}

226
src/elements/list.rs Normal file
View file

@ -0,0 +1,226 @@
/// Parser for plain lists.
///
/// A list item starts with a bullet (`*`, `-`, `+`) or a counter (one or more
/// ASCII digits followed by `.` or `)`), which must be followed by a space or
/// a line ending. All offsets handled here are byte offsets into the source.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct List;

impl List {
    /// Returns `true` when `src` starts with a list item bullet or counter.
    ///
    /// `src` must already have its leading indentation stripped. The bullet
    /// has to be followed by at least one more byte, so a bare `"10."` at the
    /// very end of the input is not an item.
    #[inline]
    fn is_item(src: &str) -> bool {
        let bytes = src.as_bytes();
        if bytes.len() < 2 {
            return false;
        }

        // index of the byte that directly follows the bullet / counter
        let after_bullet = match bytes[0] {
            b'*' | b'-' | b'+' => 1,
            b'0'..=b'9' => {
                let digits = bytes
                    .iter()
                    .position(|&c| !c.is_ascii_digit())
                    .unwrap_or_else(|| bytes.len());
                // both the `.` / `)` and the byte after it must exist
                if digits + 1 >= bytes.len() {
                    return false;
                }
                if bytes[digits] != b'.' && bytes[digits] != b')' {
                    return false;
                }
                digits + 1
            }
            _ => return false,
        };

        // bullet is followed by a space or line ending
        bytes[after_bullet] == b' ' || bytes[after_bullet] == b'\n'
    }

    /// Tells whether the first byte of a bullet belongs to an ordered list.
    ///
    /// Counters start with an ASCII digit; `*`, `-` and `+` (and any other
    /// byte, which previously caused a panic) are reported as unordered.
    #[inline]
    pub fn is_ordered(byte: u8) -> bool {
        byte.is_ascii_digit()
    }

    /// Locates the contents of the item that starts at indentation `ident`.
    ///
    /// Returns `(contents_begin, contents_end)`:
    ///
    /// * `contents_begin` is the offset right after the space that follows the
    ///   bullet. When the bullet is directly followed by a line ending, it is
    ///   the offset of that line ending; when neither is found, `src.len()`.
    /// * `contents_end` is the offset right after the first line ending
    ///   (`ident == 0`) or after the first line ending that is followed by
    ///   `ident` spaces (`ident > 0`), or `src.len()` when there is none.
    ///
    /// # Panics
    ///
    /// Panics if `ident` is not a character boundary inside `src`.
    // TODO: handle nested list
    pub fn parse_item(src: &str, ident: usize) -> (usize, usize) {
        // Only the bullet's own line is searched: looking for a space in the
        // whole input would pick one from a later line (or find none at all)
        // when the bullet is immediately followed by a line ending.
        let contents_begin = src[ident..]
            .char_indices()
            .find(|&(_, c)| c == ' ' || c == '\n')
            .map(|(i, c)| if c == ' ' { ident + i + 1 } else { ident + i })
            .unwrap_or_else(|| src.len());

        let line_ending = if ident > 0 {
            // a line ending followed by `ident` spaces
            src.find(&format!("\n{:1$}", " ", ident))
        } else {
            src.find('\n')
        };
        let contents_end = line_ending.map(|i| i + 1).unwrap_or_else(|| src.len());

        (contents_begin, contents_end)
    }

    /// Parses the list that starts on the first line of `src`.
    ///
    /// Returns `(ident, is_ordered, end)`, where `ident` is the indentation of
    /// the first item, `is_ordered` is decided by that first item and `end` is
    /// the byte offset at which the list stops. Returns `None` when the first
    /// line is not a list item.
    ///
    /// The list stops at a line that is indented less than the first item, at
    /// a line with the same indentation that is not an item, or after two
    /// consecutive empty lines. Lines indented deeper than the first item are
    /// always part of the list.
    pub fn parse(src: &str) -> Option<(usize, bool, usize)> {
        // Number of leading spaces / tabs; 0 when nothing else follows.
        let indent_of = |s: &str| {
            s.as_bytes()
                .iter()
                .position(|&c| c != b' ' && c != b'\t')
                .unwrap_or(0)
        };

        let bytes = src.as_bytes();
        let starting_ident = indent_of(src);

        if !Self::is_item(&src[starting_ident..]) {
            return None;
        }

        let is_ordered = Self::is_ordered(bytes[starting_ident]);
        let mut pos = starting_ident;

        // `i` is the offset of the line that follows the one containing `pos`
        while let Some(i) = src[pos..]
            .find('\n')
            .map(|i| i + pos + 1)
            .filter(|&i| i != src.len())
        {
            let ident = indent_of(&src[i..]);

            // less indented than its starting line
            if ident < starting_ident {
                return Some((starting_ident, is_ordered, i - 1));
            }

            // deeper lines belong to the current item
            if ident > starting_ident {
                pos = i;
                continue;
            }

            // a line holding nothing but the indentation
            if bytes[ident + i] == b'\n' {
                let next_line = ident + i + 1;
                let nextline_ident = indent_of(&src[next_line..]);
                // first byte after the indentation of the following line
                let next_text = next_line + nextline_ident;

                // check if it's two consecutive empty lines
                if nextline_ident < starting_ident
                    || (next_text < src.len() && bytes[next_text] == b'\n')
                {
                    return Some((starting_ident, is_ordered, next_text));
                }

                if nextline_ident == starting_ident {
                    // The item test has to look past the indentation of the
                    // following line; testing the line start would reject
                    // every item of an indented list.
                    if Self::is_item(&src[next_text..]) {
                        pos = next_line;
                        continue;
                    } else {
                        return Some((starting_ident, is_ordered, next_text));
                    }
                }
            }

            if Self::is_item(&src[i + ident..]) {
                pos = i;
                continue;
            } else {
                return Some((starting_ident, is_ordered, i - 1));
            }
        }

        Some((starting_ident, is_ordered, src.len()))
    }
}
#[test]
// Exercises `List::parse`; every expected tuple is `(ident, is_ordered, end)`.
//
// NOTE(review): the raw strings below are whitespace-sensitive. Several
// expected values do not match the literals as shown here (e.g. an `end` of
// 24 for a 23-byte literal, or an `ident` of 2 for a line shown with a single
// leading space), so blank lines and indentation were presumably lost in this
// view — confirm against the repository before touching the literals.
fn parse() {
// unordered list, `+` bullets
assert_eq!(
List::parse(
r"+ item1
+ item2
+ item3"
),
Some((0, false, 23))
);
// unordered list, `*` bullets
assert_eq!(
List::parse(
r"* item1
* item2
* item3"
),
Some((0, false, 24))
);
// unordered list, `-` bullets; the expected end lies before the last line
assert_eq!(
List::parse(
r"- item1
- item2
- item1"
),
Some((0, false, 17))
);
// ordered list, `.` counters
assert_eq!(
List::parse(
r"1. item1
2. item1
3. item2"
),
Some((0, true, 28))
);
// ordered list, `)` counters; the expected end is the end of the first line
assert_eq!(
List::parse(
r" 1) item1
2) item1
3) item2"
),
Some((2, true, 10))
);
// orderedness is taken from the first item only
assert_eq!(
List::parse(
r" + item1
1) item1
+ item2"
),
Some((2, false, 32))
);
// the first line is not an item, so there is no list
assert_eq!(
List::parse(
r" item1
+ item1
+ item2"
),
None
);
}
#[test]
fn is_item() {
    // bullets / counters followed by a space or a line ending
    let accepted = [
        "+ item", "- item", "10. item", "10) item", "1. item", "1) item", "10. ", "10.\n",
    ];
    for src in accepted.iter() {
        assert!(List::is_item(src));
    }

    // nothing after the counter, or no space after the bullet
    let rejected = ["10.", "-item", "+item"];
    for src in rejected.iter() {
        assert!(!List::is_item(src));
    }
}
#[test]
// Exercises `List::parse_item`; every expected tuple is
// `(contents_begin, contents_end)` for the item at the given indentation.
//
// NOTE(review): the raw strings below are whitespace-sensitive and the last
// case (indentation 2, expected begin 5) does not match the literal as shown
// here, so leading spaces were presumably lost in this view — confirm against
// the repository before touching the literals.
fn parse_item() {
// contents start after "+ " and end after the first line ending
assert_eq!(List::parse_item("+ Item1\n+ Item2", 0), (2, 8));
// only the first item is measured, later lines are ignored
assert_eq!(
List::parse_item(
r"+ item1
+ item1
+ item2",
0
),
(2, 8)
);
// indented item with a counter
assert_eq!(
List::parse_item(
r" 1. item1
+ item2",
2
),
(5, 11)
);
}

View file

@ -72,6 +72,11 @@ pub enum Element<'a> {
},
Rule,
Comment(&'a str),
List {
ident: usize,
is_ordered: bool,
end: usize,
},
}
impl<'a> Element<'a> {
@ -125,6 +130,23 @@ impl<'a> Element<'a> {
};
}
if bytes[pos] == b'+'
|| bytes[pos] == b'-'
|| bytes[pos] == b'*'
|| (bytes[pos] >= b'0' && bytes[pos] <= b'9')
{
if let Some((ident, is_ordered, list_end)) = List::parse(&src[end..]) {
ret!(
Element::List {
ident,
is_ordered,
end: list_end
},
end
);
}
}
if bytes[pos] == b'\n' {
return (start, Some(Element::Paragraph { end, trailing: pos }), None);
}
@ -134,7 +156,8 @@ impl<'a> Element<'a> {
// Rule
if bytes[pos] == b'-' {
if let Some(off) = Rule::parse(&src[pos..]) {
let off = Rule::parse(&src[pos..]);
if off != 0 {
ret!(Element::Rule, off);
}
}
@ -227,7 +250,7 @@ impl<'a> Element<'a> {
}
// Comment
if bytes[pos] == b'#' && bytes.get(pos + 1).filter(|&&b| b == b' ').is_some() {
if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b' ').unwrap_or(false) {
let eol = src[pos..]
.find('\n')
.map(|i| i + pos + 1)

View file

@ -1,34 +1,35 @@
use regex::Regex;
lazy_static! {
static ref RULE_REGEX: Regex = Regex::new(r"^[ \t]*-{5,}[ \t]*\n?$").unwrap();
}
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Rule;
impl Rule {
pub fn parse(src: &str) -> Option<usize> {
let end = eol!(src);
let leading = until_while!(src, 0, b'-', |c| c == b' ' || c == b'\t')?;
if src[leading..end].chars().all(|c| c == '-') && end - leading > 4 {
Some(end)
} else {
None
}
pub fn parse(src: &str) -> usize {
RULE_REGEX.find(src).map(|m| m.end()).unwrap_or(0)
}
}
#[test]
fn parse() {
assert_eq!(Rule::parse("-----").unwrap(), "-----".len());
assert_eq!(Rule::parse("--------").unwrap(), "--------".len());
assert_eq!(Rule::parse(" -----").unwrap(), " -----".len());
assert_eq!(Rule::parse("\t\t-----").unwrap(), "\t\t-----".len());
assert!(Rule::parse("").is_none());
assert!(Rule::parse("----").is_none());
assert!(Rule::parse(" ----").is_none());
assert!(Rule::parse(" 0----").is_none());
assert!(Rule::parse("0 ----").is_none());
assert!(Rule::parse("0------").is_none());
assert!(Rule::parse("----0----").is_none());
assert!(Rule::parse("\t\t----").is_none());
assert!(Rule::parse("------0").is_none());
assert!(Rule::parse("----- 0").is_none());
assert_eq!(Rule::parse("-----"), "-----".len());
assert_eq!(Rule::parse("--------"), "--------".len());
assert_eq!(Rule::parse(" -----"), " -----".len());
assert_eq!(Rule::parse("\t\t-----"), "\t\t-----".len());
assert_eq!(Rule::parse("\t\t-----\n"), "\t\t-----\n".len());
assert_eq!(Rule::parse("\t\t----- \n"), "\t\t----- \n".len());
assert_eq!(Rule::parse(""), 0);
assert_eq!(Rule::parse("----"), 0);
assert_eq!(Rule::parse(" ----"), 0);
assert_eq!(Rule::parse(" 0----"), 0);
assert_eq!(Rule::parse("0 ----"), 0);
assert_eq!(Rule::parse("0------"), 0);
assert_eq!(Rule::parse("----0----"), 0);
assert_eq!(Rule::parse("\t\t----"), 0);
assert_eq!(Rule::parse("------0"), 0);
assert_eq!(Rule::parse("----- 0"), 0);
}