366 lines
10 KiB
Rust
366 lines
10 KiB
Rust
use memchr::memrchr_iter;
|
|
use nom::{
|
|
bytes::complete::take_while1,
|
|
character::complete::{anychar, space0},
|
|
combinator::{map, opt, verify},
|
|
sequence::tuple,
|
|
AsBytes, IResult, InputLength, InputTake, Slice,
|
|
};
|
|
|
|
use super::{
|
|
combinator::{
|
|
hash_token, l_bracket_token, line_starts_iter, node, r_bracket_token, token, trim_line_end,
|
|
GreenElement, NodeBuilder,
|
|
},
|
|
drawer::property_drawer_node,
|
|
element::element_nodes,
|
|
input::Input,
|
|
object::object_nodes,
|
|
planning::planning_node,
|
|
SyntaxKind::*,
|
|
};
|
|
|
|
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
|
pub fn headline_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|
debug_assert!(!input.is_empty());
|
|
crate::lossless_parser!(headline_node_base, input)
|
|
}
|
|
|
|
fn headline_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
|
let (input, stars) = headline_stars(input)?;
|
|
|
|
let mut b = NodeBuilder::new();
|
|
|
|
b.token(HEADLINE_STARS, stars);
|
|
|
|
let (input, ws) = space0(input)?;
|
|
b.ws(ws);
|
|
|
|
let (input, headline_keyword) = opt(headline_keyword_token)(input)?;
|
|
|
|
if let Some((headline_keyword, ws)) = headline_keyword {
|
|
b.push(headline_keyword);
|
|
b.ws(ws);
|
|
}
|
|
|
|
let (input, headline_priority) = opt(headline_priority_node)(input)?;
|
|
|
|
if let Some((headline_priority, ws)) = headline_priority {
|
|
b.push(headline_priority);
|
|
b.ws(ws);
|
|
}
|
|
|
|
let (input, (title_and_tags, ws_, nl)) = trim_line_end(input)?;
|
|
let (title, tags) = opt(headline_tags_node)(title_and_tags)?;
|
|
|
|
if !title.is_empty() {
|
|
b.push(node(HEADLINE_TITLE, object_nodes(title)));
|
|
}
|
|
b.push_opt(tags);
|
|
b.ws(ws_);
|
|
b.nl(nl);
|
|
|
|
if input.is_empty() {
|
|
return Ok((input, b.finish(HEADLINE)));
|
|
}
|
|
|
|
let (input, planning) = opt(planning_node)(input)?;
|
|
b.push_opt(planning);
|
|
|
|
if input.is_empty() {
|
|
return Ok((input, b.finish(HEADLINE)));
|
|
}
|
|
|
|
let (input, property_drawer) = opt(property_drawer_node)(input)?;
|
|
b.push_opt(property_drawer);
|
|
|
|
if input.is_empty() {
|
|
return Ok((input, b.finish(HEADLINE)));
|
|
}
|
|
|
|
let (input, section) = opt(section_node)(input)?;
|
|
b.push_opt(section);
|
|
|
|
let mut i = input;
|
|
let current_level = stars.input_len();
|
|
while !i.is_empty() {
|
|
let next_level = i.bytes().take_while(|&c| c == b'*').count();
|
|
|
|
if next_level <= current_level {
|
|
break;
|
|
}
|
|
|
|
let (input, headline) = headline_node(i)?;
|
|
b.push(headline);
|
|
debug_assert!(
|
|
i.input_len() > input.input_len(),
|
|
"{} > {}",
|
|
i.input_len(),
|
|
input.input_len()
|
|
);
|
|
i = input;
|
|
}
|
|
|
|
Ok((i, b.finish(HEADLINE)))
|
|
}
|
|
|
|
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
|
pub fn section_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|
debug_assert!(!input.is_empty());
|
|
let (input, section) = section_text(input)?;
|
|
Ok((input, node(SECTION, element_nodes(section)?)))
|
|
}
|
|
|
|
fn section_text(input: Input) -> IResult<Input, Input, ()> {
|
|
for (input, section) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) {
|
|
if headline_stars(input).is_ok() {
|
|
if section.is_empty() {
|
|
return Err(nom::Err::Error(()));
|
|
}
|
|
|
|
return Ok((input, section));
|
|
}
|
|
}
|
|
|
|
Ok(input.take_split(input.input_len()))
|
|
}
|
|
|
|
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
|
fn headline_stars(input: Input) -> IResult<Input, Input, ()> {
|
|
let bytes = input.as_bytes();
|
|
let level = bytes.iter().take_while(|&&c| c == b'*').count();
|
|
|
|
if level == 0 {
|
|
return Err(nom::Err::Error(()));
|
|
}
|
|
// followed by eof, new line, or whitespace
|
|
else if matches!(
|
|
bytes.get(level),
|
|
None | Some(b'\n') | Some(b'\r') | Some(b' ')
|
|
) {
|
|
Ok(input.take_split(level))
|
|
} else {
|
|
Err(nom::Err::Error(()))
|
|
}
|
|
}
|
|
|
|
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
|
fn headline_tags_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|
if !input.s.ends_with(':') {
|
|
return Err(nom::Err::Error(()));
|
|
};
|
|
|
|
let bytes = input.as_bytes();
|
|
|
|
// we're going to skip to first colon, so we start from the
|
|
// second last character
|
|
let mut i = input.input_len() - 1;
|
|
let mut can_not_be_ws = true;
|
|
let mut children = vec![token(COLON, ":")];
|
|
|
|
for ii in memrchr_iter(b':', bytes).skip(1) {
|
|
let item = &bytes[ii + 1..i];
|
|
|
|
if item.is_empty() {
|
|
children.push(token(COLON, ":"));
|
|
can_not_be_ws = false;
|
|
debug_assert!(i > ii, "{} > {}", i, ii);
|
|
i = ii;
|
|
} else if String::from_utf8_lossy(item)
|
|
.chars()
|
|
// https://github.com/yyr/org-mode/blob/d8494b5668ad4d4e68e83228ae8451eaa01d2220/lisp/org-element.el#L922C25-L922C32
|
|
.all(|c| c.is_alphanumeric() || c == '_' || c == '@' || c == '#' || c == '%')
|
|
{
|
|
children.push(input.slice(ii + 1..i).text_token());
|
|
children.push(token(COLON, ":"));
|
|
can_not_be_ws = false;
|
|
debug_assert!(i > ii, "{} > {}", i, ii);
|
|
i = ii;
|
|
} else if item.iter().all(|&c| c == b' ' || c == b'\t') && !can_not_be_ws {
|
|
children.push(input.slice(ii + 1..i).ws_token());
|
|
children.push(token(COLON, ":"));
|
|
can_not_be_ws = true;
|
|
debug_assert!(i > ii, "{} > {}", i, ii);
|
|
i = ii;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if children.len() <= 2 {
|
|
return Err(nom::Err::Error(()));
|
|
}
|
|
|
|
if i != 0 && bytes[i - 1] != b' ' && bytes[i - 1] != b'\t' {
|
|
return Err(nom::Err::Error(()));
|
|
}
|
|
|
|
// we parse headline tag from right to left,
|
|
// so we need to reverse the result after it finishes
|
|
children.reverse();
|
|
|
|
Ok((input.slice(0..i), node(HEADLINE_TAGS, children)))
|
|
}
|
|
|
|
fn headline_keyword_token(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
|
|
let (input, word) = verify(
|
|
take_while1(|c: char| !c.is_ascii_whitespace()),
|
|
|input: &Input| {
|
|
let Input { c, s } = input;
|
|
c.todo_keywords.0.iter().any(|k| k == s) || c.todo_keywords.1.iter().any(|k| k == s)
|
|
},
|
|
)(input)?;
|
|
|
|
let (input, ws) = space0(input)?;
|
|
|
|
Ok((input, (word.token(HEADLINE_KEYWORD), ws)))
|
|
}
|
|
|
|
fn headline_priority_node(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
|
|
let (input, node) = map(
|
|
tuple((l_bracket_token, hash_token, anychar, r_bracket_token)),
|
|
|(l_bracket, hash, char, r_bracket)| {
|
|
node(
|
|
HEADLINE_PRIORITY,
|
|
[l_bracket, hash, token(TEXT, &char.to_string()), r_bracket],
|
|
)
|
|
},
|
|
)(input)?;
|
|
|
|
let (input, ws) = space0(input)?;
|
|
|
|
Ok((input, (node, ws)))
|
|
}
|
|
|
|
/// Snapshot tests for `headline_node`.
///
/// Each inline snapshot is the pretty debug dump of the syntax tree, where
/// every nesting level is indented by two spaces. That indentation is part
/// of the expected text, so it must mirror the parent/child ranges exactly.
#[test]
fn parse() {
    use crate::{ast::Headline, tests::to_ast};

    let to_headline = to_ast::<Headline>(headline_node);

    // Bare headline: stars, whitespace and a title.
    let hdl = to_headline("* foo");

    insta::assert_debug_snapshot!(
        hdl.syntax,
        @r###"
    HEADLINE@0..5
      HEADLINE_STARS@0..1 "*"
      WHITESPACE@1..2 " "
      HEADLINE_TITLE@2..5
        TEXT@2..5 "foo"
    "###
    );

    // A blank line forms the section; the deeper headline becomes a child.
    let hdl = to_headline("* foo\n\n** bar");
    insta::assert_debug_snapshot!(
        hdl.syntax,
        @r###"
    HEADLINE@0..13
      HEADLINE_STARS@0..1 "*"
      WHITESPACE@1..2 " "
      HEADLINE_TITLE@2..5
        TEXT@2..5 "foo"
      NEW_LINE@5..6 "\n"
      SECTION@6..7
        PARAGRAPH@6..7
          BLANK_LINE@6..7 "\n"
      HEADLINE@7..13
        HEADLINE_STARS@7..9 "**"
        WHITESPACE@9..10 " "
        HEADLINE_TITLE@10..13
          TEXT@10..13 "bar"
    "###
    );

    // Keyword, section text and a nested headline.
    let hdl = to_headline("* TODO foo\nbar\n** baz\n");
    assert_eq!(hdl.level(), 1);
    assert_eq!(hdl.keyword().unwrap().text(), "TODO");
    insta::assert_debug_snapshot!(
        hdl.syntax,
        @r###"
    HEADLINE@0..22
      HEADLINE_STARS@0..1 "*"
      WHITESPACE@1..2 " "
      HEADLINE_KEYWORD@2..6 "TODO"
      WHITESPACE@6..7 " "
      HEADLINE_TITLE@7..10
        TEXT@7..10 "foo"
      NEW_LINE@10..11 "\n"
      SECTION@11..15
        PARAGRAPH@11..15
          TEXT@11..15 "bar\n"
      HEADLINE@15..22
        HEADLINE_STARS@15..17 "**"
        WHITESPACE@17..18 " "
        HEADLINE_TITLE@18..21
          TEXT@18..21 "baz"
        NEW_LINE@21..22 "\n"
    "###
    );

    // Priority; the following shallower headline is not consumed.
    let hdl = to_headline("** [#A] foo\n* baz");
    assert_eq!(hdl.level(), 2);
    assert_eq!(hdl.priority().unwrap().text(), "A");
    insta::assert_debug_snapshot!(
        hdl.syntax,
        @r###"
    HEADLINE@0..12
      HEADLINE_STARS@0..2 "**"
      WHITESPACE@2..3 " "
      HEADLINE_PRIORITY@3..7
        L_BRACKET@3..4 "["
        HASH@4..5 "#"
        TEXT@5..6 "A"
        R_BRACKET@6..7 "]"
      WHITESPACE@7..8 " "
      HEADLINE_TITLE@8..11
        TEXT@8..11 "foo"
      NEW_LINE@11..12 "\n"
    "###
    );
}
|
|
|
|
/// Regression test for headline tag parsing: inputs that must yield no tags,
/// followed by inputs that exercise each special tag character, tab/space
/// separators and non-ASCII tags.
#[test]
fn issue_15_16() {
    use crate::{ast::Headline, tests::to_ast};

    let to_headline = to_ast::<Headline>(headline_node);
    // Parses `source` as a headline and returns its tags as strings.
    let tags_of = |source: &str| -> Vec<String> {
        to_headline(source)
            .tags()
            .map(|tag| tag.to_string())
            .collect()
    };

    // None of these may be recognised as carrying tags.
    for &source in &[
        "* a ::",
        "* a : :",
        "* a :(:",
        "* a :a: :",
        "* a :a :",
        "* a a:",
        "* a :a",
    ] {
        assert!(tags_of(source).is_empty(), "unexpected tags in {:?}", source);
    }

    assert_eq!(tags_of("* a \t:_:"), ["_"]);
    assert_eq!(tags_of("* a \t :@:"), ["@"]);
    assert_eq!(tags_of("* a :#:"), ["#"]);
    assert_eq!(tags_of("* a\t :%:"), ["%"]);
    assert_eq!(tags_of("* a :余: :破:"), ["余", "破"]);
}
|