chore: prepare for v0.10.0-alpha.1

This commit is contained in:
PoiScript 2021-11-09 17:01:57 +08:00
parent 9d7852c4f9
commit af7c305c9e
No known key found for this signature in database
GPG key ID: 22C2B1249D99985E
111 changed files with 9132 additions and 9148 deletions

233
src/syntax/block.rs Normal file
View file

@ -0,0 +1,233 @@
use nom::{
branch::alt,
bytes::complete::{tag, tag_no_case},
character::complete::{alpha1, line_ending, space0},
combinator::eof,
sequence::tuple,
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, debug_assert_lossless, line_starts_iter, node, token, trim_line_end,
GreenElement, NodeBuilder,
},
element::element_nodes,
input::Input,
SyntaxKind::*,
};
/// Parses a whole `#+BEGIN_NAME` … `#+END_NAME` block.
///
/// The begin line decides the node kind. The four verbatim kinds (`COMMENT`,
/// `EXAMPLE`, `EXPORT`, `SRC`) keep their body as comma-quoted text; every
/// other name is treated as a greater block whose body is parsed as elements.
/// Blank lines directly after the begin line and after the end line become
/// children of the block node.
///
/// Returns an error when no matching end line exists.
fn block_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, (block_begin, name)) = block_begin_node(input)?;
    let (input, pre_blank) = blank_lines(input)?;

    let (kind, is_greater_block) = if name.eq_ignore_ascii_case("COMMENT") {
        (COMMENT_BLOCK, false)
    } else if name.eq_ignore_ascii_case("EXAMPLE") {
        (EXAMPLE_BLOCK, false)
    } else if name.eq_ignore_ascii_case("EXPORT") {
        (EXPORT_BLOCK, false)
    } else if name.eq_ignore_ascii_case("SRC") {
        (SOURCE_BLOCK, false)
    } else if name.eq_ignore_ascii_case("CENTER") {
        (CENTER_BLOCK, true)
    } else if name.eq_ignore_ascii_case("QUOTE") {
        (QUOTE_BLOCK, true)
    } else if name.eq_ignore_ascii_case("VERSE") {
        (VERSE_BLOCK, true)
    } else {
        (SPECIAL_BLOCK, true)
    };

    // Try every line start as the position of the end line; the first one
    // that parses as a matching end line closes the block.
    for offset in line_starts_iter(input.as_str()) {
        let (candidate, contents) = input.take_split(offset);
        let (rest, block_end) = match block_end_node(candidate, name) {
            Ok(found) => found,
            Err(_) => continue,
        };
        let (rest, post_blank) = blank_lines(rest)?;

        let body = if is_greater_block {
            element_nodes(contents)?
        } else {
            comma_quoted_text_nodes(contents)
        };

        let mut children = vec![block_begin];
        children.extend(pre_blank);
        children.push(node(BLOCK_CONTENT, body));
        children.push(block_end);
        children.extend(post_blank);
        return Ok((rest, node(kind, children)));
    }

    Err(nom::Err::Error(()))
}
/// Parses a `#+BEGIN_NAME arguments` line.
///
/// Returns the `BLOCK_BEGIN` node together with the block name exactly as it
/// was written in the input, so the caller can look for the matching end line.
fn block_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
    let (rest, (indent, keyword, name, (argument, trailing_ws, newline))) =
        tuple((space0, tag_no_case("#+BEGIN_"), alpha1, trim_line_end))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.text(keyword);
    builder.text(name);
    // The argument token is always emitted, even when it is empty.
    builder.text(argument);
    builder.ws(trailing_ws);
    builder.nl(newline);
    let begin = builder.finish(BLOCK_BEGIN);

    Ok((rest, (begin, name.as_str())))
}
/// Parses a `#+END_NAME` line that closes the block opened with `name`.
///
/// Both the `#+END_` keyword and the block name are matched ignoring ASCII
/// case, so `#+BEGIN_SRC` may be closed by `#+end_src`. The line may be
/// indented and may carry trailing spaces; it must end with a line ending or
/// the end of input.
fn block_end_node<'a>(input: Input<'a>, name: &str) -> IResult<Input<'a>, GreenElement, ()> {
    let (input, (ws, end, name, ws_, nl)) = tuple((
        space0,
        tag_no_case("#+END_"),
        // The begin keyword is already case-insensitive; the name has to be
        // as well, otherwise mixed-case begin/end pairs never close.
        tag_no_case(name),
        space0,
        alt((line_ending, eof)),
    ))(input)?;

    let mut b = NodeBuilder::new();
    b.ws(ws);
    b.text(end);
    b.text(name);
    b.ws(ws_);
    b.nl(nl);
    Ok((input, b.finish(BLOCK_END)))
}
/// Splits verbatim block contents into `TEXT` and `COMMA` tokens.
///
/// A line that starts with `,*` or `,#+` has its leading comma emitted as a
/// separate `COMMA` token; everything else is emitted as `TEXT`. Empty text
/// runs produce no token.
fn comma_quoted_text_nodes(input: Input) -> Vec<GreenElement> {
    let source = input.as_str();
    let mut nodes = Vec::new();
    // Start of the text that has not been emitted yet.
    let mut pending = 0;

    for line_start in line_starts_iter(source) {
        let line = &source[line_start..];
        let is_quoted = line.starts_with(",*") || line.starts_with(",#+");
        if is_quoted {
            if pending < line_start {
                nodes.push(token(TEXT, &source[pending..line_start]));
            }
            nodes.push(token(COMMA, ","));
            // Resume right after the comma.
            pending = line_start + 1;
        }
    }

    if pending < source.len() {
        nodes.push(token(TEXT, &source[pending..]));
    }
    nodes
}
/// Parses a block element; in debug builds the result is additionally checked
/// to reproduce the consumed input exactly.
pub fn block_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut checked = debug_assert_lossless(block_node_base);
    checked(input)
}
// Snapshot tests for `block_node`: each case parses a block and compares the
// debug dump of the resulting syntax tree with an inline snapshot.
#[test]
fn test_parse() {
use crate::ast::{ExampleBlock, SourceBlock};
use crate::tests::to_ast;
let to_src_block = to_ast::<SourceBlock>(block_node);
let to_example_block = to_ast::<ExampleBlock>(block_node);
// Example block: the leading commas of `,*` and `,#+` lines become COMMA tokens.
insta::assert_debug_snapshot!(
to_example_block(
r#"#+BEGIN_EXAMPLE
,* headline
,#+block
text
#+END_EXAMPLE"#
).syntax,
@r###"
EXAMPLE_BLOCK@0..59
BLOCK_BEGIN@0..16
TEXT@0..8 "#+BEGIN_"
TEXT@8..15 "EXAMPLE"
TEXT@15..15 ""
NEW_LINE@15..16 "\n"
BLOCK_CONTENT@16..42
COMMA@16..17 ","
TEXT@17..28 "* headline\n"
COMMA@28..29 ","
TEXT@29..42 "#+block\ntext\n"
BLOCK_END@42..59
WHITESPACE@42..46 " "
TEXT@46..52 "#+END_"
TEXT@52..59 "EXAMPLE"
"###
);
// Source block whose body consists only of blank lines: they are attached
// as BLANK_LINE nodes and BLOCK_CONTENT stays empty.
insta::assert_debug_snapshot!(
to_src_block(
r#"#+BEGIN_SRC
#+END_SRC"#
).syntax,
@r###"
SOURCE_BLOCK@0..27
BLOCK_BEGIN@0..12
TEXT@0..8 "#+BEGIN_"
TEXT@8..11 "SRC"
TEXT@11..11 ""
NEW_LINE@11..12 "\n"
BLANK_LINE@12..13
NEW_LINE@12..13 "\n"
BLANK_LINE@13..14
NEW_LINE@13..14 "\n"
BLOCK_CONTENT@14..14
BLOCK_END@14..27
WHITESPACE@14..18 " "
TEXT@18..24 "#+END_"
TEXT@24..27 "SRC"
"###
);
// Lower-case begin/end keywords are accepted as well.
insta::assert_debug_snapshot!(
to_src_block(
r#"#+begin_src
#+end_src"#
).syntax,
@r###"
SOURCE_BLOCK@0..25
BLOCK_BEGIN@0..12
TEXT@0..8 "#+begin_"
TEXT@8..11 "src"
TEXT@11..11 ""
NEW_LINE@11..12 "\n"
BLOCK_CONTENT@12..12
BLOCK_END@12..25
WHITESPACE@12..16 " "
TEXT@16..22 "#+end_"
TEXT@22..25 "src"
"###
);
// Source block with an argument, trailing whitespace on the begin line and
// blank lines after the end line.
insta::assert_debug_snapshot!(
to_src_block(
r#"#+BEGIN_SRC javascript
alert('Hello World!');
#+END_SRC
"#).syntax,
@r###"
SOURCE_BLOCK@0..69
BLOCK_BEGIN@0..27
TEXT@0..8 "#+BEGIN_"
TEXT@8..11 "SRC"
TEXT@11..22 " javascript"
WHITESPACE@22..26 " "
NEW_LINE@26..27 "\n"
BLOCK_CONTENT@27..50
TEXT@27..50 "alert('Hello World!');\n"
BLOCK_END@50..64
WHITESPACE@50..54 " "
TEXT@54..60 "#+END_"
TEXT@60..63 "SRC"
NEW_LINE@63..64 "\n"
BLANK_LINE@64..65
NEW_LINE@64..65 "\n"
BLANK_LINE@65..69
WHITESPACE@65..69 " "
"###
);
// TODO: more testing
}

137
src/syntax/clock.rs Normal file
View file

@ -0,0 +1,137 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1, line_ending, space0},
combinator::{eof, map, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{
blank_lines, colon_token, debug_assert_lossless, double_arrow_token, GreenElement,
NodeBuilder,
},
input::Input,
timestamp::{timestamp_active_node, timestamp_inactive_node},
SyntaxKind,
};
/// Parses a `CLOCK:` line.
///
/// Accepted shape: optional indentation, `CLOCK:`, a timestamp (inactive is
/// tried before active), an optional ` => H:MM` duration, optional trailing
/// whitespace, a line ending or end of input, then any following blank lines.
pub fn clock_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(
        tuple((
            space0,
            tag("CLOCK:"),
            space0,
            alt((timestamp_inactive_node, timestamp_active_node)),
            opt(tuple((
                space0,
                double_arrow_token,
                space0,
                digit1,
                colon_token,
                digit1,
            ))),
            space0,
            alt((line_ending, eof)),
            blank_lines,
        )),
        |(indent, keyword, gap, timestamp, duration, trailing_ws, newline, post_blank)| {
            let mut builder = NodeBuilder::new();
            builder.ws(indent);
            builder.text(keyword);
            builder.ws(gap);
            builder.push(timestamp);

            match duration {
                Some((before_arrow, arrow, after_arrow, hour, colon, minute)) => {
                    builder.ws(before_arrow);
                    builder.push(arrow);
                    builder.ws(after_arrow);
                    builder.text(hour);
                    builder.push(colon);
                    builder.text(minute);
                }
                None => {}
            }

            builder.ws(trailing_ws);
            builder.nl(newline);
            builder.children.extend(post_blank);
            builder.finish(SyntaxKind::CLOCK)
        },
    ))(input)
}
// Snapshot tests for `clock_node`.
#[test]
fn parse() {
use crate::ast::Clock;
use crate::tests::to_ast;
let to_clock = to_ast::<Clock>(clock_node);
// Running clock: a single inactive timestamp and no duration.
insta::assert_debug_snapshot!(
to_clock("CLOCK: [2003-09-16 Tue 09:39]").syntax,
@r###"
CLOCK@0..29
TEXT@0..6 "CLOCK:"
WHITESPACE@6..7 " "
TIMESTAMP_INACTIVE@7..29
L_BRACKET@7..8 "["
TIMESTAMP_YEAR@8..12 "2003"
MINUS@12..13 "-"
TIMESTAMP_MONTH@13..15 "09"
MINUS@15..16 "-"
TIMESTAMP_DAY@16..18 "16"
WHITESPACE@18..19 " "
TIMESTAMP_DAYNAME@19..22 "Tue"
WHITESPACE@22..23 " "
TIMESTAMP_HOUR@23..25 "09"
COLON@25..26 ":"
TIMESTAMP_MINUTE@26..28 "39"
R_BRACKET@28..29 "]"
"###
);
// Closed clock: timestamp range, `=>` duration and a trailing blank line.
insta::assert_debug_snapshot!(
to_clock("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00\n\n").syntax,
@r###"
CLOCK@0..64
TEXT@0..6 "CLOCK:"
WHITESPACE@6..7 " "
TIMESTAMP_INACTIVE@7..53
L_BRACKET@7..8 "["
TIMESTAMP_YEAR@8..12 "2003"
MINUS@12..13 "-"
TIMESTAMP_MONTH@13..15 "09"
MINUS@15..16 "-"
TIMESTAMP_DAY@16..18 "16"
WHITESPACE@18..19 " "
TIMESTAMP_DAYNAME@19..22 "Tue"
WHITESPACE@22..23 " "
TIMESTAMP_HOUR@23..25 "09"
COLON@25..26 ":"
TIMESTAMP_MINUTE@26..28 "39"
R_BRACKET@28..29 "]"
MINUS2@29..31 "--"
L_BRACKET@31..32 "["
TIMESTAMP_YEAR@32..36 "2003"
MINUS@36..37 "-"
TIMESTAMP_MONTH@37..39 "09"
MINUS@39..40 "-"
TIMESTAMP_DAY@40..42 "16"
WHITESPACE@42..43 " "
TIMESTAMP_DAYNAME@43..46 "Tue"
WHITESPACE@46..47 " "
TIMESTAMP_HOUR@47..49 "10"
COLON@49..50 ":"
TIMESTAMP_MINUTE@50..52 "39"
R_BRACKET@52..53 "]"
WHITESPACE@53..54 " "
DOUBLE_ARROW@54..56 "=>"
WHITESPACE@56..58 " "
TEXT@58..59 "1"
COLON@59..60 ":"
TEXT@60..62 "00"
NEW_LINE@62..63 "\n"
BLANK_LINE@63..64
NEW_LINE@63..64 "\n"
"###
);
}

259
src/syntax/combinator.rs Normal file
View file

@ -0,0 +1,259 @@
use std::iter::once;
use memchr::{memchr, memchr_iter};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{line_ending, space0},
combinator::eof,
sequence::tuple,
AsBytes, IResult, InputLength, InputTake, Parser,
};
use rowan::{GreenNode, GreenToken, Language, NodeOrToken};
use super::{input::Input, OrgLanguage, SyntaxKind, SyntaxKind::*};
pub type GreenElement = NodeOrToken<GreenNode, GreenToken>;
#[inline]
pub fn token(kind: SyntaxKind, input: &str) -> GreenElement {
GreenElement::Token(GreenToken::new(OrgLanguage::kind_to_raw(kind), input))
}
#[inline]
pub fn node<I>(kind: SyntaxKind, children: I) -> GreenElement
where
I: IntoIterator<Item = GreenElement>,
I::IntoIter: ExactSizeIterator,
{
GreenElement::Node(GreenNode::new(OrgLanguage::kind_to_raw(kind), children))
}
// Generates a public parser function `$name` that matches the literal
// `$token` exactly and wraps the matched text in a green token of kind `$kind`.
macro_rules! token_parser {
    ($name:ident, $token:literal, $kind:ident) => {
        #[doc = "Recognizes `"]
        #[doc = $token]
        #[doc = "` and returns GreenToken"]
        pub fn $name(input: Input) -> IResult<Input, GreenElement, ()> {
            tag($token)(input).map(|(rest, matched)| (rest, token($kind, matched.as_str())))
        }
    };
}
// Punctuation token parsers generated by `token_parser!`. Each invocation
// defines one function that matches exactly the given literal; the
// multi-character literals (`[[`, `{{{`, `<<<`, ...) get their own parsers,
// separate from the single-character ones.
// The commented-out invocations below are parsers that are not generated.
token_parser!(l_bracket_token, "[", L_BRACKET);
token_parser!(r_bracket_token, "]", R_BRACKET);
token_parser!(l_bracket2_token, "[[", L_BRACKET2);
token_parser!(r_bracket2_token, "]]", R_BRACKET2);
token_parser!(l_parens_token, "(", L_PARENS);
token_parser!(r_parens_token, ")", R_PARENS);
token_parser!(l_angle_token, "<", L_ANGLE);
token_parser!(r_angle_token, ">", R_ANGLE);
token_parser!(l_curly_token, "{", L_CURLY);
token_parser!(r_curly_token, "}", R_CURLY);
token_parser!(l_curly3_token, "{{{", L_CURLY3);
token_parser!(r_curly3_token, "}}}", R_CURLY3);
token_parser!(l_angle2_token, "<<", L_ANGLE2);
token_parser!(r_angle2_token, ">>", R_ANGLE2);
token_parser!(l_angle3_token, "<<<", L_ANGLE3);
token_parser!(r_angle3_token, ">>>", R_ANGLE3);
token_parser!(at_token, "@", AT);
token_parser!(at2_token, "@@", AT2);
token_parser!(minus2_token, "--", MINUS2);
// token_parser!(percent_token, "%", PERCENT);
token_parser!(percent2_token, "%%", PERCENT2);
// token_parser!(slash_token, "/", SLASH);
// token_parser!(underscore_token, "_", UNDERSCORE);
// token_parser!(star_token, "*", STAR);
token_parser!(plus_token, "+", PLUS);
token_parser!(minus_token, "-", MINUS);
token_parser!(colon_token, ":", COLON);
token_parser!(colon2_token, "::", COLON2);
token_parser!(pipe_token, "|", PIPE);
// token_parser!(equal_token, "=", EQUAL);
// token_parser!(tilde_token, "~", TILDE);
token_parser!(hash_plus_token, "#+", HASH_PLUS);
token_parser!(hash_token, "#", HASH);
token_parser!(double_arrow_token, "=>", DOUBLE_ARROW);
/// Wraps parser `f` with a debug-only check that it is lossless.
///
/// After `f` succeeds, the text of the produced element must be identical to
/// the part of the input that `f` consumed. The check is a `debug_assert_eq!`;
/// the parse result itself is passed through unchanged.
pub fn debug_assert_lossless<'a, F>(
    mut f: F,
) -> impl FnMut(Input<'a>) -> IResult<Input<'a>, GreenElement, ()>
where
    F: Parser<Input<'a>, GreenElement, ()>,
{
    move |input: Input| {
        let (rest, element) = f.parse(input)?;
        // Consumed text = input minus whatever is left over.
        debug_assert_eq!(
            &input.as_str()[..(input.input_len() - rest.input_len())],
            &element.to_string(),
            "parser must be lossless"
        );
        Ok((rest, element))
    }
}
/// Consumes every leading blank line and returns one `BLANK_LINE` node each.
///
/// A blank line is optional spaces/tabs followed by a line ending or the end
/// of input. The parser never fails: when the input does not start with a
/// blank line, an empty vector is returned and nothing is consumed.
pub fn blank_lines(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
    let mut blank = Vec::new();
    let mut remaining = input;

    loop {
        // Stop at end of input, otherwise `eof` would keep matching forever.
        if remaining.is_empty() {
            break;
        }

        let parsed = tuple::<_, _, (), _>((space0, alt((line_ending, eof))))(remaining);
        let (rest, (ws, nl)) = match parsed {
            Ok(matched) => matched,
            Err(_) => break,
        };

        let mut builder = NodeBuilder::new();
        builder.ws(ws);
        builder.nl(nl);
        blank.push(builder.finish(BLANK_LINE));
        remaining = rest;
    }

    Ok((remaining, blank))
}
// Unit tests for `blank_lines`.
#[test]
fn test_blank_lines() {
use crate::config::ParseConfig;
let config = &ParseConfig::default();
// Empty input: nothing consumed, nothing produced.
let (input, output) = blank_lines(("", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output, vec![]);
// A line with content is not blank, so the input is left untouched.
let (input, output) = blank_lines((" t", config).into()).unwrap();
assert_eq!(input.as_str(), " t");
assert_eq!(output, vec![]);
// Mixed line endings; trailing whitespace at end of input counts as a blank line.
let (input, output) = blank_lines((" \r\n\n\t\t\r\n \n ", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.len(), 5);
assert_eq!(output[0].to_string(), " \r\n");
assert_eq!(output[1].to_string(), "\n");
assert_eq!(output[2].to_string(), "\t\t\r\n");
assert_eq!(output[3].to_string(), " \n");
assert_eq!(output[4].to_string(), " ");
// Parsing stops at the first non-blank line; later blank lines stay in the rest.
let (input, output) =
blank_lines((" \r\n\n\t\t\r\n \n t\n \r\n\n\t\t\r\n \n", config).into()).unwrap();
assert_eq!(input.as_str(), " t\n \r\n\n\t\t\r\n \n");
assert_eq!(output.len(), 4);
assert_eq!(output[0].to_string(), " \r\n");
assert_eq!(output[1].to_string(), "\n");
assert_eq!(output[2].to_string(), "\t\t\r\n");
assert_eq!(output[3].to_string(), " \n");
}
/// Splits the first line of `input` into three parts and returns the rest.
///
/// The returned tuple is `(contents, whitespace, newline)`:
/// 1. `contents` — the line without trailing spaces/tabs and line ending,
/// 2. `whitespace` — the trailing spaces/tabs,
/// 3. `newline` — `"\n"`, `"\r\n"`, or empty when the line ends at end of input.
///
/// Concatenating the three parts always yields the original line. Only space
/// and tab count as trailing whitespace, and only a real line terminator is
/// returned as `newline`; other control characters (a lone `\r`, form feed)
/// stay in `contents`. This parser never fails.
pub fn trim_line_end(input: Input) -> IResult<Input, (Input, Input, Input), ()> {
    // Take the first line, including its terminating '\n' when present.
    let line_len = memchr(b'\n', input.as_bytes()).map_or(input.input_len(), |i| i + 1);
    let (input, line) = input.take_split(line_len);

    let bytes = line.as_bytes();
    let nl_len = if bytes.ends_with(b"\r\n") {
        2
    } else if bytes.ends_with(b"\n") {
        1
    } else {
        0
    };

    // Everything before the line terminator; trailing blanks are trimmed from it.
    let body = &bytes[..bytes.len() - nl_len];
    let contents_len = body
        .iter()
        .rposition(|b| *b != b' ' && *b != b'\t')
        .map_or(0, |i| i + 1);
    let ws_len = body.len() - contents_len;

    // Both split points are followed only by ASCII bytes, so they are always
    // valid UTF-8 boundaries.
    let (ws_and_nl, contents) = line.take_split(contents_len);
    let (nl, ws) = ws_and_nl.take_split(ws_len);
    Ok((input, (contents, ws, nl)))
}
// Unit tests for `trim_line_end`.
#[test]
fn test_trim_line_end() {
use crate::config::ParseConfig;
let config = &ParseConfig::default();
// Empty input yields three empty parts.
let (input, output) = trim_line_end(("", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.0.as_str(), "");
assert_eq!(output.1.as_str(), "");
assert_eq!(output.2.as_str(), "");
// A line without terminator: everything is contents.
let (input, output) = trim_line_end(("* hello, world :abc:", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.0.as_str(), "* hello, world :abc:");
assert_eq!(output.1.as_str(), "");
assert_eq!(output.2.as_str(), "");
// Trailing whitespace and a CRLF terminator are split off; later lines are left over.
let (input, output) =
trim_line_end(("* hello, world :abc: \r\nrest\n", config).into()).unwrap();
assert_eq!(input.as_str(), "rest\n");
assert_eq!(output.0.as_str(), "* hello, world :abc:");
assert_eq!(output.1.as_str(), " ");
assert_eq!(output.2.as_str(), "\r\n");
}
/// Iterates over the byte offsets at which a line starts.
///
/// Offset `0` is always yielded first, followed by the offset right after
/// every `\n` in `s` (which equals `s.len()` when `s` ends with `\n`).
pub fn line_starts_iter(s: &str) -> impl Iterator<Item = usize> + '_ {
    let after_newlines = memchr_iter(b'\n', s.as_bytes()).map(|newline| newline + 1);
    once(0).chain(after_newlines)
}
/// Iterates over the byte offsets at which a line ends.
///
/// Yields the offset right after every `\n` in `s`, followed by `s.len()`.
/// When `s` ends with `\n`, `s.len()` is therefore yielded twice.
pub fn line_ends_iter(s: &str) -> impl Iterator<Item = usize> + '_ {
    let after_newlines = memchr_iter(b'\n', s.as_bytes()).map(|newline| newline + 1);
    after_newlines.chain(once(s.len()))
}
/// Collects the children of a green node that is being built.
///
/// Children are appended in source order with the helper methods below and
/// turned into a node with [`NodeBuilder::finish`].
#[derive(Default)]
pub struct NodeBuilder {
    /// Children collected so far, in source order.
    pub children: Vec<GreenElement>,
}

impl NodeBuilder {
    /// Creates a builder without children.
    pub fn new() -> NodeBuilder {
        NodeBuilder::default()
    }

    /// Appends `i` as a whitespace token; empty input is skipped.
    pub fn ws(&mut self, i: Input) {
        if !i.is_empty() {
            debug_assert!(i.bytes().all(|c| c.is_ascii_whitespace()));
            self.children.push(i.ws_token())
        }
    }

    /// Appends `i` as a new-line token; empty input is skipped.
    ///
    /// In debug builds, `i` must be exactly `"\n"` or `"\r\n"`.
    pub fn nl(&mut self, i: Input) {
        if !i.is_empty() {
            debug_assert!(
                i.s == "\n" || i.s == "\r\n",
                "{:?} should be a new line",
                i.s
            );
            self.children.push(i.nl_token())
        }
    }

    /// Appends `i` as a text token, even when `i` is empty.
    pub fn text(&mut self, i: Input) {
        self.children.push(i.text_token())
    }

    /// Appends `i` as a token of the given kind.
    pub fn token(&mut self, kind: SyntaxKind, i: Input) {
        self.children.push(i.token(kind))
    }

    /// Appends an already built element.
    pub fn push(&mut self, elem: GreenElement) {
        self.children.push(elem)
    }

    /// Appends `elem` when it is `Some`; does nothing otherwise.
    pub fn push_opt(&mut self, elem: Option<GreenElement>) {
        self.children.extend(elem)
    }

    /// Returns the number of children collected so far.
    pub fn len(&self) -> usize {
        self.children.len()
    }

    /// Consumes the builder and wraps its children in a node of kind `kind`.
    pub fn finish(self, kind: SyntaxKind) -> GreenElement {
        GreenElement::Node(GreenNode::new(kind.into(), self.children))
    }
}

85
src/syntax/comment.rs Normal file
View file

@ -0,0 +1,85 @@
use nom::{IResult, InputTake};
use super::{
combinator::{blank_lines, debug_assert_lossless, line_ends_iter, node, GreenElement},
input::Input,
SyntaxKind,
};
/// Parses a run of consecutive comment lines plus the blank lines after it.
///
/// A comment line is, after leading whitespace, either a lone `#` (optionally
/// followed by a line ending) or text starting with `# `. All comment lines
/// are emitted as a single text token. Fails when the first line is not a
/// comment line.
fn comment_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let source = input.as_str();
    // End offset of the last line accepted as a comment.
    let mut consumed = 0;

    for line_end in line_ends_iter(source) {
        let line = source[consumed..line_end].trim_start();
        let is_comment = matches!(line, "#" | "#\n" | "#\r\n") || line.starts_with("# ");
        if !is_comment {
            break;
        }
        consumed = line_end;
    }

    if consumed == 0 {
        return Err(nom::Err::Error(()));
    }

    let (rest, contents) = input.take_split(consumed);
    let (rest, post_blank) = blank_lines(rest)?;

    let mut children = vec![contents.text_token()];
    children.extend(post_blank);
    Ok((rest, node(SyntaxKind::COMMENT, children)))
}
/// Parses a comment element; in debug builds the result is additionally
/// checked to reproduce the consumed input exactly.
pub fn comment_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut checked = debug_assert_lossless(comment_node_base);
    checked(input)
}
// Snapshot tests for `comment_node`.
#[test]
fn parse() {
use crate::{
syntax::{comment::comment_node, input::Input, SyntaxNode},
ParseConfig,
};
// Parses `input` as a comment and wraps the result in a root syntax node.
let t = |input: &str| {
SyntaxNode::new_root(
comment_node(Input {
s: input,
c: &ParseConfig::default(),
})
.unwrap()
.1
.into_node()
.unwrap(),
)
};
// A lone `#` is a comment.
insta::assert_debug_snapshot!(
t("#"),
@r###"
COMMENT@0..1
TEXT@0..1 "#"
"###
);
// Consecutive (possibly indented) comment lines form one comment; the
// trailing blank line is attached to it.
insta::assert_debug_snapshot!(
t("#\n # a\n #\n\n"),
@r###"
COMMENT@0..12
TEXT@0..11 "#\n # a\n #\n"
BLANK_LINE@11..12
NEW_LINE@11..12 "\n"
"###
);
// The comment ends at the first non-comment line.
insta::assert_debug_snapshot!(
t("#\na\n #\n\n"),
@r###"
COMMENT@0..2
TEXT@0..2 "#\n"
"###
);
}

144
src/syntax/cookie.rs Normal file
View file

@ -0,0 +1,144 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::digit0,
combinator::map,
sequence::{pair, separated_pair, tuple},
IResult,
};
use super::{
combinator::{
debug_assert_lossless, l_bracket_token, node, r_bracket_token, token, GreenElement,
},
input::Input,
SyntaxKind::*,
};
/// Parses a statistics cookie: `[N/M]` or `[N%]`.
///
/// The numbers may be empty (`[/]`, `[%]`). The `N/M` form is tried first,
/// then the percentage form; anything else between the brackets is an error.
pub fn cookie_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(
        tuple((
            l_bracket_token,
            alt((
                separated_pair(digit0, tag("/"), digit0),
                pair(digit0, tag("%")),
            )),
            r_bracket_token,
        )),
        |(l_bracket, (first, second), r_bracket)| {
            // In the percentage form the second part is the `%` sign itself;
            // in the `N/M` form it is the (possibly empty) digits after `/`.
            let is_percentage = second.as_str() == "%";

            let mut children = Vec::with_capacity(5);
            children.push(l_bracket);
            children.push(token(TEXT, first.as_str()));
            if is_percentage {
                children.push(token(PERCENT, second.as_str()));
            } else {
                children.push(token(SLASH, "/"));
                children.push(token(TEXT, second.as_str()));
            }
            children.push(r_bracket);

            node(COOKIE, children)
        },
    ))(input)
}
// Snapshot tests for `cookie_node`, followed by inputs that must be rejected.
#[test]
fn parse() {
use crate::ast::Cookie;
use crate::tests::to_ast;
use crate::ParseConfig;
let to_cookie = to_ast::<Cookie>(cookie_node);
// `N/M` form.
insta::assert_debug_snapshot!(
to_cookie("[1/10]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..2 "1"
SLASH@2..3 "/"
TEXT@3..5 "10"
R_BRACKET@5..6 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[1/1000]").syntax,
@r###"
COOKIE@0..8
L_BRACKET@0..1 "["
TEXT@1..2 "1"
SLASH@2..3 "/"
TEXT@3..7 "1000"
R_BRACKET@7..8 "]"
"###
);
// Percentage form.
insta::assert_debug_snapshot!(
to_cookie("[10%]").syntax,
@r###"
COOKIE@0..5
L_BRACKET@0..1 "["
TEXT@1..3 "10"
PERCENT@3..4 "%"
R_BRACKET@4..5 "]"
"###
);
// Empty numbers still produce (empty) TEXT tokens.
insta::assert_debug_snapshot!(
to_cookie("[%]").syntax,
@r###"
COOKIE@0..3
L_BRACKET@0..1 "["
TEXT@1..1 ""
PERCENT@1..2 "%"
R_BRACKET@2..3 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[/]").syntax,
@r###"
COOKIE@0..3
L_BRACKET@0..1 "["
TEXT@1..1 ""
SLASH@1..2 "/"
TEXT@2..2 ""
R_BRACKET@2..3 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[100/]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..4 "100"
SLASH@4..5 "/"
TEXT@5..5 ""
R_BRACKET@5..6 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[/100]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..1 ""
SLASH@1..2 "/"
TEXT@2..5 "100"
R_BRACKET@5..6 "]"
"###
);
// Malformed cookies are rejected.
let config = &ParseConfig::default();
assert!(cookie_node(("[10% ]", config).into()).is_err());
assert!(cookie_node(("[1//100]", config).into()).is_err());
assert!(cookie_node(("[1\\100]", config).into()).is_err());
assert!(cookie_node(("[10%%]", config).into()).is_err());
}

128
src/syntax/document.rs Normal file
View file

@ -0,0 +1,128 @@
use nom::{
combinator::{iterator, opt},
IResult,
};
use super::{
combinator::{blank_lines, debug_assert_lossless, node, GreenElement},
headline::{headline_node, section_node},
input::Input,
SyntaxKind::*,
};
/// Parses a whole document; in debug builds the result is additionally
/// checked to reproduce the consumed input exactly.
pub fn document_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut checked = debug_assert_lossless(document_node_base);
    checked(input)
}
/// Builds the `DOCUMENT` node: leading blank lines, an optional section that
/// precedes the first headline, then as many headlines as can be parsed.
fn document_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, leading_blank) = blank_lines(input)?;
    let (input, section) = opt(section_node)(input)?;

    // The blank lines come first, so their vector doubles as the child list.
    let mut children = leading_blank;
    children.extend(section);

    let mut headlines = iterator(input, headline_node);
    children.extend(&mut headlines);
    let (input, ()) = headlines.finish()?;

    debug_assert!(input.is_empty());
    Ok((input, node(DOCUMENT, children)))
}
// Snapshot tests for `document_node`.
#[test]
fn parse() {
use crate::ast::Document;
use crate::tests::to_ast;
let to_document = to_ast::<Document>(document_node);
// Empty document.
insta::assert_debug_snapshot!(
to_document("").syntax,
@r###"
DOCUMENT@0..0
"###
);
// Only blank lines.
insta::assert_debug_snapshot!(
to_document("\n \n\n").syntax,
@r###"
DOCUMENT@0..5
BLANK_LINE@0..1
NEW_LINE@0..1 "\n"
BLANK_LINE@1..4
WHITESPACE@1..3 " "
NEW_LINE@3..4 "\n"
BLANK_LINE@4..5
NEW_LINE@4..5 "\n"
"###
);
// Text before any headline becomes a section.
insta::assert_debug_snapshot!(
to_document("section").syntax,
@r###"
DOCUMENT@0..7
SECTION@0..7
PARAGRAPH@0..7
TEXT@0..7 "section"
"###
);
// A blank line followed by a headline.
insta::assert_debug_snapshot!(
to_document("\n* section").syntax,
@r###"
DOCUMENT@0..10
BLANK_LINE@0..1
NEW_LINE@0..1 "\n"
HEADLINE@1..10
HEADLINE_STARS@1..2 "*"
WHITESPACE@2..3 " "
HEADLINE_TITLE@3..10
TEXT@3..10 "section"
"###
);
// A deeper headline followed by a shallower one: both are direct children.
insta::assert_debug_snapshot!(
to_document("\n** heading 2\n* heading 1").syntax,
@r###"
DOCUMENT@0..25
BLANK_LINE@0..1
NEW_LINE@0..1 "\n"
HEADLINE@1..14
HEADLINE_STARS@1..3 "**"
WHITESPACE@3..4 " "
HEADLINE_TITLE@4..13
TEXT@4..13 "heading 2"
NEW_LINE@13..14 "\n"
HEADLINE@14..25
HEADLINE_STARS@14..15 "*"
WHITESPACE@15..16 " "
HEADLINE_TITLE@16..25
TEXT@16..25 "heading 1"
"###
);
// `*heading 1` has no space after the star, so it is paragraph text.
insta::assert_debug_snapshot!(
to_document("section\n** heading 2\n*heading 1").syntax,
@r###"
DOCUMENT@0..31
SECTION@0..8
PARAGRAPH@0..8
TEXT@0..8 "section\n"
HEADLINE@8..31
HEADLINE_STARS@8..10 "**"
WHITESPACE@10..11 " "
HEADLINE_TITLE@11..20
TEXT@11..20 "heading 2"
NEW_LINE@20..21 "\n"
SECTION@21..31
PARAGRAPH@21..31
TEXT@21..31 "*heading 1"
"###
);
}

200
src/syntax/drawer.rs Normal file
View file

@ -0,0 +1,200 @@
use nom::{
branch::alt,
bytes::complete::{tag_no_case, take_while1},
character::complete::{line_ending, space0, space1},
combinator::{eof, iterator, map, opt},
sequence::tuple,
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, colon_token, debug_assert_lossless, line_starts_iter, node, plus_token,
trim_line_end, GreenElement, NodeBuilder,
},
input::Input,
SyntaxKind::*,
};
/// Parses a drawer opening line such as `:NAME:`.
///
/// The name may contain ASCII letters, `-` and `_`. Returns the
/// `DRAWER_BEGIN` node together with the name as written in the input.
fn drawer_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
    let (rest, (indent, open_colon, name, close_colon, trailing_ws, newline)) = tuple((
        space0,
        colon_token,
        take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'),
        colon_token,
        space0,
        alt((line_ending, eof)),
    ))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.push(open_colon);
    builder.text(name);
    builder.push(close_colon);
    builder.ws(trailing_ws);
    builder.nl(newline);
    let begin = builder.finish(DRAWER_BEGIN);

    Ok((rest, (begin, name.as_str())))
}
/// Parses a drawer closing line: `:END:` in any letter case, optionally
/// indented and followed by trailing whitespace.
fn drawer_end_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let (rest, (indent, open_colon, keyword, close_colon, trailing_ws, newline)) = tuple((
        space0,
        colon_token,
        tag_no_case("END"),
        colon_token,
        space0,
        alt((line_ending, eof)),
    ))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.push(open_colon);
    builder.text(keyword);
    builder.push(close_colon);
    builder.ws(trailing_ws);
    builder.nl(newline);

    Ok((rest, builder.finish(DRAWER_END)))
}
/// Parses a drawer: opening line, blank lines, raw contents, `:END:` line and
/// the blank lines after it.
///
/// The contents are kept as a single text token, which is emitted even when
/// empty. Fails when no `:END:` line is found.
fn drawer_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, (begin, _)) = drawer_begin_node(input)?;
    let (input, pre_blank) = blank_lines(input)?;

    // Try every line start as the position of the `:END:` line.
    for offset in line_starts_iter(input.as_str()) {
        let (candidate, contents) = input.take_split(offset);
        let (rest, end) = match drawer_end_node(candidate) {
            Ok(found) => found,
            Err(_) => continue,
        };
        let (rest, post_blank) = blank_lines(rest)?;

        let mut children = vec![begin];
        children.extend(pre_blank);
        children.push(contents.text_token());
        children.push(end);
        children.extend(post_blank);
        return Ok((rest, node(DRAWER, children)));
    }

    Err(nom::Err::Error(()))
}
/// Parses a property drawer: a `:PROPERTIES:` line, any number of node
/// properties, and the closing `:END:` line.
///
/// The drawer name is compared ignoring ASCII case, matching how the closing
/// `:END:` keyword is recognized. Fails when the drawer has another name or
/// when the properties are not directly followed by `:END:`.
fn property_drawer_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, (begin, name)) = drawer_begin_node(input)?;
    if !name.eq_ignore_ascii_case("PROPERTIES") {
        return Err(nom::Err::Error(()));
    }

    let mut children = vec![begin];

    // Collect node properties until the first line that is not one.
    let mut it = iterator(input, node_property_node);
    children.extend(&mut it);
    let (input, _) = it.finish()?;

    let (input, end) = drawer_end_node(input)?;
    children.push(end);

    Ok((input, node(PROPERTY_DRAWER, children)))
}
/// Parses one node property line: `:NAME: VALUE` or `:NAME+: VALUE`.
///
/// The name is a non-empty run of characters that are neither `:`, `+` nor
/// whitespace, so a name can never contain blanks or span several lines. At
/// least one space or tab must follow the closing colon; the value is the
/// rest of the line without trailing whitespace.
fn node_property_node(input: Input) -> IResult<Input, GreenElement, ()> {
    map(
        tuple((
            space0,
            colon_token,
            take_while1(|c: char| c != ':' && c != '+' && !c.is_whitespace()),
            opt(plus_token),
            colon_token,
            space1,
            trim_line_end,
        )),
        |(ws, colon, name, plus, colon_, ws_, (value, ws__, nl))| {
            let mut b = NodeBuilder::new();
            b.ws(ws);
            b.push(colon);
            b.text(name);
            b.push_opt(plus);
            b.push(colon_);
            b.ws(ws_);
            b.text(value);
            b.ws(ws__);
            b.nl(nl);
            b.finish(NODE_PROPERTY)
        },
    )(input)
}
/// Parses a property drawer; in debug builds the result is additionally
/// checked to reproduce the consumed input exactly.
#[tracing::instrument(skip(input), fields(input = input.s))]
pub fn property_drawer_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut checked = debug_assert_lossless(property_drawer_node_base);
    checked(input)
}
/// Parses a drawer; in debug builds the result is additionally checked to
/// reproduce the consumed input exactly.
#[tracing::instrument(skip(input), fields(input = input.s))]
pub fn drawer_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut checked = debug_assert_lossless(drawer_node_base);
    checked(input)
}
// Snapshot tests for `drawer_node`, plus a regression check for an
// unterminated drawer.
#[test]
fn parse() {
use crate::{ast::Drawer, tests::to_ast, ParseConfig};
let to_drawer = to_ast::<Drawer>(drawer_node);
// Drawer contents are kept as one raw TEXT token.
insta::assert_debug_snapshot!(
to_drawer(
r#":DRAWER:
:CUSTOM_ID: id
:END:"#
).syntax,
@r###"
DRAWER@0..33
DRAWER_BEGIN@0..9
COLON@0..1 ":"
TEXT@1..7 "DRAWER"
COLON@7..8 ":"
NEW_LINE@8..9 "\n"
TEXT@9..26 " :CUSTOM_ID: id\n"
DRAWER_END@26..33
WHITESPACE@26..28 " "
COLON@28..29 ":"
TEXT@29..32 "END"
COLON@32..33 ":"
"###
);
// Empty drawer with blank lines before and after the end line; the empty
// contents still produce a TEXT token.
insta::assert_debug_snapshot!(
to_drawer(
r#":DRAWER:
:END:
"#
).syntax,
@r###"
DRAWER@0..19
DRAWER_BEGIN@0..9
COLON@0..1 ":"
TEXT@1..7 "DRAWER"
COLON@7..8 ":"
NEW_LINE@8..9 "\n"
BLANK_LINE@9..10
NEW_LINE@9..10 "\n"
TEXT@10..10 ""
DRAWER_END@10..18
WHITESPACE@10..12 " "
COLON@12..13 ":"
TEXT@13..16 "END"
COLON@16..17 ":"
NEW_LINE@17..18 "\n"
BLANK_LINE@18..19
NEW_LINE@18..19 "\n"
"###
);
let config = &ParseConfig::default();
// A drawer without `:END:` must be rejected.
// https://github.com/PoiScript/orgize/issues/9
assert!(drawer_node((":SPAGHETTI:\n", config).into()).is_err());
}

112
src/syntax/dyn_block.rs Normal file
View file

@ -0,0 +1,112 @@
use nom::{
branch::alt,
bytes::complete::tag_no_case,
character::complete::{alpha1, line_ending, space0, space1},
combinator::eof,
sequence::tuple,
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, debug_assert_lossless, line_starts_iter, node, trim_line_end, GreenElement,
NodeBuilder,
},
input::Input,
SyntaxKind::*,
};
/// Parses a dynamic block: `#+BEGIN:` line, blank lines, raw contents,
/// `#+END:` line and the blank lines after it.
///
/// The contents are kept as a single text token. Fails when no `#+END:` line
/// is found.
fn dyn_block_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, begin) = dyn_block_begin_node(input)?;
    let (input, pre_blank) = blank_lines(input)?;

    // Try every line start as the position of the `#+END:` line.
    for offset in line_starts_iter(input.as_str()) {
        let (candidate, contents) = input.take_split(offset);
        let (rest, end) = match dyn_block_end_node(candidate) {
            Ok(found) => found,
            Err(_) => continue,
        };
        let (rest, post_blank) = blank_lines(rest)?;

        let mut children = vec![begin];
        children.extend(pre_blank);
        children.push(contents.text_token());
        children.push(end);
        children.extend(post_blank);
        return Ok((rest, node(DYN_BLOCK, children)));
    }

    Err(nom::Err::Error(()))
}
/// Parses a `#+BEGIN: name arguments` line of a dynamic block.
///
/// The keyword is matched in any letter case and must be followed by at
/// least one space or tab and an alphabetic block name.
fn dyn_block_begin_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let (rest, (indent, keyword, gap, name, (args, trailing_ws, newline))) = tuple((
        space0,
        tag_no_case("#+BEGIN:"),
        space1,
        alpha1,
        trim_line_end,
    ))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.text(keyword);
    builder.ws(gap);
    builder.text(name);
    // The argument token is always emitted, even when it is empty.
    builder.text(args);
    builder.ws(trailing_ws);
    builder.nl(newline);

    Ok((rest, builder.finish(DYN_BLOCK_BEGIN)))
}
/// Parses the `#+END:` line of a dynamic block (any letter case, optionally
/// indented and followed by trailing whitespace).
fn dyn_block_end_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let (rest, (indent, keyword, trailing_ws, newline)) = tuple((
        space0,
        tag_no_case("#+END:"),
        space0,
        alt((line_ending, eof)),
    ))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.text(keyword);
    builder.ws(trailing_ws);
    builder.nl(newline);

    Ok((rest, builder.finish(DYN_BLOCK_END)))
}
/// Parses a dynamic block; in debug builds the result is additionally checked
/// to reproduce the consumed input exactly.
pub fn dyn_block_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut checked = debug_assert_lossless(dyn_block_node_base);
    checked(input)
}
// Snapshot test for `dyn_block_node`.
#[test]
fn parse() {
use crate::{ast::DynBlock, tests::to_ast};
let to_dyn_block = to_ast::<DynBlock>(dyn_block_node);
// Dynamic block with arguments, a blank line before the contents and
// trailing whitespace after the end line.
insta::assert_debug_snapshot!(
to_dyn_block(
r#"#+BEGIN: clocktable :scope file
CONTENTS
#+END:
"#).syntax,
@r###"
DYN_BLOCK@0..53
DYN_BLOCK_BEGIN@0..32
TEXT@0..8 "#+BEGIN:"
WHITESPACE@8..9 " "
TEXT@9..19 "clocktable"
TEXT@19..31 " :scope file"
NEW_LINE@31..32 "\n"
BLANK_LINE@32..33
NEW_LINE@32..33 "\n"
TEXT@33..42 "CONTENTS\n"
DYN_BLOCK_END@42..49
TEXT@42..48 "#+END:"
NEW_LINE@48..49 "\n"
BLANK_LINE@49..53
WHITESPACE@49..53 " "
"###
);
}

235
src/syntax/element.rs Normal file
View file

@ -0,0 +1,235 @@
use nom::{AsBytes, IResult, InputTake};
use super::{
block::block_node,
clock::clock_node,
combinator::{line_starts_iter, GreenElement},
comment::comment_node,
drawer::drawer_node,
dyn_block::dyn_block_node,
fixed_width::fixed_width_node,
fn_def::fn_def_node,
input::Input,
keyword::keyword_node,
list::list_node,
paragraph::paragraph_nodes,
rule::rule_node,
table::{org_table_node, table_el_node},
};
/// Parses the whole input into a sequence of elements.
///
/// In debug builds the concatenated text of the returned nodes is checked to
/// be identical to the input.
#[tracing::instrument(skip(input), fields(input = input.s))]
pub fn element_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
    // TODO:
    // debug_assert!(!input.is_empty());
    let nodes = element_nodes_base(input)?;

    debug_assert_eq!(
        input.as_str(),
        nodes.iter().map(|n| n.to_string()).collect::<String>(),
        "parser must be lossless"
    );

    Ok(nodes)
}
/// Parses input into multiple elements.
///
/// The input must not contain blank lines at the beginning.
///
/// The remaining input is scanned line by line. As soon as an element parser
/// succeeds at a line start, the text before that line (if any) is emitted as
/// paragraph nodes, the element is appended, and scanning restarts on the
/// input left after the element. Whatever remains when no further element is
/// found is emitted as paragraph nodes.
fn element_nodes_base(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
// Classification of the previously scanned line.
#[derive(PartialEq, Eq)]
enum PreviousLine {
None,
BlankLine,
AffiliatedKeyword,
Other,
}
let mut children = vec![];
// Input that has not been turned into nodes yet.
let mut i = input;
let mut previous_line = PreviousLine::None;
'l: loop {
// `head` is the text of `i` before the current line start, `input` is
// the text from the current line start onwards.
for (input, head) in line_starts_iter(i.as_str()).map(|idx| i.take_split(idx)) {
// find the first byte that's not a whitespace
let trimmed = input.as_str().trim_start_matches(|c| c == ' ' || c == '\t');
// if this line is an affiliated keyword, then skip it
if is_affiliated_keyword(trimmed) {
if previous_line == PreviousLine::BlankLine {
children.extend(paragraph_nodes(head)?);
}
previous_line = PreviousLine::AffiliatedKeyword;
continue;
}
// if this line is a blank line
if is_blank_line(trimmed) {
if previous_line == PreviousLine::AffiliatedKeyword {
previous_line = PreviousLine::BlankLine;
// try to parse a keyword at this position; on success flush
// `head` as paragraphs, append the keyword and restart scanning
if let Ok((input, node)) = keyword_node(input) {
if !head.is_empty() {
children.extend(paragraph_nodes(head)?);
}
children.push(node);
i = input;
continue 'l;
}
}
continue;
}
// pick the candidate element parsers from the first non-blank byte
if let Ok((input, node)) = match trimmed.bytes().next() {
Some(b'[') => fn_def_node(input),
Some(b'0'..=b'9') | Some(b'*') => list_node(input),
Some(b'C') => clock_node(input),
Some(b'-') => rule_node(input).or_else(|_| list_node(input)),
Some(b':') => drawer_node(input).or_else(|_| fixed_width_node(input)),
Some(b'|') => org_table_node(input),
Some(b'+') => table_el_node(input).or_else(|_| list_node(input)),
Some(b'#') => block_node(input)
.or_else(|_| keyword_node(input))
.or_else(|_| dyn_block_node(input))
.or_else(|_| comment_node(input)),
_ => Err(nom::Err::Error(())),
} {
// an element was parsed: flush the preceding text as paragraphs,
// append the element and restart scanning after it
if !head.is_empty() {
children.extend(paragraph_nodes(head)?);
}
children.push(node);
i = input;
continue 'l;
}
}
// no element was found at any line start of the remaining input
break;
}
// the rest of the input is paragraph text
if !i.is_empty() {
children.extend(paragraph_nodes(i)?);
}
Ok(children)
}
/// Returns `true` when `line` starts with one of the affiliated keyword
/// prefixes (`#+CAPTION:`, `#+NAME:`, `#+ATTR_…`, …).
///
/// The comparison is case-sensitive and `line` is expected to have its
/// leading whitespace already removed.
pub fn is_affiliated_keyword(line: &str) -> bool {
    const PREFIXES: [&str; 14] = [
        "#+CAPTION:",
        "#+DATA:",
        "#+HEADER:",
        "#+HEADERS:",
        "#+LABEL:",
        "#+NAME:",
        "#+PLOT:",
        "#+RESNAME:",
        "#+RESULT:",
        "#+RESULTS:",
        "#+SOURCE:",
        "#+SRCNAME:",
        "#+TBLNAME:",
        "#+ATTR_",
    ];

    PREFIXES.iter().any(|prefix| line.starts_with(*prefix))
}
/// Returns `true` when `line` is empty or starts with `\n` or `\r`.
///
/// `line` is expected to have its leading spaces and tabs already removed.
pub fn is_blank_line(line: &str) -> bool {
    match line.as_bytes().first() {
        None => true,
        Some(&first) => first == b'\n' || first == b'\r',
    }
}
pub fn element_node(input: Input) -> IResult<Input, GreenElement, ()> {
let mut has_affiliated_keyword = false;
for offset in line_starts_iter(input.as_str()) {
// find the first byte that's not a whitespace
let Some(idx) = input.as_bytes()[offset..]
.iter()
.position(|b| *b != b' ' && *b != b'\t')
else {
break;
};
let line = &input.as_str()[(idx + offset)..];
// if this line is an affiliated keyword, that we skip it
if line.starts_with("#+CAPTION:")
|| line.starts_with("#+DATA:")
|| line.starts_with("#+HEADER:")
|| line.starts_with("#+HEADERS:")
|| line.starts_with("#+LABEL:")
|| line.starts_with("#+NAME:")
|| line.starts_with("#+PLOT:")
|| line.starts_with("#+RESNAME:")
|| line.starts_with("#+RESULT:")
|| line.starts_with("#+RESULTS:")
|| line.starts_with("#+SOURCE:")
|| line.starts_with("#+SRCNAME:")
|| line.starts_with("#+TBLNAME:")
|| line.starts_with("#+ATTR_")
{
has_affiliated_keyword = true;
continue;
}
return match input.as_bytes()[idx + offset] {
b'[' => fn_def_node(input),
b'0'..=b'9' | b'*' => list_node(input),
b'C' => clock_node(input),
b'-' => rule_node(input).or_else(|_| list_node(input)),
b':' => drawer_node(input).or_else(|_| fixed_width_node(input)),
b'|' => org_table_node(input),
b'+' => table_el_node(input).or_else(|_| list_node(input)),
b'#' => block_node(input)
.or_else(|_| keyword_node(input))
.or_else(|_| dyn_block_node(input))
.or_else(|_| comment_node(input)),
_ => Err(nom::Err::Error(())),
};
}
// we find an affiliated keyword, but it's not followed by any element
// in this case, we treat it as a simple keyword
return Err(nom::Err::Error(()));
}
// Snapshot tests for `element_nodes`: the resulting children are wrapped in a
// SECTION node so the whole tree can be printed.
#[test]
fn parse() {
use crate::syntax::{SyntaxKind, SyntaxNode};
use crate::{syntax::combinator::node, ParseConfig};
// helper: parse `input` with the default config and build a SECTION root
let t = |input: &str| {
let config = &ParseConfig::default();
let children = element_nodes((input, config).into()).unwrap();
SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap())
};
// two paragraphs separated by a blank line
insta::assert_debug_snapshot!(
t(r#"a
b"#),
@r###"
SECTION@0..4
PARAGRAPH@0..3
TEXT@0..2 "a\n"
BLANK_LINE@2..3
NEW_LINE@2..3 "\n"
PARAGRAPH@3..4
TEXT@3..4 "b"
"###
);
// an affiliated keyword that is followed by a paragraph is kept as a
// standalone KEYWORD node
insta::assert_debug_snapshot!(
t("#+ATTR_HTML: :width 300px\n[[./img/a.jpg]]"),
@r###"
SECTION@0..41
KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..11 "ATTR_HTML"
COLON@11..12 ":"
TEXT@12..25 " :width 300px"
NEW_LINE@25..26 "\n"
PARAGRAPH@26..41
LINK@26..41
L_BRACKET2@26..28 "[["
LINK_PATH@28..39 "./img/a.jpg"
R_BRACKET2@39..41 "]]"
"###
)
}

146
src/syntax/emphasis.rs Normal file
View file

@ -0,0 +1,146 @@
use bytecount::count;
use memchr::memchr_iter;
use nom::{combinator::map, AsBytes, IResult, Slice};
use super::{
combinator::{debug_assert_lossless, node, token, GreenElement},
input::Input,
object::object_nodes,
SyntaxKind::*,
};
/// Parses `*bold*` markup into a `BOLD` node whose contents are parsed as
/// nested objects.
pub fn bold_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(emphasis(b'*'), |contents| {
        // opening marker, nested objects, closing marker
        let children: Vec<GreenElement> = std::iter::once(token(STAR, "*"))
            .chain(object_nodes(contents))
            .chain(std::iter::once(token(STAR, "*")))
            .collect();
        node(BOLD, children)
    }))(input)
}
/// Parses `~code~` markup into a `CODE` node; the contents are kept as a
/// single text token and are not parsed any further.
pub fn code_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(emphasis(b'~'), |contents| {
        let open = token(TILDE, "~");
        let body = contents.text_token();
        let close = token(TILDE, "~");
        node(CODE, [open, body, close])
    }))(input)
}
/// Parses `+strike+` markup into a `STRIKE` node whose contents are parsed as
/// nested objects.
pub fn strike_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(emphasis(b'+'), |contents| {
        // opening marker, nested objects, closing marker
        let children: Vec<GreenElement> = std::iter::once(token(PLUS, "+"))
            .chain(object_nodes(contents))
            .chain(std::iter::once(token(PLUS, "+")))
            .collect();
        node(STRIKE, children)
    }))(input)
}
/// Parses `=verbatim=` markup into a `VERBATIM` node; the contents are kept as
/// a single text token and are not parsed any further.
pub fn verbatim_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(emphasis(b'='), |contents| {
        let open = token(EQUAL, "=");
        let body = contents.text_token();
        let close = token(EQUAL, "=");
        node(VERBATIM, [open, body, close])
    }))(input)
}
/// Parses `_underline_` markup into an `UNDERLINE` node whose contents are
/// parsed as nested objects.
pub fn underline_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(emphasis(b'_'), |contents| {
        // opening marker, nested objects, closing marker
        let children: Vec<GreenElement> = std::iter::once(token(UNDERSCORE, "_"))
            .chain(object_nodes(contents))
            .chain(std::iter::once(token(UNDERSCORE, "_")))
            .collect();
        node(UNDERLINE, children)
    }))(input)
}
/// Parses `/italic/` markup into an `ITALIC` node whose contents are parsed as
/// nested objects.
pub fn italic_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(emphasis(b'/'), |contents| {
        // opening marker, nested objects, closing marker
        let children: Vec<GreenElement> = std::iter::once(token(SLASH, "/"))
            .chain(object_nodes(contents))
            .chain(std::iter::once(token(SLASH, "/")))
            .collect();
        node(ITALIC, children)
    }))(input)
}
fn emphasis(marker: u8) -> impl Fn(Input) -> IResult<Input, Input, ()> {
move |input: Input| {
let bytes = input.as_bytes();
if bytes.len() < 3 || bytes[0] != marker || bytes[1].is_ascii_whitespace() {
return Err(nom::Err::Error(()));
}
for idx in memchr_iter(marker, bytes).skip(1) {
// contains at least one character
if idx == 1 {
continue;
} else if count(&bytes[1..idx], b'\n') >= 2 {
break;
} else if validate_marker(idx, input) {
return Ok((input.slice(idx + 1..), input.slice(1..idx)));
}
}
Err(nom::Err::Error(()))
}
}
/// Checks whether the marker at byte offset `pos` of `text` may close an
/// emphasis.
///
/// The byte before the marker must not be ASCII whitespace, and the byte after
/// it (if any) must be one of the allowed trailing characters. A marker at the
/// very end of `text` is always accepted.
fn validate_marker(pos: usize, text: Input) -> bool {
    // bytes allowed right after a closing marker
    const ALLOWED_AFTER: &[u8] = b" \t\r\n-.,;:!?')}[";

    let bytes = text.as_bytes();
    if bytes[pos - 1].is_ascii_whitespace() {
        return false;
    }
    match bytes.get(pos + 1) {
        Some(post) => ALLOWED_AFTER.contains(post),
        None => true,
    }
}
// Snapshot tests for `bold_node`, plus inputs the emphasis parsers must reject.
#[test]
fn parse() {
use crate::{ast::Bold, tests::to_ast, ParseConfig};
let to_bold = to_ast::<Bold>(bold_node);
insta::assert_debug_snapshot!(
to_bold("*bold*").syntax,
@r###"
BOLD@0..6
STAR@0..1 "*"
TEXT@1..5 "bold"
STAR@5..6 "*"
"###
);
// a marker followed by a letter does not close the emphasis
insta::assert_debug_snapshot!(
to_bold("*bo*ld*").syntax,
@r###"
BOLD@0..7
STAR@0..1 "*"
TEXT@1..6 "bo*ld"
STAR@6..7 "*"
"###
);
// a single newline inside the contents is allowed
insta::assert_debug_snapshot!(
to_bold("*bo\nld*").syntax,
@r###"
BOLD@0..7
STAR@0..1 "*"
TEXT@1..6 "bo\nld"
STAR@6..7 "*"
"###
);
let config = &ParseConfig::default();
// rejected: bad trailing character, whitespace next to a marker,
// two newlines in the contents, and a mismatched marker
assert!(bold_node(("*bold*a", config).into()).is_err());
assert!(bold_node(("*bold *", config).into()).is_err());
assert!(bold_node(("* bold*", config).into()).is_err());
assert!(bold_node(("*b\nol\nd*", config).into()).is_err());
assert!(italic_node(("*bold*", config).into()).is_err());
}

64
src/syntax/fixed_width.rs Normal file
View file

@ -0,0 +1,64 @@
use nom::{IResult, InputTake};
use super::{
combinator::{blank_lines, debug_assert_lossless, line_ends_iter, node, GreenElement},
input::Input,
SyntaxKind,
};
/// Parses a run of consecutive fixed-width lines followed by optional blank
/// lines.
///
/// A line belongs to the run when, after stripping leading whitespace, it is
/// exactly `:` (optionally followed by a line ending) or starts with `: `.
/// Fails when the very first line does not qualify.
fn fixed_width_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let is_fixed_width_line = |line: &str| {
        let line = line.trim_start();
        matches!(line, ":" | ":\n" | ":\r\n") || line.starts_with(": ")
    };

    // byte offset just past the last accepted line
    let mut end = 0;
    for line_end in line_ends_iter(input.as_str()) {
        if !is_fixed_width_line(&input.s[end..line_end]) {
            break;
        }
        end = line_end;
    }

    if end == 0 {
        return Err(nom::Err::Error(()));
    }

    let (rest, contents) = input.take_split(end);
    let (rest, post_blank) = blank_lines(rest)?;

    let mut children = vec![contents.text_token()];
    children.extend(post_blank);

    Ok((rest, node(SyntaxKind::FIXED_WIDTH, children)))
}
/// Parses a fixed-width element (lines starting with `:`) into a
/// `FIXED_WIDTH` node.
///
/// Thin wrapper that runs `fixed_width_node_base` through
/// `debug_assert_lossless`.
// NOTE(review): presumably `debug_assert_lossless` checks, in debug builds,
// that the produced node covers exactly the consumed input — confirm in the
// combinator module.
pub fn fixed_width_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(fixed_width_node_base)(input)
}
// Snapshot test for `fixed_width_node`: several `:` lines followed by blank
// lines, all of which end up inside the FIXED_WIDTH node.
#[test]
fn parse() {
use crate::{ast::FixedWidth, tests::to_ast};
let to_fixed_width = to_ast::<FixedWidth>(fixed_width_node);
insta::assert_debug_snapshot!(
to_fixed_width(
r#": A
:
: B
: C
"#
).syntax,
@r###"
FIXED_WIDTH@0..19
TEXT@0..14 ": A\n:\n: B\n: C\n"
BLANK_LINE@14..15
NEW_LINE@14..15 "\n"
BLANK_LINE@15..19
WHITESPACE@15..19 " "
"###
);
}

154
src/syntax/fn_def.rs Normal file
View file

@ -0,0 +1,154 @@
use nom::{
bytes::complete::{tag, take_while1},
combinator::map,
sequence::tuple,
IResult,
};
use super::{
combinator::{
blank_lines, colon_token, debug_assert_lossless, l_bracket_token, r_bracket_token,
trim_line_end, GreenElement, NodeBuilder,
},
input::Input,
keyword::affiliated_keyword_nodes,
SyntaxKind,
};
/// Parses a footnote definition such as `[fn:label] contents`.
///
/// The definition may be preceded by affiliated keywords. The label must be a
/// non-empty run of ASCII alphanumerics, `-` or `_`. Everything after the
/// closing bracket up to the end of the line is stored as one text token, and
/// trailing blank lines are attached to the node.
#[tracing::instrument(skip(input), fields(input = input.s))]
pub fn fn_def_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(
        tuple((
            affiliated_keyword_nodes,
            l_bracket_token,
            tag("fn"),
            colon_token,
            take_while1(|c: char| matches!(c, '-' | '_') || c.is_ascii_alphanumeric()),
            r_bracket_token,
            trim_line_end,
            blank_lines,
        )),
        |(
            keywords,
            open,
            prefix,
            separator,
            label,
            close,
            (content, trailing_ws, new_line),
            post_blank,
        )| {
            let mut builder = NodeBuilder::new();

            builder.children.extend(keywords);

            // `[fn:label]`
            builder.push(open);
            builder.text(prefix);
            builder.push(separator);
            builder.text(label);
            builder.push(close);

            // rest of the line
            builder.text(content);
            builder.ws(trailing_ws);
            builder.nl(new_line);

            builder.children.extend(post_blank);
            builder.finish(SyntaxKind::FN_DEF)
        },
    ))(input)
}
// Snapshot tests for `fn_def_node`: valid labels, empty contents, a trailing
// newline, rejected labels, and a definition preceded by an affiliated keyword.
#[test]
fn parse() {
use crate::ParseConfig;
use crate::{ast::FnDef, tests::to_ast};
let to_fn_def = to_ast::<FnDef>(fn_def_node);
insta::assert_debug_snapshot!(
to_fn_def("[fn:1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..26
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
TEXT@6..26 " https://orgmode.org"
"###
);
insta::assert_debug_snapshot!(
to_fn_def("[fn:word_1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..31
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..10 "word_1"
R_BRACKET@10..11 "]"
TEXT@11..31 " https://orgmode.org"
"###
);
insta::assert_debug_snapshot!(
to_fn_def("[fn:WORD-1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..31
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..10 "WORD-1"
R_BRACKET@10..11 "]"
TEXT@11..31 " https://orgmode.org"
"###
);
// a definition without any contents still yields an (empty) text token
insta::assert_debug_snapshot!(
to_fn_def("[fn:WORD]").syntax,
@r###"
FN_DEF@0..9
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..8 "WORD"
R_BRACKET@8..9 "]"
TEXT@9..9 ""
"###
);
insta::assert_debug_snapshot!(
to_fn_def("[fn:1] In particular, the parser requires stars at column 0 to be\n").syntax,
@r###"
FN_DEF@0..66
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
TEXT@6..65 " In particular, the p ..."
NEW_LINE@65..66 "\n"
"###
);
let config = &ParseConfig::default();
// rejected: empty label, label containing a space, missing closing bracket
assert!(fn_def_node(("[fn:] https://orgmode.org", config).into()).is_err());
assert!(fn_def_node(("[fn:wor d] https://orgmode.org", config).into()).is_err());
assert!(fn_def_node(("[fn:WORD https://orgmode.org", config).into()).is_err());
// an affiliated keyword becomes the first child of the FN_DEF node
insta::assert_debug_snapshot!(
to_fn_def("#+ATTR_poi: 1\n[fn:WORD-1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..45
KEYWORD@0..14
HASH_PLUS@0..2 "#+"
TEXT@2..10 "ATTR_poi"
COLON@10..11 ":"
TEXT@11..13 " 1"
NEW_LINE@13..14 "\n"
L_BRACKET@14..15 "["
TEXT@15..17 "fn"
COLON@17..18 ":"
TEXT@18..24 "WORD-1"
R_BRACKET@24..25 "]"
TEXT@25..45 " https://orgmode.org"
"###
);
}

120
src/syntax/fn_ref.rs Normal file
View file

@ -0,0 +1,120 @@
use memchr::memchr2_iter;
use nom::{
bytes::complete::{tag, take_while},
combinator::opt,
sequence::tuple,
AsBytes, Err, IResult, InputTake,
};
use super::{
combinator::{
colon_token, debug_assert_lossless, l_bracket_token, node, r_bracket_token, GreenElement,
},
input::Input,
object::object_nodes,
SyntaxKind::*,
};
/// Parses a footnote reference (`[fn:label]`, `[fn:label:definition]` or
/// `[fn::definition]`) into an `FN_REF` node.
///
/// Thin wrapper that runs `fn_ref_node_base` through `debug_assert_lossless`.
pub fn fn_ref_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(fn_ref_node_base)(input)
}
/// Parses `[fn:LABEL]` or `[fn:LABEL:DEFINITION]`.
///
/// `LABEL` may be empty and consists of ASCII alphanumerics, `-` and `_`.
/// The optional `DEFINITION` runs up to the bracket that balances the opening
/// one and is parsed as nested objects.
fn fn_ref_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let is_label_char = |c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_';

    let (rest, (open, prefix, separator, label, inline_definition, close)) = tuple((
        l_bracket_token,
        tag("fn"),
        colon_token,
        take_while(is_label_char),
        opt(tuple((colon_token, balanced_brackets))),
        r_bracket_token,
    ))(input)?;

    let mut children = Vec::from([open, prefix.text_token(), separator, label.text_token()]);

    if let Some((separator, definition)) = inline_definition {
        children.push(separator);
        children.extend(object_nodes(definition));
    }

    children.push(close);

    Ok((rest, node(FN_REF, children)))
}
/// Takes everything up to (but not including) the first `]` that is not
/// matched by a `[` inside `input`.
///
/// Returns `(rest, taken)` where `rest` starts at that closing bracket. Fails
/// when no such bracket exists.
fn balanced_brackets(input: Input) -> IResult<Input, Input, ()> {
    let bytes = input.as_bytes();
    // number of `[` seen so far that are still waiting for their `]`
    let mut depth = 0usize;

    for at in memchr2_iter(b'[', b']', bytes) {
        match bytes[at] {
            b'[' => depth += 1,
            _ if depth > 0 => depth -= 1,
            _ => return Ok(input.take_split(at)),
        }
    }

    Err(Err::Error(()))
}
// Snapshot tests for `fn_ref_node`: labelled, labelled with an inline
// definition, anonymous, nested brackets, and an unbalanced input.
#[test]
fn parse() {
use crate::{ast::FnRef, tests::to_ast, ParseConfig};
let to_fn_ref = to_ast::<FnRef>(fn_ref_node);
insta::assert_debug_snapshot!(
to_fn_ref("[fn:1]").syntax,
@r###"
FN_REF@0..6
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
"###
);
insta::assert_debug_snapshot!(
to_fn_ref("[fn:1:2]").syntax,
@r###"
FN_REF@0..8
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
COLON@5..6 ":"
TEXT@6..7 "2"
R_BRACKET@7..8 "]"
"###
);
// an empty label still produces an (empty) text token
insta::assert_debug_snapshot!(
to_fn_ref("[fn::2]").syntax,
@r###"
FN_REF@0..7
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..4 ""
COLON@4..5 ":"
TEXT@5..6 "2"
R_BRACKET@6..7 "]"
"###
);
// brackets inside the definition must be balanced
insta::assert_debug_snapshot!(
to_fn_ref("[fn::[]]").syntax,
@r###"
FN_REF@0..8
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..4 ""
COLON@4..5 ":"
TEXT@5..7 "[]"
R_BRACKET@7..8 "]"
"###
);
let config = &ParseConfig::default();
assert!(fn_ref_node(("[fn::[]", config).into()).is_err());
}

350
src/syntax/headline.rs Normal file
View file

@ -0,0 +1,350 @@
use memchr::memrchr_iter;
use nom::{
bytes::complete::take_while1,
character::complete::{anychar, space0},
combinator::{map, opt, verify},
sequence::tuple,
AsBytes, IResult, InputLength, InputTake, Slice,
};
use tracing::instrument;
use super::{
combinator::{
debug_assert_lossless, hash_token, l_bracket_token, line_starts_iter, node,
r_bracket_token, token, trim_line_end, GreenElement, NodeBuilder,
},
drawer::property_drawer_node,
element::element_nodes,
input::Input,
object::object_nodes,
planning::planning_node,
SyntaxKind::*,
};
/// Parses a headline, together with its planning line, property drawer,
/// section and nested sub-headlines, into a `HEADLINE` node.
///
/// Thin wrapper that runs `headline_node_base` through
/// `debug_assert_lossless`.
pub fn headline_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(headline_node_base)(input)
}
/// Parses one headline and everything that belongs to it.
///
/// The headline line is read as stars, optional keyword, optional priority
/// cookie, title and optional tags. If the line ends with a newline, the
/// parser then tries, in this order, a planning line, a property drawer and a
/// section, and finally consumes every following headline whose star count is
/// strictly greater than this one's as a child.
#[instrument(skip(input), fields(input = input.s))]
fn headline_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (rest, stars) = headline_stars(input)?;
    let own_level = stars.input_len();

    let mut builder = NodeBuilder::new();
    builder.token(HEADLINE_STARS, stars);

    let (rest, gap) = space0(rest)?;
    builder.ws(gap);

    let (rest, keyword) = opt(headline_keyword_token)(rest)?;
    if let Some((keyword, gap)) = keyword {
        builder.push(keyword);
        builder.ws(gap);
    }

    let (rest, priority) = opt(headline_priority_node)(rest)?;
    if let Some((priority, gap)) = priority {
        builder.push(priority);
        builder.ws(gap);
    }

    // the tags parser works from the end of the line and hands back
    // whatever precedes the tags as the title
    let (rest, (title_and_tags, trailing_ws, new_line)) = trim_line_end(rest)?;
    let (title, tags) = opt(headline_tags_node)(title_and_tags)?;
    if !title.is_empty() {
        builder.push(node(HEADLINE_TITLE, object_nodes(title)));
    }
    builder.push_opt(tags);
    builder.ws(trailing_ws);
    builder.nl(new_line);

    // no line ending means the input stops right after the headline line
    if new_line.is_empty() {
        return Ok((rest, builder.finish(HEADLINE)));
    }

    let (rest, planning) = opt(planning_node)(rest)?;
    builder.push_opt(planning);

    let (rest, property_drawer) = opt(property_drawer_node)(rest)?;
    builder.push_opt(property_drawer);

    let (rest, section) = opt(section_node)(rest)?;
    builder.push_opt(section);

    // deeper headlines are children; a headline with the same or a smaller
    // number of stars ends this one
    let mut rest = rest;
    while !rest.is_empty() && rest.bytes().take_while(|&c| c == b'*').count() > own_level {
        let (after, child) = headline_node(rest)?;
        builder.push(child);
        rest = after;
    }

    Ok((rest, builder.finish(HEADLINE)))
}
/// Parses the text up to the next headline into a `SECTION` node whose
/// children are the elements found in that text.
#[instrument(skip(input), fields(input = input.s))]
pub fn section_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let (rest, body) = section_text(input)?;
    let children = element_nodes(body)?;
    Ok((rest, node(SECTION, children)))
}
pub fn section_text(input: Input) -> IResult<Input, Input, ()> {
if input.is_empty() {
return Err(nom::Err::Error(()));
}
for (input, section) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) {
if headline_stars(input).is_ok() {
if section.is_empty() {
return Err(nom::Err::Error(()));
}
return Ok((input, section));
}
}
Ok(input.take_split(input.input_len()))
}
#[instrument(skip(input), fields(input = input.s))]
fn headline_stars(input: Input) -> IResult<Input, Input, ()> {
let bytes = input.as_bytes();
let level = bytes.iter().take_while(|&&c| c == b'*').count();
if level == 0 {
Err(nom::Err::Error(()))
} else if input.input_len() == level {
Ok(input.take_split(level))
} else if bytes[level] == b'\n' || bytes[level] == b'\r' || bytes[level] == b' ' {
Ok(input.take_split(level))
} else {
Err(nom::Err::Error(()))
}
}
/// Parses the tags (`:tag1:tag2:`) at the end of a headline line.
///
/// `input` is the headline text without trailing whitespace or line ending.
/// The text is scanned from right to left, one colon at a time. On success
/// returns `(title, tags_node)`, where `title` is everything before the first
/// colon of the tags.
///
/// Fails when the input does not end with `:`, when no second colon could be
/// accepted, or when the tags are neither at the start of the input nor
/// preceded by a space or tab.
#[instrument(skip(input), fields(input = input.s))]
fn headline_tags_node(input: Input) -> IResult<Input, GreenElement, ()> {
if !input.s.ends_with(':') {
return Err(nom::Err::Error(()));
};
let bytes = input.as_bytes();
// the last byte is known to be a colon, so scanning starts from there:
// `i` is always the offset of the leftmost colon accepted so far
let mut i = input.input_len() - 1;
// true while a whitespace-only item is not allowed: right before the final
// colon, and directly after another whitespace-only item
let mut can_not_be_ws = true;
let mut children = vec![token(COLON, ":")];
for ii in memrchr_iter(b':', bytes).skip(1) {
// bytes between this colon and the previously accepted one
let item = &bytes[ii + 1..i];
if item.is_empty() {
// two adjacent colons
children.push(token(COLON, ":"));
can_not_be_ws = false;
i = ii;
} else if item
.iter()
.all(|&c| c.is_ascii_alphanumeric() || c == b'_' || c == b'@' || c == b'#' || c == b'%')
{
// a tag name: ASCII alphanumerics plus `_`, `@`, `#` and `%`
children.push(input.slice(ii + 1..i).text_token());
children.push(token(COLON, ":"));
can_not_be_ws = false;
i = ii;
} else if item.iter().all(|&c| c == b' ' || c == b'\t') && !can_not_be_ws {
// spaces/tabs between two colons
children.push(input.slice(ii + 1..i).ws_token());
children.push(token(COLON, ":"));
can_not_be_ws = true;
i = ii;
} else {
// anything else belongs to the title
break;
}
}
// only the trailing colon was found
if children.len() == 1 {
return Err(nom::Err::Error(()));
}
// tags must be separated from the title by a space or tab
if i != 0 && bytes[i - 1] != b' ' && bytes[i - 1] != b'\t' {
return Err(nom::Err::Error(()));
}
// we parse headline tag from right to left,
// so we need to reverse the result after it finishes
children.reverse();
Ok((input.slice(0..i), node(HEADLINE_TAGS, children)))
}
/// Parses a headline keyword followed by optional spaces.
///
/// The first whitespace-delimited word is accepted only if it equals one of
/// the entries in either list of the config's `todo_keywords`. Returns the
/// keyword token together with the whitespace that follows it.
fn headline_keyword_token(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
    let (rest, word) = verify(
        take_while1(|c: char| !c.is_ascii_whitespace()),
        |candidate: &Input| {
            let keywords = &candidate.c.todo_keywords;
            let text = &candidate.s;
            keywords.0.iter().any(|k| k == text) || keywords.1.iter().any(|k| k == text)
        },
    )(input)?;

    let (rest, trailing_ws) = space0(rest)?;

    Ok((rest, (word.token(HEADLINE_KEYWORD), trailing_ws)))
}
/// Parses a priority cookie such as `[#A]` followed by optional spaces.
///
/// Any single character is accepted between `[#` and `]`. Returns the
/// `HEADLINE_PRIORITY` node together with the whitespace that follows it.
fn headline_priority_node(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
    let (rest, cookie) = map(
        tuple((l_bracket_token, hash_token, anychar, r_bracket_token)),
        |(open, hash, priority, close)| {
            // render the single character without allocating a `String`
            let mut buf = [0u8; 4];
            let text: &str = priority.encode_utf8(&mut buf);
            node(HEADLINE_PRIORITY, [open, hash, token(TEXT, text), close])
        },
    )(input)?;

    let (rest, trailing_ws) = space0(rest)?;

    Ok((rest, (cookie, trailing_ws)))
}
// Snapshot tests for `headline_node`: a bare headline, nested headlines,
// a keyword, and a priority cookie.
#[test]
fn parse() {
use crate::{ast::Headline, tests::to_ast};
let to_headline = to_ast::<Headline>(headline_node);
let hdl = to_headline("* foo");
insta::assert_debug_snapshot!(
hdl.syntax,
@r###"
HEADLINE@0..5
HEADLINE_STARS@0..1 "*"
WHITESPACE@1..2 " "
HEADLINE_TITLE@2..5
TEXT@2..5 "foo"
"###
);
// a deeper headline becomes a child of the first one
let hdl = to_headline("* foo\n\n** bar");
insta::assert_debug_snapshot!(
hdl.syntax,
@r###"
HEADLINE@0..13
HEADLINE_STARS@0..1 "*"
WHITESPACE@1..2 " "
HEADLINE_TITLE@2..5
TEXT@2..5 "foo"
NEW_LINE@5..6 "\n"
SECTION@6..7
PARAGRAPH@6..7
BLANK_LINE@6..7
NEW_LINE@6..7 "\n"
HEADLINE@7..13
HEADLINE_STARS@7..9 "**"
WHITESPACE@9..10 " "
HEADLINE_TITLE@10..13
TEXT@10..13 "bar"
"###
);
let hdl = to_headline("* TODO foo\nbar\n** baz\n");
assert_eq!(hdl.level(), Some(1));
assert_eq!(hdl.keyword().as_ref().map(|x| x.text()), Some("TODO"));
insta::assert_debug_snapshot!(
hdl.syntax,
@r###"
HEADLINE@0..22
HEADLINE_STARS@0..1 "*"
WHITESPACE@1..2 " "
HEADLINE_KEYWORD@2..6 "TODO"
WHITESPACE@6..7 " "
HEADLINE_TITLE@7..10
TEXT@7..10 "foo"
NEW_LINE@10..11 "\n"
SECTION@11..15
PARAGRAPH@11..15
TEXT@11..15 "bar\n"
HEADLINE@15..22
HEADLINE_STARS@15..17 "**"
WHITESPACE@17..18 " "
HEADLINE_TITLE@18..21
TEXT@18..21 "baz"
NEW_LINE@21..22 "\n"
"###
);
// a following headline with fewer stars is not consumed
let hdl = to_headline("** [#A] foo\n* baz");
assert_eq!(hdl.level(), Some(2));
assert_eq!(
hdl.priority().unwrap().text_string().unwrap(),
"A".to_string()
);
insta::assert_debug_snapshot!(
hdl.syntax,
@r###"
HEADLINE@0..12
HEADLINE_STARS@0..2 "**"
WHITESPACE@2..3 " "
HEADLINE_PRIORITY@3..7
L_BRACKET@3..4 "["
HASH@4..5 "#"
TEXT@5..6 "A"
R_BRACKET@6..7 "]"
WHITESPACE@7..8 " "
HEADLINE_TITLE@8..11
TEXT@8..11 "foo"
NEW_LINE@11..12 "\n"
"###
);
}
// Regression tests for headline tags: an empty tag list and tags made of the
// special characters `_`, `@`, `#` and `%`, with spaces and tabs before them.
#[test]
fn issue_15_16() {
use crate::{ast::Headline, tests::to_ast};
let to_headline = to_ast::<Headline>(headline_node);
let tags = to_headline("* a ::").tags().unwrap();
assert_eq!(tags.iter().count(), 0);
// disabled case: a tag containing `(`
// let tags = to_headline("* a :(:").tags().unwrap();
// assert_eq!(tags.iter().count(), 0);
let tags = to_headline("* a \t:_:").tags().unwrap();
assert_eq!(
vec!["_".to_string()],
tags.iter().map(|x| x.to_string()).collect::<Vec<_>>(),
);
let tags = to_headline("* a \t :@:").tags().unwrap();
assert_eq!(
vec!["@".to_string()],
tags.iter().map(|x| x.to_string()).collect::<Vec<_>>(),
);
let tags = to_headline("* a :#:").tags().unwrap();
assert_eq!(
vec!["#".to_string()],
tags.iter().map(|x| x.to_string()).collect::<Vec<_>>(),
);
let tags = to_headline("* a\t :%:").tags().unwrap();
assert_eq!(
vec!["%".to_string()],
tags.iter().map(|x| x.to_string()).collect::<Vec<_>>(),
);
// disabled case: a non-ASCII tag
// let tags = to_headline("* a :余:").tags().unwrap();
// assert_eq!(
// vec!["余".to_string()],
// tags.iter().map(|x| x.to_string()).collect::<Vec<_>>(),
// );
}

126
src/syntax/inline_call.rs Normal file
View file

@ -0,0 +1,126 @@
use nom::{
bytes::complete::{tag, take_till},
combinator::{map, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{
debug_assert_lossless, l_bracket_token, l_parens_token, node, r_bracket_token,
r_parens_token, GreenElement,
},
input::Input,
SyntaxKind,
};
/// Parses an inline babel call: `call_NAME[HEADER](ARGUMENTS)[HEADER]`.
///
/// Both bracketed headers are optional. None of the parts may span a line
/// break. Every part is stored as a plain text token between its delimiter
/// tokens.
pub fn inline_call_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(
        tuple((
            tag("call_"),
            take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')'),
            opt(tuple((
                l_bracket_token,
                take_till(|c| c == ']' || c == '\n'),
                r_bracket_token,
            ))),
            l_parens_token,
            take_till(|c| c == ')' || c == '\n'),
            r_parens_token,
            opt(tuple((
                l_bracket_token,
                take_till(|c| c == ']' || c == '\n'),
                r_bracket_token,
            ))),
        )),
        |(prefix, name, inside_header, l_paren, arguments, r_paren, end_header)| {
            let mut children = vec![prefix.text_token(), name.text_token()];

            // `[header]` before the arguments
            if let Some((open, header, close)) = inside_header {
                children.extend([open, header.text_token(), close]);
            }

            // `(arguments)`
            children.extend([l_paren, arguments.text_token(), r_paren]);

            // `[header]` after the arguments
            if let Some((open, header, close)) = end_header {
                children.extend([open, header.text_token(), close]);
            }

            node(SyntaxKind::INLINE_CALL, children)
        },
    ))(input)
}
// Snapshot tests for `inline_call_node`: without headers, with the inside
// header, with the end header, and with both.
#[test]
fn parse() {
use crate::{ast::InlineCall, tests::to_ast};
let to_inline_call = to_ast::<InlineCall>(inline_call_node);
let call = to_inline_call("call_square(4)");
insta::assert_debug_snapshot!(
call.syntax,
@r###"
INLINE_CALL@0..14
TEXT@0..5 "call_"
TEXT@5..11 "square"
L_PARENS@11..12 "("
TEXT@12..13 "4"
R_PARENS@13..14 ")"
"###
);
let call = to_inline_call("call_square[:results output](4)");
insta::assert_debug_snapshot!(
call.syntax,
@r###"
INLINE_CALL@0..31
TEXT@0..5 "call_"
TEXT@5..11 "square"
L_BRACKET@11..12 "["
TEXT@12..27 ":results output"
R_BRACKET@27..28 "]"
L_PARENS@28..29 "("
TEXT@29..30 "4"
R_PARENS@30..31 ")"
"###
);
let call = to_inline_call("call_square(4)[:results html]");
insta::assert_debug_snapshot!(
call.syntax,
@r###"
INLINE_CALL@0..29
TEXT@0..5 "call_"
TEXT@5..11 "square"
L_PARENS@11..12 "("
TEXT@12..13 "4"
R_PARENS@13..14 ")"
L_BRACKET@14..15 "["
TEXT@15..28 ":results html"
R_BRACKET@28..29 "]"
"###
);
let call = to_inline_call("call_square[:results output](4)[:results html]");
insta::assert_debug_snapshot!(
call.syntax,
@r###"
INLINE_CALL@0..46
TEXT@0..5 "call_"
TEXT@5..11 "square"
L_BRACKET@11..12 "["
TEXT@12..27 ":results output"
R_BRACKET@27..28 "]"
L_PARENS@28..29 "("
TEXT@29..30 "4"
R_PARENS@30..31 ")"
L_BRACKET@31..32 "["
TEXT@32..45 ":results html"
R_BRACKET@45..46 "]"
"###
);
}

84
src/syntax/inline_src.rs Normal file
View file

@ -0,0 +1,84 @@
use nom::{
bytes::complete::{tag, take_till, take_while1},
combinator::{map, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{
debug_assert_lossless, l_bracket_token, l_curly_token, node, r_bracket_token,
r_curly_token, GreenElement,
},
input::Input,
SyntaxKind,
};
/// Parses an inline source block: `src_LANG[OPTIONS]{BODY}`.
///
/// `LANG` must be non-empty and may not contain whitespace, `[` or `{`.
/// The bracketed options are optional. Neither the options nor the body may
/// span a line break.
pub fn inline_src_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(
        tuple((
            tag("src_"),
            take_while1(|c: char| !(c.is_ascii_whitespace() || c == '[' || c == '{')),
            opt(tuple((
                l_bracket_token,
                take_till(|c| c == '\n' || c == ']'),
                r_bracket_token,
            ))),
            l_curly_token,
            take_till(|c| c == '\n' || c == '}'),
            r_curly_token,
        )),
        |(prefix, lang, options, l_curly, body, r_curly)| {
            let mut children = vec![prefix.text_token(), lang.text_token()];

            // `[options]`
            if let Some((open, options, close)) = options {
                children.extend([open, options.text_token(), close]);
            }

            // `{body}`
            children.extend([l_curly, body.text_token(), r_curly]);

            node(SyntaxKind::INLINE_SRC, children)
        },
    ))(input)
}
// Snapshot tests for `inline_src_node`, plus inputs that must be rejected.
#[test]
fn parse() {
use crate::{ast::InlineSrc, tests::to_ast, ParseConfig};
let to_inline_src = to_ast::<InlineSrc>(inline_src_node);
insta::assert_debug_snapshot!(
to_inline_src("src_C{int a = 0;}").syntax,
@r###"
INLINE_SRC@0..17
TEXT@0..4 "src_"
TEXT@4..5 "C"
L_CURLY@5..6 "{"
TEXT@6..16 "int a = 0;"
R_CURLY@16..17 "}"
"###
);
insta::assert_debug_snapshot!(
to_inline_src("src_xml[:exports code]{<tag>text</tag>}").syntax,
@r###"
INLINE_SRC@0..39
TEXT@0..4 "src_"
TEXT@4..7 "xml"
L_BRACKET@7..8 "["
TEXT@8..21 ":exports code"
R_BRACKET@21..22 "]"
L_CURLY@22..23 "{"
TEXT@23..38 "<tag>text</tag>"
R_CURLY@38..39 "}"
"###
);
let config = &ParseConfig::default();
// rejected: missing closing brace, missing language, missing body
assert!(inline_src_node(("src_xml[:exports code]{<tag>text</tag>", config).into()).is_err());
assert!(inline_src_node(("src_[:exports code]{<tag>text</tag>}", config).into()).is_err());
assert!(inline_src_node(("src_xml[:exports code]", config).into()).is_err());
}

250
src/syntax/input.rs Normal file
View file

@ -0,0 +1,250 @@
use nom::{
error::{ErrorKind, ParseError},
AsBytes, Compare, CompareResult, Err, FindSubstring, IResult, InputIter, InputLength,
InputTake, InputTakeAtPosition, Needed, Offset, Slice,
};
use std::{
ops::{Range, RangeFrom, RangeFull, RangeTo},
str::{Bytes, CharIndices, Chars},
};
use super::{
combinator::{token, GreenElement},
SyntaxKind,
};
use crate::config::ParseConfig;
/// A custom input type for the parsers.
///
/// It pairs the text being parsed with a reference to the `ParseConfig`, so
/// that the config is passed all the way down to every parser.
#[derive(Clone, Copy, Debug)]
pub struct Input<'a> {
// the text that is left to parse
pub(crate) s: &'a str,
// the parser configuration shared by every parser
pub(crate) c: &'a ParseConfig,
}
impl<'a> Input<'a> {
    /// Wraps `i` in a new `Input` that shares this input's config.
    #[inline]
    pub(crate) fn of(&self, i: &'a str) -> Input<'a> {
        Input { s: i, c: self.c }
    }

    /// Returns the underlying string slice.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        self.s
    }

    /// Returns `true` if there is no text left.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.s.is_empty()
    }

    /// Creates a token of the given `kind` holding this input's text.
    #[inline]
    pub fn token(&self, kind: SyntaxKind) -> GreenElement {
        token(kind, self.s)
    }

    /// Creates a `TEXT` token holding this input's text.
    #[inline]
    pub fn text_token(&self) -> GreenElement {
        self.token(SyntaxKind::TEXT)
    }

    /// Creates a `WHITESPACE` token holding this input's text.
    #[inline]
    pub fn ws_token(&self) -> GreenElement {
        self.token(SyntaxKind::WHITESPACE)
    }

    /// Creates a `NEW_LINE` token holding this input's text.
    #[inline]
    pub fn nl_token(&self) -> GreenElement {
        self.token(SyntaxKind::NEW_LINE)
    }

    /// Returns an iterator over the bytes of the underlying string slice.
    ///
    /// The iterator borrows from the text (`'a`), not from this `Input`
    /// value, matching what [`Input::as_str`] hands out.
    #[inline]
    pub fn bytes(&self) -> Bytes<'a> {
        self.s.bytes()
    }
}
/// Builds an `Input` from a `(text, config)` pair.
impl<'a> From<(&'a str, &'a ParseConfig)> for Input<'a> {
    fn from((s, c): (&'a str, &'a ParseConfig)) -> Self {
        Input { s, c }
    }
}
/// Exposes the bytes of the underlying string slice.
impl<'a> AsBytes for Input<'a> {
#[inline]
fn as_bytes(&self) -> &[u8] {
self.s.as_bytes()
}
}
/// Slices the underlying text by `start..end`, keeping the same config.
impl<'a> Slice<Range<usize>> for Input<'a> {
fn slice(&self, range: Range<usize>) -> Self {
self.of(self.s.slice(range))
}
}
/// Slices the underlying text by `..end`, keeping the same config.
impl<'a> Slice<RangeTo<usize>> for Input<'a> {
fn slice(&self, range: RangeTo<usize>) -> Self {
self.of(self.s.slice(range))
}
}
/// Slices the underlying text by `start..`, keeping the same config.
impl<'a> Slice<RangeFrom<usize>> for Input<'a> {
fn slice(&self, range: RangeFrom<usize>) -> Self {
self.of(self.s.slice(range))
}
}
/// Slices the underlying text by `..` (the full range), keeping the same
/// config.
impl<'a> Slice<RangeFull> for Input<'a> {
fn slice(&self, range: RangeFull) -> Self {
self.of(self.s.slice(range))
}
}
/// Substring search, delegated to `str::find` on the underlying text.
impl<'a, 'b> FindSubstring<&'b str> for Input<'a> {
fn find_substring(&self, substr: &str) -> Option<usize> {
self.s.find(substr)
}
}
/// Comparison against a string slice, delegated to the `Compare`
/// implementation of the underlying text.
impl<'a, 'b> Compare<&'b str> for Input<'a> {
#[inline]
fn compare(&self, t: &'b str) -> CompareResult {
self.s.compare(t)
}
#[inline]
fn compare_no_case(&self, t: &'b str) -> CompareResult {
self.s.compare_no_case(t)
}
}
/// The input length is the byte length of the underlying text.
impl<'a> InputLength for Input<'a> {
#[inline]
fn input_len(&self) -> usize {
self.s.len()
}
}
/// Character-wise iteration, delegated to the underlying text.
impl<'a> InputIter for Input<'a> {
type Item = char;
type Iter = CharIndices<'a>;
type IterElem = Chars<'a>;
// iterates over `(byte offset, char)` pairs
#[inline]
fn iter_indices(&self) -> Self::Iter {
self.s.char_indices()
}
// iterates over the characters
#[inline]
fn iter_elements(&self) -> Self::IterElem {
self.s.chars()
}
// delegates to `InputIter::position` of the underlying `&str`
fn position<P>(&self, predicate: P) -> Option<usize>
where
P: Fn(Self::Item) -> bool,
{
self.s.position(predicate)
}
// delegates to `InputIter::slice_index` of the underlying `&str`
#[inline]
fn slice_index(&self, count: usize) -> Result<usize, Needed> {
self.s.slice_index(count)
}
}
/// Taking and splitting by byte count, delegated to the underlying text.
impl<'a> InputTake for Input<'a> {
    #[inline]
    fn take(&self, count: usize) -> Self {
        self.of(self.s.take(count))
    }

    /// Splits the text at `count` and wraps both halves with this input's
    /// config, in the order the `&str` implementation returns them.
    // NOTE(review): callers in this crate destructure the result as
    // `(remaining, taken)` — confirm against nom's `InputTake` for `&str`.
    #[inline]
    fn take_split(&self, count: usize) -> (Self, Self) {
        let (first, second) = self.s.take_split(count);
        (self.of(first), self.of(second))
    }
}
impl<'a> Input<'a> {
    /// Converts the result of a split performed on the underlying `&str` into
    /// a result over `Input`.
    ///
    /// Both halves of a successful split, as well as the input carried by an
    /// error, are re-wrapped with this input's config. The error is rebuilt
    /// through `E::from_error_kind`; `Incomplete` is passed through untouched.
    #[inline]
    fn rewrap<E: ParseError<Self>>(
        &self,
        result: IResult<&'a str, &'a str, (&'a str, ErrorKind)>,
    ) -> IResult<Self, Self, E> {
        match result {
            Ok((l, r)) => Ok((self.of(l), self.of(r))),
            Err(Err::Error((i, kind))) => Err(Err::Error(E::from_error_kind(self.of(i), kind))),
            Err(Err::Failure((i, kind))) => Err(Err::Failure(E::from_error_kind(self.of(i), kind))),
            Err(Err::Incomplete(needed)) => Err(Err::Incomplete(needed)),
        }
    }
}

/// Predicate-based splitting, delegated to the underlying text.
///
/// Every method runs the corresponding `&str` implementation and converts the
/// result with `Input::rewrap`.
impl<'a> InputTakeAtPosition for Input<'a> {
    type Item = char;

    #[inline]
    fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        self.rewrap(self.s.split_at_position::<_, (&str, ErrorKind)>(predicate))
    }

    #[inline]
    fn split_at_position1<P, E: ParseError<Self>>(
        &self,
        predicate: P,
        e: ErrorKind,
    ) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        self.rewrap(
            self.s
                .split_at_position1::<_, (&str, ErrorKind)>(predicate, e),
        )
    }

    #[inline]
    fn split_at_position_complete<P, E: ParseError<Self>>(
        &self,
        predicate: P,
    ) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        self.rewrap(
            self.s
                .split_at_position_complete::<_, (&str, ErrorKind)>(predicate),
        )
    }

    #[inline]
    fn split_at_position1_complete<P, E: ParseError<Self>>(
        &self,
        predicate: P,
        e: ErrorKind,
    ) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        self.rewrap(
            self.s
                .split_at_position1_complete::<_, (&str, ErrorKind)>(predicate, e),
        )
    }
}
/// Offset between two inputs, delegated to the `Offset` implementation of the
/// underlying text.
impl<'a> Offset for Input<'a> {
fn offset(&self, second: &Self) -> usize {
self.s.offset(second.s)
}
}

215
src/syntax/keyword.rs Normal file
View file

@ -0,0 +1,215 @@
use nom::{
bytes::complete::take_till,
character::complete::space0,
combinator::{cond, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{
blank_lines, colon_token, debug_assert_lossless, hash_plus_token, l_bracket_token,
r_bracket_token, trim_line_end, GreenElement, NodeBuilder,
},
input::Input,
SyntaxKind,
};
/// Parses a keyword line (`#+KEY: VALUE`) into a `KEYWORD` node, or into a
/// `BABEL_CALL` node when the key is `CALL`.
///
/// Thin wrapper that runs `keyword_node_base` through `debug_assert_lossless`.
pub fn keyword_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(keyword_node_base)(input)
}
/// Parses `#+KEY[OPTIONAL]: VALUE` followed by optional blank lines.
///
/// - Leading spaces before `#+` are allowed.
/// - `KEY` ends at the first whitespace, `:` or `[`.
/// - The bracketed `OPTIONAL` part is only looked for when the key is not
///   `CALL` (compared case-insensitively).
/// - The value is everything up to the end of the line. It is emitted as
///   text, then trailing whitespace, then the line ending, in source order.
///
/// Produces a `BABEL_CALL` node when the key is `CALL`, and a `KEYWORD` node
/// otherwise.
fn keyword_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, (ws, hash_plus, key)) = tuple((
        space0,
        hash_plus_token,
        take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '['),
    ))(input)?;

    let is_babel_call = key.s.eq_ignore_ascii_case("CALL");

    // `cond` yields `None` for babel calls, `Some(None)` when the optional
    // part is simply absent, and `Some(Some(..))` when it was found
    let (input, optional) = cond(
        !is_babel_call,
        opt(tuple((
            l_bracket_token,
            take_till(|c| c == ']' || c == '\n'),
            r_bracket_token,
        ))),
    )(input)?;

    let (input, (colon, (value, ws_, nl), post_blank)) =
        tuple((colon_token, trim_line_end, blank_lines))(input)?;

    let mut b = NodeBuilder::new();

    b.ws(ws);
    b.push(hash_plus);
    b.text(key);

    if let Some(Some((l_bracket, optional, r_bracket))) = optional {
        b.children
            .extend([l_bracket, optional.text_token(), r_bracket]);
    }

    b.push(colon);
    // `ws_` is the whitespace that trails the value, so it has to come after
    // the value text; emitting it first would put the tokens out of source
    // order whenever a value is followed by blanks
    b.text(value);
    b.ws(ws_);
    b.nl(nl);
    b.children.extend(post_blank);

    Ok((
        input,
        b.finish(if is_babel_call {
            SyntaxKind::BABEL_CALL
        } else {
            SyntaxKind::KEYWORD
        }),
    ))
}
pub fn affiliated_keyword_nodes(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
use rowan::NodeOrToken;
let mut children = vec![];
let mut i = input;
while !i.is_empty() {
let Ok((input, keyword)) = keyword_node(i) else {
break;
};
i = input;
let Some(node) = keyword.as_node() else {
return Err(nom::Err::Error(()));
};
// find the first text token in children
let Some(NodeOrToken::Token(token)) = node
.children()
.into_iter()
.find(|t| t.kind() == SyntaxKind::TEXT.into())
else {
return Err(nom::Err::Error(()));
};
let text = token.text();
if input.c.affiliated_keywords.iter().all(|w| w != text) && !text.starts_with("ATTR_") {
return Err(nom::Err::Error(()));
}
children.push(keyword);
}
Ok((i, children))
}
#[test]
fn parse() {
// Snapshot tests for `keyword_node`: each case pins the exact token layout of
// the resulting KEYWORD / BABEL_CALL node.
use crate::{
ast::{BabelCall, Keyword},
tests::to_ast,
ParseConfig,
};
let to_keyword = to_ast::<Keyword>(keyword_node);
let to_babel_call = to_ast::<BabelCall>(keyword_node);
// keyword without a value: an empty TEXT token is still emitted
insta::assert_debug_snapshot!(
to_keyword("#+KEY:").syntax,
@r###"
KEYWORD@0..6
HASH_PLUS@0..2 "#+"
TEXT@2..5 "KEY"
COLON@5..6 ":"
TEXT@6..6 ""
"###
);
// whitespace after the colon stays part of the value text
insta::assert_debug_snapshot!(
to_keyword("#+KEY: VALUE").syntax,
@r###"
KEYWORD@0..12
HASH_PLUS@0..2 "#+"
TEXT@2..5 "KEY"
COLON@5..6 ":"
TEXT@6..12 " VALUE"
"###
);
insta::assert_debug_snapshot!(
to_keyword("#+K_E_Y: VALUE").syntax,
@r###"
KEYWORD@0..14
HASH_PLUS@0..2 "#+"
TEXT@2..7 "K_E_Y"
COLON@7..8 ":"
TEXT@8..14 " VALUE"
"###
);
// the line ending becomes its own NEW_LINE token
insta::assert_debug_snapshot!(
to_keyword("#+KEY:VALUE\n").syntax,
@r###"
KEYWORD@0..12
HASH_PLUS@0..2 "#+"
TEXT@2..5 "KEY"
COLON@5..6 ":"
TEXT@6..11 "VALUE"
NEW_LINE@11..12 "\n"
"###
);
insta::assert_debug_snapshot!(
to_keyword("#+RESULTS:").syntax,
@r###"
KEYWORD@0..10
HASH_PLUS@0..2 "#+"
TEXT@2..9 "RESULTS"
COLON@9..10 ":"
TEXT@10..10 ""
"###
);
insta::assert_debug_snapshot!(
to_keyword("#+ATTR_LATEX: :width 5cm\n").syntax,
@r###"
KEYWORD@0..25
HASH_PLUS@0..2 "#+"
TEXT@2..12 "ATTR_LATEX"
COLON@12..13 ":"
TEXT@13..24 " :width 5cm"
NEW_LINE@24..25 "\n"
"###
);
// `#+CALL:` produces a BABEL_CALL node instead of KEYWORD
insta::assert_debug_snapshot!(
to_babel_call("#+CALL: double(n=4)").syntax,
@r###"
BABEL_CALL@0..19
HASH_PLUS@0..2 "#+"
TEXT@2..6 "CALL"
COLON@6..7 ":"
TEXT@7..19 " double(n=4)"
"###
);
// optional bracketed part between key and colon
insta::assert_debug_snapshot!(
to_keyword("#+CAPTION[Short caption]: Longer caption.").syntax,
@r###"
KEYWORD@0..41
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
L_BRACKET@9..10 "["
TEXT@10..23 "Short caption"
R_BRACKET@23..24 "]"
COLON@24..25 ":"
TEXT@25..41 " Longer caption."
"###
);
// rejected inputs: whitespace inside the key, optional part on CALL,
// whitespace between `#+` and the key
let config = &ParseConfig::default();
assert!(keyword_node(("#+KE Y: VALUE", config).into()).is_err());
assert!(keyword_node(("#+CALL[option]: VALUE", config).into()).is_err());
assert!(keyword_node(("#+ KEY: VALUE", config).into()).is_err());
}

89
src/syntax/link.rs Normal file
View file

@ -0,0 +1,89 @@
use nom::{
bytes::complete::take_while,
combinator::{map, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{
debug_assert_lossless, l_bracket2_token, l_bracket_token, node, r_bracket2_token,
r_bracket_token, GreenElement,
},
input::Input,
SyntaxKind::*,
};
/// Parses a regular link: `[[PATH]]` or `[[PATH][DESCRIPTION]]`.
///
/// `PATH` is any (possibly empty) run of characters other than `<`, `>`, `]`
/// and newline; `DESCRIPTION` is any run of characters other than `[` and `]`.
/// The result is a `LINK` node whose path is stored as a `LINK_PATH` token and
/// whose description, when present, is a plain `TEXT` token.
pub fn link_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let is_path_char = |c: char| !matches!(c, '<' | '>' | '\n' | ']');
    let is_desc_char = |c: char| !matches!(c, '[' | ']');

    // optional `][DESCRIPTION` part between the path and the closing `]]`
    let description = opt(tuple((
        r_bracket_token,
        l_bracket_token,
        take_while(is_desc_char),
    )));
    let link = tuple((
        l_bracket2_token,
        take_while(is_path_char),
        description,
        r_bracket2_token,
    ));

    debug_assert_lossless(map(link, |(open, path, description, close)| {
        let mut children = vec![open, path.token(LINK_PATH)];
        if let Some((r_bracket, l_bracket, text)) = description {
            children.push(r_bracket);
            children.push(l_bracket);
            children.push(text.text_token());
        }
        children.push(close);
        node(LINK, children)
    }))(input)
}
#[test]
fn parse() {
// Snapshot tests for `link_node`, with and without a description.
use crate::{ast::Link, tests::to_ast, ParseConfig};
let to_link = to_ast::<Link>(link_node);
let link = to_link("[[#id]]");
// the AST accessor exposes the LINK_PATH token text
assert_eq!(link.path().as_ref().map(|x| x.text()), Some("#id"));
insta::assert_debug_snapshot!(
link.syntax,
@r###"
LINK@0..7
L_BRACKET2@0..2 "[["
LINK_PATH@2..5 "#id"
R_BRACKET2@5..7 "]]"
"###
);
let link = to_link("[[#id][desc]]");
insta::assert_debug_snapshot!(
link.syntax,
@r###"
LINK@0..13
L_BRACKET2@0..2 "[["
LINK_PATH@2..5 "#id"
R_BRACKET@5..6 "]"
L_BRACKET@6..7 "["
TEXT@7..11 "desc"
R_BRACKET2@11..13 "]]"
"###
);
let link = to_link("[[file:/home/dominik/images/jupiter.jpg]]");
insta::assert_debug_snapshot!(
link.syntax,
@r###"
LINK@0..41
L_BRACKET2@0..2 "[["
LINK_PATH@2..39 "file:/home/dominik/im ..."
R_BRACKET2@39..41 "]]"
"###
);
// a link missing its final `]` must be rejected
let config = &ParseConfig::default();
assert!(link_node(("[[#id][desc]", config).into()).is_err());
}

583
src/syntax/list.rs Normal file
View file

@ -0,0 +1,583 @@
use memchr::{memchr, memchr2};
use nom::{
branch::alt,
bytes::complete::{tag, take},
character::complete::{alphanumeric1, digit1, space0},
combinator::{cond, map, opt, recognize, verify},
sequence::{preceded, tuple},
AsBytes, IResult, InputLength, InputTake,
};
use super::{
combinator::{
at_token, blank_lines, colon2_token, debug_assert_lossless, l_bracket_token,
line_starts_iter, node, r_bracket_token, GreenElement,
},
element::element_node,
input::Input,
object::object_nodes,
SyntaxKind::*,
};
/// Parses a plain list starting at `input` into a `LIST` node.
///
/// Thin wrapper that runs `list_node_base` through `debug_assert_lossless`
/// (presumably a debug-build losslessness check — confirm against its
/// definition).
pub fn list_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(list_node_base)(input)
}
/// Parses a run of sibling list items into a single `LIST` node.
///
/// The indentation in front of the first item fixes the list's level: further
/// items are collected while the next line starts with indentation of the same
/// length and parses as a list item. Blank lines after the last item are
/// attached to the list node.
fn list_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (rest, first_indent) = space0(input)?;
    let (mut rest, first_item) = list_item_node(first_indent, rest)?;

    let list_indent = first_indent.input_len();
    let mut children = vec![first_item];

    while !rest.is_empty() {
        let (after_indent, indent) = space0(rest)?;

        // a different indentation means the line belongs to another list level
        if indent.input_len() != list_indent {
            break;
        }

        match list_item_node(indent, after_indent) {
            Ok((after_item, item)) => {
                children.push(item);
                rest = after_item;
            }
            Err(_) => break,
        }
    }

    let (rest, post_blank) = blank_lines(rest)?;
    children.extend(post_blank);

    Ok((rest, node(LIST, children)))
}
/// Parses a single list item whose indentation (`indent`) was already consumed.
///
/// An item consists of a bullet (`+`, `*`, `-`, `N.` or `N)` plus following
/// spaces), an optional counter (`[@x]`), an optional checkbox, an optional
/// tag (only attempted for unordered bullets) and the item content. All of
/// them, together with `indent`, become children of a `LIST_ITEM` node.
fn list_item_node<'a>(indent: Input<'a>, input: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
    let (input, bullet) = recognize(tuple((
        alt((
            tag("+"),
            tag("*"),
            tag("-"),
            preceded(digit1, tag(".")),
            preceded(digit1, tag(")")),
        )),
        space0,
    )))(input)?;

    // A bullet is only valid when it ends with whitespace,
    // or when the line (or the input) ends right after it.
    let ends_with_space = bullet
        .s
        .bytes()
        .last()
        .map_or(true, |b| b == b' ' || b == b'\t');
    let at_line_end = input
        .s
        .bytes()
        .next()
        .map_or(true, |b| b == b'\r' || b == b'\n');
    if !ends_with_space && !at_line_end {
        return Err(nom::Err::Error(()));
    }

    let is_ordered = bullet.s.starts_with(|c: char| c.is_ascii_digit());

    let (input, counter) = opt(list_item_counter)(input)?;
    let (input, checkbox) = opt(list_item_checkbox)(input)?;
    let (input, tag) = cond(!is_ordered, opt(list_item_tag))(input)?;
    let (input, content) = list_item_content_node(input, indent.input_len())?;

    let mut children = vec![
        indent.token(LIST_ITEM_INDENT),
        bullet.token(LIST_ITEM_BULLET),
    ];
    // each optional part is followed by the whitespace that trailed it
    for part in [counter, checkbox, tag.flatten()] {
        if let Some((element, ws)) = part {
            children.push(element);
            children.push(ws.ws_token());
        }
    }
    children.push(content);

    Ok((input, node(LIST_ITEM, children)))
}
/// Parses a list item counter such as `[@A]` or `[@3]`.
///
/// Returns the `LIST_ITEM_COUNTER` node together with the (possibly empty)
/// run of spaces that follows it.
fn list_item_counter(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
    let (input, counter) = map(
        tuple((l_bracket_token, at_token, alphanumeric1, r_bracket_token)),
        |(open, at, value, close)| {
            let children = [open, at, value.text_token(), close];
            node(LIST_ITEM_COUNTER, children)
        },
    )(input)?;

    space0(input).map(|(input, ws)| (input, (counter, ws)))
}
/// Parses a list item checkbox: `[ ]`, `[X]` or `[-]`.
///
/// Returns the `LIST_ITEM_CHECK_BOX` node together with the (possibly empty)
/// run of spaces that follows it.
fn list_item_checkbox(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
    // the single character between the brackets
    let state = verify(take(1usize), |mark: &Input| {
        matches!(mark.s, " " | "X" | "-")
    });

    let (input, checkbox) = map(
        tuple((l_bracket_token, state, r_bracket_token)),
        |(open, mark, close)| {
            let children = [open, mark.text_token(), close];
            node(LIST_ITEM_CHECK_BOX, children)
        },
    )(input)?;

    space0(input).map(|(input, ws)| (input, (checkbox, ws)))
}
fn list_item_tag(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
let bytes = input.as_bytes();
let (input, tag) = match memchr2(b'\n', b':', bytes) {
Some(idx) if idx > 0 && bytes[idx] == b':' => input.take_split(idx),
_ => return Err(nom::Err::Error(())),
};
let (input, ws) = space0(input)?;
let (input, colon2) = colon2_token(input)?;
let mut children = object_nodes(tag);
children.push(colon2);
Ok((input, (node(LIST_ITEM_TAG, children), ws)))
}
/// Parses the content of a list item whose bullet sits at indentation `indent`.
///
/// The content ends at the first later line that is not blank and is indented
/// by `indent` or less, at two consecutive blank lines, or at the end of the
/// input. Lines that `element_node` can parse become nested elements; the text
/// in between is wrapped in `PARAGRAPH` nodes. Everything is collected into a
/// `LIST_ITEM_CONTENT` node.
fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, GreenElement, ()> {
// single-line input: the whole rest is one paragraph and nothing remains
if memchr(b'\n', input.as_bytes()).is_none() {
return Ok((
input.of(""),
node(LIST_ITEM_CONTENT, [node(PARAGRAPH, object_nodes(input))]),
));
};
let mut skip_one = true;
// `i` is the not-yet-emitted rest of the content; it only advances past parsed elements
let mut i = input;
let mut children = vec![];
let mut previous_line_is_blank = false;
'l: loop {
// for every candidate line start, `input` is the text from that line on
// and `head` is the pending text in front of it
for (input, head) in line_starts_iter(i.as_str())
// the first line in list item content will always be a paragraph
// so we need to skip it in the first iteration
.skip(if skip_one { 1 } else { 0 })
.map(|idx| i.take_split(idx))
{
match get_line_indent(input.as_str()) {
Some(next_indent) => {
previous_line_is_blank = false;
// a non-blank line indented no deeper than the bullet ends the item
if next_indent <= indent {
if !head.is_empty() {
children.push(node(PARAGRAPH, object_nodes(head)));
}
return Ok((input, node(LIST_ITEM_CONTENT, children)));
}
// a nested element: flush the pending text as a paragraph,
// then restart the scan right after the element
if let Ok((input, element)) = element_node(input) {
if !head.is_empty() {
children.push(node(PARAGRAPH, object_nodes(head)));
}
children.push(element);
i = input;
skip_one = false;
continue 'l;
}
}
_ if previous_line_is_blank => {
// list item ends at two consecutive empty lines
if !head.is_empty() {
children.push(node(PARAGRAPH, object_nodes(head)));
}
let (input, post_blank) = blank_lines(input)?;
children.extend(post_blank);
return Ok((input, node(LIST_ITEM_CONTENT, children)));
}
_ => {
// first blank line: remember it and keep scanning
previous_line_is_blank = true;
}
}
}
break;
}
// no terminating line was found: the remaining text is the last paragraph
if !i.is_empty() {
children.push(node(PARAGRAPH, object_nodes(i)));
}
Ok((input.of(""), node(LIST_ITEM_CONTENT, children)))
}
/// Returns the byte offset of the first non-whitespace byte on the first line
/// of `input`, or `None` when that line is empty or consists only of ASCII
/// whitespace.
fn get_line_indent(input: &str) -> Option<usize> {
    // everything before the first `\n` (the whole input if there is none)
    let first_line = input.as_bytes().split(|&b| b == b'\n').next()?;
    first_line.iter().position(|b| !b.is_ascii_whitespace())
}
#[test]
fn parse() {
// Snapshot tests for `list_node`: bullets, counters, checkboxes, tags,
// nesting by indentation and nested elements.
use crate::{ast::List, tests::to_ast, ParseConfig};
let to_list = to_ast::<List>(list_node);
// a bullet directly followed by the end of input is accepted
let list = to_list("1)");
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..2
LIST_ITEM@0..2
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "1)"
LIST_ITEM_CONTENT@2..2
PARAGRAPH@2..2
"###
);
let list = to_list("+ ");
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..2
LIST_ITEM@0..2
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_CONTENT@2..2
PARAGRAPH@2..2
"###
);
// a bullet directly followed by a line ending is accepted
let list = to_list("-\n");
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..2
LIST_ITEM@0..2
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..1 "-"
LIST_ITEM_CONTENT@1..2
PARAGRAPH@1..2
TEXT@1..2 "\n"
"###
);
let list = to_list("+ 1");
assert!(!list.is_ordered());
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..3
LIST_ITEM@0..3
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_CONTENT@2..3
PARAGRAPH@2..3
TEXT@2..3 "1"
"###
);
let list = to_list("+ 1\n");
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..4
LIST_ITEM@0..4
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_CONTENT@2..4
PARAGRAPH@2..4
TEXT@2..4 "1\n"
"###
);
// counter, and an item ended by two consecutive blank lines
let list = to_list("+ [@A] 1\n\n\n+ 2");
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..14
LIST_ITEM@0..11
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_COUNTER@2..6
L_BRACKET@2..3 "["
AT@3..4 "@"
TEXT@4..5 "A"
R_BRACKET@5..6 "]"
WHITESPACE@6..7 " "
LIST_ITEM_CONTENT@7..11
PARAGRAPH@7..10
TEXT@7..10 "1\n\n"
BLANK_LINE@10..11
NEW_LINE@10..11 "\n"
LIST_ITEM@11..14
LIST_ITEM_INDENT@11..11 ""
LIST_ITEM_BULLET@11..13 "+ "
LIST_ITEM_CONTENT@13..14
PARAGRAPH@13..14
TEXT@13..14 "2"
"###
);
// tag (parsed as inline objects) and checkbox
let list = to_list("+ *TAG* :: item1\n+ [X] item2");
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..28
LIST_ITEM@0..17
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_TAG@2..10
BOLD@2..7
STAR@2..3 "*"
TEXT@3..6 "TAG"
STAR@6..7 "*"
TEXT@7..8 " "
COLON2@8..10 "::"
WHITESPACE@10..10 ""
LIST_ITEM_CONTENT@10..17
PARAGRAPH@10..17
TEXT@10..17 " item1\n"
LIST_ITEM@17..28
LIST_ITEM_INDENT@17..17 ""
LIST_ITEM_BULLET@17..19 "+ "
LIST_ITEM_CHECK_BOX@19..22
L_BRACKET@19..20 "["
TEXT@20..21 "X"
R_BRACKET@21..22 "]"
WHITESPACE@22..23 " "
LIST_ITEM_CONTENT@23..28
PARAGRAPH@23..28
TEXT@23..28 "item2"
"###
);
// a deeper-indented item becomes a nested list inside the first item
let list = to_list(
r#"+ item1
+ item2"#,
);
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..17
LIST_ITEM@0..17
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_CONTENT@2..17
PARAGRAPH@2..8
TEXT@2..8 "item1\n"
LIST@8..17
LIST_ITEM@8..17
LIST_ITEM_INDENT@8..10 " "
LIST_ITEM_BULLET@10..12 "+ "
LIST_ITEM_CONTENT@12..17
PARAGRAPH@12..17
TEXT@12..17 "item2"
"###
);
// a following line that is not indented deeper than the bullet ends the list
let list = to_list("* item1\nitem2");
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..8
LIST_ITEM@0..8
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "* "
LIST_ITEM_CONTENT@2..8
PARAGRAPH@2..8
TEXT@2..8 "item1\n"
"###
);
let list = to_list(
r#"* item1
still item 1"#,
);
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..23
LIST_ITEM@0..23
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "* "
LIST_ITEM_CONTENT@2..23
PARAGRAPH@2..23
TEXT@2..23 "item1\n\n still item 1"
"###
);
let list = to_list(
r#"+ item1
+ item2
"#,
);
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..26
LIST_ITEM@0..26
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_CONTENT@2..26
PARAGRAPH@2..8
TEXT@2..8 "item1\n"
LIST@8..26
LIST_ITEM@8..26
LIST_ITEM_INDENT@8..14 " "
LIST_ITEM_BULLET@14..16 "+ "
LIST_ITEM_CONTENT@16..26
PARAGRAPH@16..26
TEXT@16..26 "item2\n "
"###
);
// ordered list with an unordered nested list
let list = to_list(
r#"1. item1
- item2
3. item 3"#,
);
assert!(list.is_ordered());
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..32
LIST_ITEM@0..23
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..3 "1. "
LIST_ITEM_CONTENT@3..23
PARAGRAPH@3..10
TEXT@3..10 "item1\n\n"
LIST@10..23
LIST_ITEM@10..23
LIST_ITEM_INDENT@10..14 " "
LIST_ITEM_BULLET@14..16 "- "
LIST_ITEM_CONTENT@16..23
PARAGRAPH@16..23
TEXT@16..23 "item2\n\n"
LIST_ITEM@23..32
LIST_ITEM_INDENT@23..23 ""
LIST_ITEM_BULLET@23..26 "3. "
LIST_ITEM_CONTENT@26..32
PARAGRAPH@26..32
TEXT@26..32 "item 3"
"###
);
// sibling items of an indented list
let list = to_list(
r#" + item1
+ item2"#,
);
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..20
LIST_ITEM@0..11
LIST_ITEM_INDENT@0..2 " "
LIST_ITEM_BULLET@2..4 "+ "
LIST_ITEM_CONTENT@4..11
PARAGRAPH@4..11
TEXT@4..11 "item1\n\n"
LIST_ITEM@11..20
LIST_ITEM_INDENT@11..13 " "
LIST_ITEM_BULLET@13..15 "+ "
LIST_ITEM_CONTENT@15..20
PARAGRAPH@15..20
TEXT@15..20 "item2"
"###
);
// three nesting levels
let list = to_list(
r#" 1. item1
2. item2
3. item3"#,
);
assert!(list.is_ordered());
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..42
LIST_ITEM@0..42
LIST_ITEM_INDENT@0..2 " "
LIST_ITEM_BULLET@2..5 "1. "
LIST_ITEM_CONTENT@5..42
PARAGRAPH@5..11
TEXT@5..11 "item1\n"
LIST@11..28
LIST_ITEM@11..28
LIST_ITEM_INDENT@11..19 " "
LIST_ITEM_BULLET@19..22 "2. "
LIST_ITEM_CONTENT@22..28
PARAGRAPH@22..28
TEXT@22..28 "item2\n"
LIST@28..42
LIST_ITEM@28..42
LIST_ITEM_INDENT@28..34 " "
LIST_ITEM_BULLET@34..37 "3. "
LIST_ITEM_CONTENT@37..42
PARAGRAPH@37..42
TEXT@37..42 "item3"
"###
);
// a block nested inside the item content
let list = to_list(
r#" 1. item1
#+begin_example
hello
#+end_example
"#,
);
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..51
LIST_ITEM@0..51
LIST_ITEM_INDENT@0..2 " "
LIST_ITEM_BULLET@2..5 "1. "
LIST_ITEM_CONTENT@5..51
PARAGRAPH@5..11
TEXT@5..11 "item1\n"
EXAMPLE_BLOCK@11..51
BLOCK_BEGIN@11..31
WHITESPACE@11..15 " "
TEXT@15..23 "#+begin_"
TEXT@23..30 "example"
TEXT@30..30 ""
NEW_LINE@30..31 "\n"
BLOCK_CONTENT@31..37
TEXT@31..37 "hello\n"
BLOCK_END@37..51
TEXT@37..43 "#+end_"
TEXT@43..50 "example"
NEW_LINE@50..51 "\n"
"###
);
// a bullet not followed by whitespace or a line end is not a list
let config = &ParseConfig::default();
assert!(list_node(("-a", config).into()).is_err());
}

108
src/syntax/macros.rs Normal file
View file

@ -0,0 +1,108 @@
use nom::{
bytes::complete::{take_until, take_while1},
combinator::{map, opt, verify},
sequence::tuple,
AsBytes, IResult,
};
use super::{
combinator::{
debug_assert_lossless, l_curly3_token, l_parens_token, node, r_curly3_token,
r_parens_token, GreenElement,
},
input::Input,
SyntaxKind::*,
};
/// Parses a macro call: `{{{NAME}}}` or `{{{NAME(ARGUMENTS)}}}`.
///
/// `NAME` is made of ASCII alphanumerics, `-` and `_`, and must start with an
/// ASCII letter. `ARGUMENTS` is everything up to the first `)}}}`, i.e. the
/// closing parenthesis has to be directly followed by `}}}`. The arguments are
/// kept as a single `TEXT` token inside a `MACROS_ARGUMENT` node.
pub fn macros_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let macro_name = verify(
        take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
        // `take_while1` never yields an empty match, so a first byte always exists
        |s: &Input| {
            s.as_bytes()
                .first()
                .map_or(false, u8::is_ascii_alphabetic)
        },
    );
    let macro_argument = opt(tuple((l_parens_token, take_until(")}}}"), r_parens_token)));

    debug_assert_lossless(map(
        tuple((l_curly3_token, macro_name, macro_argument, r_curly3_token)),
        |(open, name, argument, close)| {
            let mut children = vec![open, name.text_token()];
            children.extend(argument.map(|(l_parens, text, r_parens)| {
                node(MACROS_ARGUMENT, [l_parens, text.text_token(), r_parens])
            }));
            children.push(close);
            node(MACROS, children)
        },
    ))(input)
}
#[test]
fn test() {
// Snapshot tests for `macros_node`, with and without arguments.
use crate::{ast::Macros, tests::to_ast, ParseConfig};
let to_macros = to_ast::<Macros>(macros_node);
insta::assert_debug_snapshot!(
to_macros("{{{title}}}").syntax,
@r###"
MACROS@0..11
L_CURLY3@0..3 "{{{"
TEXT@3..8 "title"
R_CURLY3@8..11 "}}}"
"###
);
insta::assert_debug_snapshot!(
to_macros("{{{one_arg_macro(1)}}}").syntax,
@r###"
MACROS@0..22
L_CURLY3@0..3 "{{{"
TEXT@3..16 "one_arg_macro"
MACROS_ARGUMENT@16..19
L_PARENS@16..17 "("
TEXT@17..18 "1"
R_PARENS@18..19 ")"
R_CURLY3@19..22 "}}}"
"###
);
// arguments are not split: the whole list is one TEXT token
insta::assert_debug_snapshot!(
to_macros("{{{two_arg_macro(1, 2)}}}").syntax,
@r###"
MACROS@0..25
L_CURLY3@0..3 "{{{"
TEXT@3..16 "two_arg_macro"
MACROS_ARGUMENT@16..22
L_PARENS@16..17 "("
TEXT@17..21 "1, 2"
R_PARENS@21..22 ")"
R_CURLY3@22..25 "}}}"
"###
);
insta::assert_debug_snapshot!(
to_macros("{{{two_arg_macro(1\\,a, 2)}}}").syntax,
@r###"
MACROS@0..28
L_CURLY3@0..3 "{{{"
TEXT@3..16 "two_arg_macro"
MACROS_ARGUMENT@16..25
L_PARENS@16..17 "("
TEXT@17..24 "1\\,a, 2"
R_PARENS@24..25 ")"
R_CURLY3@25..28 "}}}"
"###
);
// rejected inputs: name starting with a digit, unterminated macro,
// unbalanced parentheses
let config = &ParseConfig::default();
assert!(macros_node(("{{{0uthor}}}", config).into()).is_err());
assert!(macros_node(("{{{author}}", config).into()).is_err());
assert!(macros_node(("{{{poem(}}}", config).into()).is_err());
assert!(macros_node(("{{{poem)}}}", config).into()).is_err());
}

209
src/syntax/mod.rs Normal file
View file

@ -0,0 +1,209 @@
//! Org-mode elements
pub mod block;
pub mod clock;
pub mod combinator;
pub mod comment;
pub mod cookie;
pub mod document;
pub mod drawer;
pub mod dyn_block;
pub mod element;
pub mod emphasis;
pub mod fixed_width;
pub mod fn_def;
pub mod fn_ref;
pub mod headline;
pub mod inline_call;
pub mod inline_src;
pub mod input;
pub mod keyword;
pub mod link;
pub mod list;
pub mod macros;
pub mod object;
pub mod paragraph;
pub mod planning;
pub mod radio_target;
pub mod rule;
pub mod snippet;
pub mod table;
pub mod target;
pub mod timestamp;
use rowan::Language;
/// Marker type that ties rowan's generic syntax tree to [`SyntaxKind`] through
/// its [`Language`] implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct OrgLanguage;
impl Language for OrgLanguage {
type Kind = SyntaxKind;
fn kind_from_raw(raw: rowan::SyntaxKind) -> SyntaxKind {
// SAFETY: SyntaxKind is `repr(u16)`
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: SyntaxKind) -> rowan::SyntaxKind {
rowan::SyntaxKind(kind as u16)
}
}
// rowan's generic tree types, specialised for `OrgLanguage`.
/// A node of the syntax tree.
pub type SyntaxNode = rowan::SyntaxNode<OrgLanguage>;
/// A token (leaf) of the syntax tree.
pub type SyntaxToken = rowan::SyntaxToken<OrgLanguage>;
/// Either a [`SyntaxNode`] or a [`SyntaxToken`].
pub type SyntaxElement = rowan::SyntaxElement<OrgLanguage>;
/// Iterator over the child nodes of a [`SyntaxNode`].
pub type SyntaxNodeChildren = rowan::SyntaxNodeChildren<OrgLanguage>;
/// Iterator over the child nodes and tokens of a [`SyntaxNode`].
pub type SyntaxElementChildren = rowan::SyntaxElementChildren<OrgLanguage>;
/// Kind of every node and token that can appear in the syntax tree.
///
/// The enum is `repr(u16)` with implicit discriminants, and
/// `OrgLanguage::kind_from_raw` converts raw `u16` values back into variants,
/// so the declaration order determines the raw value of each kind.
// variants are written in SCREAMING_SNAKE_CASE, hence the style lints are silenced
#[allow(bad_style)]
#[allow(clippy::all)]
#[non_exhaustive]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
#[repr(u16)]
pub enum SyntaxKind {
//
// token
//
L_BRACKET, // '['
R_BRACKET, // ']'
L_BRACKET2, // '[['
R_BRACKET2, // ']]'
L_PARENS, // '('
R_PARENS, // ')'
L_ANGLE, // '<'
R_ANGLE, // '>'
L_CURLY, // '{'
R_CURLY, // '}'
L_CURLY3, // '{{{'
R_CURLY3, // '}}}'
L_ANGLE2, // '<<'
R_ANGLE2, // '>>'
L_ANGLE3, // '<<<'
R_ANGLE3, // '>>>'
AT, // '@'
AT2, // '@@'
PERCENT, // '%'
PERCENT2, // '%%'
SLASH, // '/'
UNDERSCORE, // '_'
STAR, // '*'
PLUS, // '+'
MINUS, // '-'
MINUS2, // '--'
COLON, // ':'
COLON2, // '::'
EQUAL, // '='
TILDE, // '~'
HASH, // '#'
HASH_PLUS, // '#+'
DOUBLE_ARROW, // '=>'
PIPE, // '|'
COMMA, // ','
// free-form text, blank lines, whitespace and line endings
TEXT,
BLANK_LINE,
WHITESPACE,
NEW_LINE,
// document structure: document, sections, headlines and their parts
DOCUMENT,
SECTION,
PARAGRAPH,
HEADLINE,
HEADLINE_STARS,
HEADLINE_TITLE,
HEADLINE_KEYWORD,
HEADLINE_PRIORITY,
HEADLINE_TAGS,
PROPERTY_DRAWER,
NODE_PROPERTY,
PLANNING,
PLANNING_DEADLINE,
PLANNING_SCHEDULED,
PLANNING_CLOSED,
//
// elements
//
/* table */
ORG_TABLE,
ORG_TABLE_RULE_ROW,
ORG_TABLE_STANDARD_ROW,
ORG_TABLE_CELL,
/* list */
LIST,
LIST_ITEM,
LIST_ITEM_INDENT,
LIST_ITEM_BULLET,
LIST_ITEM_COUNTER,
LIST_ITEM_CHECK_BOX,
LIST_ITEM_TAG,
LIST_ITEM_CONTENT,
/* drawer */
DRAWER,
DRAWER_BEGIN,
DRAWER_END,
KEYWORD,
BABEL_CALL,
TABLE_EL,
CLOCK,
FN_DEF,
COMMENT,
RULE,
FIXED_WIDTH,
/* dyn block */
DYN_BLOCK,
DYN_BLOCK_BEGIN,
DYN_BLOCK_END,
/* block */
SPECIAL_BLOCK,
QUOTE_BLOCK,
CENTER_BLOCK,
VERSE_BLOCK,
COMMENT_BLOCK,
EXAMPLE_BLOCK,
EXPORT_BLOCK,
SOURCE_BLOCK,
SOURCE_BLOCK_LANG,
BLOCK_BEGIN,
BLOCK_END,
BLOCK_CONTENT,
//
// objects
//
INLINE_CALL,
INLINE_SRC,
LINK,
LINK_PATH,
COOKIE,
RADIO_TARGET,
FN_REF,
LATEX_ENVIRONMENT,
MACROS,
MACROS_ARGUMENT,
SNIPPET,
TARGET,
BOLD,
STRIKE,
ITALIC,
UNDERLINE,
VERBATIM,
CODE,
/* timestamp */
TIMESTAMP_ACTIVE,
TIMESTAMP_INACTIVE,
TIMESTAMP_DIARY,
TIMESTAMP_YEAR,
TIMESTAMP_MONTH,
TIMESTAMP_DAY,
TIMESTAMP_HOUR,
TIMESTAMP_MINUTE,
TIMESTAMP_DAYNAME,
}
impl From<SyntaxKind> for rowan::SyntaxKind {
fn from(value: SyntaxKind) -> Self {
OrgLanguage::kind_to_raw(value)
}
}

194
src/syntax/object.rs Normal file
View file

@ -0,0 +1,194 @@
use nom::{AsBytes, IResult, InputLength, InputTake};
use super::{
combinator::GreenElement,
cookie::cookie_node,
emphasis::{bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node},
fn_ref::fn_ref_node,
inline_call::inline_call_node,
inline_src::inline_src_node,
input::Input,
link::link_node,
macros::macros_node,
radio_target::radio_target_node,
snippet::snippet_node,
target::target_node,
timestamp::{timestamp_active_node, timestamp_diary_node, timestamp_inactive_node},
};
/// Iterator over the byte offsets in a text at which `object_nodes_base`
/// attempts to parse an inline object.
pub struct InlinePositions<'a> {
// text being scanned
bytes: &'a [u8],
// offset from which the next search starts
pos: usize,
// offset queued to be yielded before searching again
next: Option<usize>,
}
impl InlinePositions<'_> {
    /// Creates an iterator over `bytes` whose first yielded position is `0`.
    pub fn new(bytes: &[u8]) -> InlinePositions<'_> {
        // offset 0 is queued up front so the very start of the text is always tried
        let first = Some(0);
        InlinePositions {
            bytes,
            pos: 0,
            next: first,
        }
    }
}
impl Iterator for InlinePositions<'_> {
type Item = usize;
/// Yields the queued position if there is one; otherwise searches forward
/// for the next trigger byte and yields a position derived from it.
fn next(&mut self) -> Option<Self::Item> {
self.next.take().or_else(|| {
jetscii::bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n')
.find(&self.bytes[self.pos..])
.map(|i| {
// resume the following search right after the byte that was found
self.pos += i + 1;
match self.bytes[self.pos - 1] {
// `{`: yield its own position now and queue the position after it
b'{' => {
self.next = Some(self.pos);
self.pos - 1
}
// separators: yield the position right after the byte
b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos,
// `@`, `<`, `[`: yield the position of the byte itself
_ => self.pos - 1,
}
})
})
}
}
/// Parses `input` into a flat sequence of inline objects and plain text tokens.
///
/// In debug builds the concatenated text of the returned elements is checked
/// against `input`, so that no character is lost or invented.
pub fn object_nodes(input: Input) -> Vec<GreenElement> {
    let nodes = object_nodes_base(input);

    debug_assert_eq!(
        input.as_str(),
        nodes.iter().map(|n| n.to_string()).collect::<String>(),
        "parser must be lossless"
    );

    nodes
}
/// Splits `input` into inline objects separated by plain text.
///
/// Starting from the unparsed rest, every candidate position reported by
/// `InlinePositions` is tried in order; the first one where `object_node`
/// succeeds yields a text token for what precedes it (if non-empty) followed
/// by the object, and scanning restarts after the object. Whatever is left
/// once no candidate matches is emitted as a final text token.
fn object_nodes_base(input: Input) -> Vec<GreenElement> {
    let mut children = vec![];
    let mut rest = input;

    loop {
        // first candidate position at which an object actually parses
        let found = InlinePositions::new(rest.as_bytes())
            .map(|idx| rest.take_split(idx))
            .find_map(|(tail, head)| {
                object_node(tail)
                    .ok()
                    .map(|(after, object)| (head, after, object))
            });

        match found {
            Some((head, after, object)) => {
                if !head.is_empty() {
                    children.push(head.text_token());
                }
                children.push(object);
                rest = after;
            }
            None => break,
        }
    }

    if !rest.is_empty() {
        children.push(rest.text_token());
    }

    children
}
/// Tries to parse a single inline object at the very start of `i`.
///
/// The parser to run is chosen from the first byte; for `<` and `[` several
/// parsers are tried in order. Inputs shorter than three units (as reported by
/// `input_len`) are rejected outright.
fn object_node(i: Input) -> IResult<Input, GreenElement, ()> {
    if i.input_len() < 3 {
        return Err(nom::Err::Error(()));
    }

    let first_byte = i.as_bytes()[0];

    match first_byte {
        // emphasis markers
        b'*' => bold_node(i),
        b'+' => strike_node(i),
        b'/' => italic_node(i),
        b'_' => underline_node(i),
        b'=' => verbatim_node(i),
        b'~' => code_node(i),
        b'@' => snippet_node(i),
        b'{' => macros_node(i),
        b'<' => radio_target_node(i)
            .or_else(|_| target_node(i))
            .or_else(|_| timestamp_diary_node(i))
            .or_else(|_| timestamp_active_node(i)),
        b'[' => cookie_node(i)
            .or_else(|_| link_node(i))
            .or_else(|_| fn_ref_node(i))
            .or_else(|_| timestamp_inactive_node(i)),
        b'c' => inline_call_node(i),
        b's' => inline_src_node(i),
        _ => Err(nom::Err::Error(())),
    }
}
#[test]
fn parse() {
// Snapshot tests for `object_nodes`: the parsed objects are wrapped in a
// PARAGRAPH root node so they can be printed as a tree.
use crate::{
syntax::{combinator::node, SyntaxKind, SyntaxNode},
ParseConfig,
};
let t = |input: &str| {
let config = &ParseConfig::default();
let children = object_nodes((input, config).into());
SyntaxNode::new_root(node(SyntaxKind::PARAGRAPH, children).into_node().unwrap())
};
// code followed directly by an inline footnote that itself contains objects
insta::assert_debug_snapshot!(
t("~org-inlinetask-min-level~[fn:oiml:The default value of \n~org-inlinetask-min-level~ is =15=.]"),
@r###"
PARAGRAPH@0..93
CODE@0..26
TILDE@0..1 "~"
TEXT@1..25 "org-inlinetask-min-level"
TILDE@25..26 "~"
FN_REF@26..93
L_BRACKET@26..27 "["
TEXT@27..29 "fn"
COLON@29..30 ":"
TEXT@30..34 "oiml"
COLON@34..35 ":"
TEXT@35..57 "The default value of \n"
CODE@57..83
TILDE@57..58 "~"
TEXT@58..82 "org-inlinetask-min-level"
TILDE@82..83 "~"
TEXT@83..87 " is "
VERBATIM@87..91
EQUAL@87..88 "="
TEXT@88..90 "15"
EQUAL@90..91 "="
TEXT@91..92 "."
R_BRACKET@92..93 "]"
"###
);
// emphasis objects interleaved with plain text across several lines
insta::assert_debug_snapshot!(
t(r#"Org is a /plaintext markup syntax/ developed with *Emacs* in 2003.
The canonical parser is =org-element.el=, which provides a number of
functions starting with ~org-element-~."#),
@r###"
PARAGRAPH@0..175
TEXT@0..9 "Org is a "
ITALIC@9..34
SLASH@9..10 "/"
TEXT@10..33 "plaintext markup syntax"
SLASH@33..34 "/"
TEXT@34..50 " developed with "
BOLD@50..57
STAR@50..51 "*"
TEXT@51..56 "Emacs"
STAR@56..57 "*"
TEXT@57..91 " in 2003.\nThe canonic ..."
VERBATIM@91..107
EQUAL@91..92 "="
TEXT@92..106 "org-element.el"
EQUAL@106..107 "="
TEXT@107..160 ", which provides a nu ..."
CODE@160..174
TILDE@160..161 "~"
TEXT@161..173 "org-element-"
TILDE@173..174 "~"
TEXT@174..175 "."
"###
);
}

96
src/syntax/paragraph.rs Normal file
View file

@ -0,0 +1,96 @@
use nom::{IResult, InputTake};
use super::{
combinator::{blank_lines, debug_assert_lossless, line_ends_iter, node, GreenElement},
input::Input,
object::object_nodes,
SyntaxKind,
};
/// Parses a paragraph: every line up to (not including) the first line that
/// consists only of ASCII whitespace, followed by the blank lines after it.
///
/// The paragraph text is parsed as inline objects; those objects and the
/// trailing blank lines become the children of a `PARAGRAPH` node.
/// `input` must not be empty (checked in debug builds).
fn paragraph_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert!(!input.is_empty());

    let is_blank = |line: &str| line.bytes().all(|c| c.is_ascii_whitespace());

    // end of the last line before the first blank one (0 if the first line is blank)
    let contents_end = line_ends_iter(input.as_str())
        .scan(0, |line_start, line_end| {
            Some((std::mem::replace(line_start, line_end), line_end))
        })
        .take_while(|&(line_start, line_end)| !is_blank(&input.s[line_start..line_end]))
        .last()
        .map_or(0, |(_, line_end)| line_end);

    let (input, contents) = input.take_split(contents_end);
    let (input, post_blank) = blank_lines(input)?;

    let mut children = object_nodes(contents);
    children.extend(post_blank);

    Ok((input, node(SyntaxKind::PARAGRAPH, children)))
}
/// Parses a paragraph and the blank lines following it into a `PARAGRAPH` node.
///
/// Thin wrapper that runs `paragraph_node_base` through `debug_assert_lossless`
/// (presumably a debug-build losslessness check — confirm against its
/// definition).
pub fn paragraph_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(paragraph_node_base)(input)
}
/// Parses the whole of `input` as a sequence of paragraphs.
///
/// Paragraphs are parsed one after another until nothing is left; the first
/// parse error aborts and is returned to the caller.
pub fn paragraph_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
    let mut paragraphs = Vec::new();
    let mut rest = input;

    loop {
        if rest.is_empty() {
            return Ok(paragraphs);
        }

        let (after, paragraph) = paragraph_node(rest)?;
        paragraphs.push(paragraph);
        rest = after;
    }
}
#[test]
fn parse() {
// Snapshot tests for `paragraph_node`: a paragraph stops at the first blank
// line and owns the blank lines that follow it.
use crate::{ast::Paragraph, tests::to_ast};
let to_paragraph = to_ast::<Paragraph>(paragraph_node);
insta::assert_debug_snapshot!(
to_paragraph(r#"a"#).syntax,
@r###"
PARAGRAPH@0..1
TEXT@0..1 "a"
"###
);
insta::assert_debug_snapshot!(
to_paragraph(r#"a
"#).syntax,
@r###"
PARAGRAPH@0..6
TEXT@0..2 "a\n"
BLANK_LINE@2..6
WHITESPACE@2..6 " "
"###
);
insta::assert_debug_snapshot!(
to_paragraph(r#"a
b
c
"#).syntax,
@r###"
PARAGRAPH@0..6
TEXT@0..6 "a\nb\nc\n"
"###
);
insta::assert_debug_snapshot!(
to_paragraph(r#"a
c
"#).syntax,
@r###"
PARAGRAPH@0..3
TEXT@0..2 "a\n"
BLANK_LINE@2..3
NEW_LINE@2..3 "\n"
"###
);
}

94
src/syntax/planning.rs Normal file
View file

@ -0,0 +1,94 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{line_ending, space0},
combinator::{eof, iterator},
sequence::tuple,
IResult,
};
use super::{
combinator::{debug_assert_lossless, GreenElement, NodeBuilder},
input::Input,
timestamp::{timestamp_active_node, timestamp_inactive_node},
SyntaxKind::*,
};
/// Parses a planning line (`DEADLINE:`, `SCHEDULED:` and/or `CLOSED:` entries)
/// into a `PLANNING` node.
///
/// Thin wrapper that runs `planning_node_base` through `debug_assert_lossless`
/// (presumably a debug-build losslessness check — confirm against its
/// definition).
pub fn planning_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(planning_node_base)(input)
}
/// Parses one planning line into a `PLANNING` node.
///
/// The line holds one or more entries, each made of optional spaces, one of
/// the keywords `DEADLINE:`, `SCHEDULED:` or `CLOSED:`, optional spaces and an
/// active or inactive timestamp. After the last entry only spaces and a line
/// ending (or the end of input) may follow. Fails when no entry is found.
fn planning_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut planning = NodeBuilder::new();

    let mut entries = iterator(
        input,
        tuple((
            space0,
            alt((tag("DEADLINE:"), tag("SCHEDULED:"), tag("CLOSED:"))),
            space0,
            alt((timestamp_active_node, timestamp_inactive_node)),
        )),
    );

    let len_before = planning.len();

    for (ws, keyword, ws_, timestamp) in &mut entries {
        let kind = match keyword.as_str() {
            "DEADLINE:" => PLANNING_DEADLINE,
            "SCHEDULED:" => PLANNING_SCHEDULED,
            "CLOSED:" => PLANNING_CLOSED,
            // the parser above only accepts the three tags listed
            _ => unreachable!(),
        };

        let mut entry = NodeBuilder::new();
        entry.ws(ws);
        entry.text(keyword);
        entry.ws(ws_);
        entry.push(timestamp);

        planning.push(entry.finish(kind));
    }

    // nothing was added: this is not a planning line
    if planning.len() == len_before {
        return Err(nom::Err::Error(()));
    }

    let (input, _) = entries.finish()?;
    let (input, ws) = space0(input)?;
    let (input, nl) = alt((line_ending, eof))(input)?;

    planning.ws(ws);
    planning.nl(nl);

    Ok((input, planning.finish(PLANNING)))
}
#[test]
// NOTE(review): the name looks like a typo of `parse`, which is what the
// sibling test functions are called.
fn prase() {
use crate::{ast::Planning, tests::to_ast, ParseConfig};
let to_planning = to_ast::<Planning>(planning_node);
// a single SCHEDULED entry with an active timestamp
insta::assert_debug_snapshot!(
to_planning("SCHEDULED: <2019-04-08 Mon>").syntax,
@r###"
PLANNING@0..27
PLANNING_SCHEDULED@0..27
TEXT@0..10 "SCHEDULED:"
WHITESPACE@10..11 " "
TIMESTAMP_ACTIVE@11..27
L_ANGLE@11..12 "<"
TIMESTAMP_YEAR@12..16 "2019"
MINUS@16..17 "-"
TIMESTAMP_MONTH@17..19 "04"
MINUS@19..20 "-"
TIMESTAMP_DAY@20..22 "08"
WHITESPACE@22..23 " "
TIMESTAMP_DAYNAME@23..26 "Mon"
R_ANGLE@26..27 ">"
"###
);
// rejected inputs: no entry at all, and a keyword without a timestamp
let config = &ParseConfig::default();
assert!(planning_node((" ", config).into()).is_err());
assert!(planning_node((" SCHEDULED: ", config).into()).is_err());
}

View file

@ -0,0 +1,68 @@
use nom::{
bytes::complete::take_while,
combinator::{map, verify},
sequence::tuple,
IResult,
};
use super::{
combinator::{debug_assert_lossless, l_angle3_token, node, r_angle3_token, GreenElement},
input::Input,
SyntaxKind::*,
};
// TODO: text-markup, entities, latex-fragments, subscript and superscript
/// Parses a radio target: `<<<CONTENTS>>>`.
///
/// `CONTENTS` must be non-empty, must not contain `<`, `>` or a newline, and
/// must neither start nor end with a space. It is stored as a single `TEXT`
/// token inside a `RADIO_TARGET` node.
pub fn radio_target_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let contents = verify(
        take_while(|c: char| !matches!(c, '<' | '\n' | '>')),
        |s: &Input| {
            let text = s.as_str();
            !text.is_empty() && !text.starts_with(' ') && !text.ends_with(' ')
        },
    );

    debug_assert_lossless(map(
        tuple((l_angle3_token, contents, r_angle3_token)),
        |(open, target, close)| node(RADIO_TARGET, [open, target.text_token(), close]),
    ))(input)
}
#[test]
fn parse() {
// Snapshot tests for `radio_target_node`.
use crate::{ast::RadioTarget, tests::to_ast, ParseConfig};
let to_radio_target = to_ast::<RadioTarget>(radio_target_node);
insta::assert_debug_snapshot!(
to_radio_target("<<<target>>>").syntax,
@r###"
RADIO_TARGET@0..12
L_ANGLE3@0..3 "<<<"
TEXT@3..9 "target"
R_ANGLE3@9..12 ">>>"
"###
);
// spaces inside the contents are allowed
insta::assert_debug_snapshot!(
to_radio_target("<<<tar get>>>").syntax,
@r###"
RADIO_TARGET@0..13
L_ANGLE3@0..3 "<<<"
TEXT@3..10 "tar get"
R_ANGLE3@10..13 ">>>"
"###
);
// rejected inputs: leading/trailing space, `<`, `>` or newline in the
// contents, and a missing closing `>`
let config = &ParseConfig::default();
assert!(radio_target_node(("<<<target >>>", config).into()).is_err());
assert!(radio_target_node(("<<< target>>>", config).into()).is_err());
assert!(radio_target_node(("<<<ta<get>>>", config).into()).is_err());
assert!(radio_target_node(("<<<ta>get>>>", config).into()).is_err());
assert!(radio_target_node(("<<<ta\nget>>>", config).into()).is_err());
assert!(radio_target_node(("<<<target>>", config).into()).is_err());
}

93
src/syntax/rule.rs Normal file
View file

@ -0,0 +1,93 @@
use nom::{
branch::alt,
bytes::complete::take_while_m_n,
character::complete::{line_ending, space0},
combinator::{eof, map},
sequence::tuple,
IResult,
};
use super::{
combinator::{blank_lines, debug_assert_lossless, GreenElement, NodeBuilder},
input::Input,
SyntaxKind::*,
};
/// Parses a horizontal rule: a line holding five or more `-` characters.
///
/// The dashes may be surrounded by spaces and must be followed by a line
/// ending or the end of input. Blank lines after the rule are attached to the
/// resulting `RULE` node.
pub fn rule_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(
        tuple((
            space0,
            // at least five dashes, no upper bound
            take_while_m_n(5, usize::MAX, |c| c == '-'),
            space0,
            alt((line_ending, eof)),
            blank_lines,
        )),
        |(ws, dashes, ws_, nl, post_blank)| {
            let mut b = NodeBuilder::new();
            b.ws(ws);
            b.text(dashes);
            b.ws(ws_);
            b.nl(nl);
            b.children.extend(post_blank);
            b.finish(RULE)
        },
    ))(input)
}
#[test]
fn parse() {
// Snapshot tests for `rule_node`.
use crate::{ast::Rule, tests::to_ast, ParseConfig};
let to_rule = to_ast::<Rule>(rule_node);
insta::assert_debug_snapshot!(
to_rule("-----").syntax,
@r###"
RULE@0..5
TEXT@0..5 "-----"
"###
);
insta::assert_debug_snapshot!(
to_rule("--------").syntax,
@r###"
RULE@0..8
TEXT@0..8 "--------"
"###
);
// blank lines after the rule belong to the RULE node
insta::assert_debug_snapshot!(
to_rule("-----\n\n\n").syntax,
@r###"
RULE@0..8
TEXT@0..5 "-----"
NEW_LINE@5..6 "\n"
BLANK_LINE@6..7
NEW_LINE@6..7 "\n"
BLANK_LINE@7..8
NEW_LINE@7..8 "\n"
"###
);
// trailing spaces are kept as a WHITESPACE token
insta::assert_debug_snapshot!(
to_rule("----- \n").syntax,
@r###"
RULE@0..8
TEXT@0..5 "-----"
WHITESPACE@5..7 " "
NEW_LINE@7..8 "\n"
"###
);
// rejected inputs: fewer than five dashes, or any other text on the line
let config = &ParseConfig::default();
assert!(rule_node(("", config).into()).is_err());
assert!(rule_node(("----", config).into()).is_err());
assert!(rule_node(("None----", config).into()).is_err());
assert!(rule_node(("None ----", config).into()).is_err());
assert!(rule_node(("None------", config).into()).is_err());
assert!(rule_node(("----None----", config).into()).is_err());
assert!(rule_node(("\t\t----", config).into()).is_err());
assert!(rule_node(("------None", config).into()).is_err());
assert!(rule_node(("----- None", config).into()).is_err());
}

91
src/syntax/snippet.rs Normal file
View file

@ -0,0 +1,91 @@
use nom::{
bytes::complete::{take_until, take_while1},
combinator::map,
sequence::tuple,
IResult,
};
use super::{
combinator::{at2_token, colon_token, debug_assert_lossless, node, GreenElement},
input::Input,
SyntaxKind::*,
};
pub fn snippet_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(map(
tuple((
at2_token,
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-'),
colon_token,
take_until("@@"),
at2_token,
)),
|(at2, name, colon, value, at2_)| {
node(
SNIPPET,
[at2, name.text_token(), colon, value.text_token(), at2_],
)
},
))(input)
}
// Tests for `snippet_node`: accepted inputs are compared against inline
// syntax-tree snapshots, then malformed inputs are checked to fail.
#[test]
fn parse() {
use crate::{ast::Snippet, tests::to_ast, ParseConfig};
let to_snippet = to_ast::<Snippet>(snippet_node);
// Short name and value.
insta::assert_debug_snapshot!(
to_snippet("@@html:<b>@@").syntax,
@r###"
SNIPPET@0..12
AT2@0..2 "@@"
TEXT@2..6 "html"
COLON@6..7 ":"
TEXT@7..10 "<b>"
AT2@10..12 "@@"
"###
);
// The value may contain spaces.
insta::assert_debug_snapshot!(
to_snippet("@@latex:any arbitrary LaTeX code@@").syntax,
@r###"
SNIPPET@0..34
AT2@0..2 "@@"
TEXT@2..7 "latex"
COLON@7..8 ":"
TEXT@8..32 "any arbitrary LaTeX code"
AT2@32..34 "@@"
"###
);
// An empty value still produces a (zero-length) TEXT token.
insta::assert_debug_snapshot!(
to_snippet("@@html:@@").syntax,
@r###"
SNIPPET@0..9
AT2@0..2 "@@"
TEXT@2..6 "html"
COLON@6..7 ":"
TEXT@7..7 ""
AT2@7..9 "@@"
"###
);
// A single `@` inside the value does not terminate the snippet.
insta::assert_debug_snapshot!(
to_snippet("@@html:<p>@</p>@@").syntax,
@r###"
SNIPPET@0..17
AT2@0..2 "@@"
TEXT@2..6 "html"
COLON@6..7 ":"
TEXT@7..15 "<p>@</p>"
AT2@15..17 "@@"
"###
);
let config = &ParseConfig::default();
// Rejected: closing delimiter is a single `@`.
assert!(snippet_node(("@@html:<b>@", config).into()).is_err());
// Rejected: no colon after the name.
assert!(snippet_node(("@@html<b>@@", config).into()).is_err());
// Rejected: empty name.
assert!(snippet_node(("@@:<b>@@", config).into()).is_err());
}

209
src/syntax/table.rs Normal file
View file

@ -0,0 +1,209 @@
use nom::{
bytes::complete::take_while,
character::complete::{multispace0, space0},
combinator::iterator,
sequence::tuple,
AsBytes, Err, IResult, InputTake, Slice,
};
use super::{
combinator::{
blank_lines, debug_assert_lossless, line_ends_iter, node, pipe_token, GreenElement,
NodeBuilder,
},
input::Input,
object::object_nodes,
SyntaxKind::*,
};
/// Parses an Org table: a run of consecutive lines whose first
/// non-whitespace character is `|`.
///
/// Lines starting with `|-` become `ORG_TABLE_RULE_ROW` nodes holding the
/// whole line as a single text token; every other table line is parsed by
/// `table_standard_row_node`. Blank lines following the table are appended to
/// the resulting `ORG_TABLE` node.
///
/// Fails when the very first line is not a table line, or when a standard
/// row or the trailing `blank_lines` parser fails.
fn org_table_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut rows = Vec::new();
    // Offset where the line currently being examined begins.
    let mut offset = 0;

    for end in line_ends_iter(input.as_str()) {
        let line = input.slice(offset..end);
        let content = line.as_str().trim_start();

        // Org tables end at the first line not starting with a vertical bar.
        if !content.starts_with('|') {
            if offset == 0 {
                // Not even the first line belongs to a table.
                return Err(nom::Err::Error(()));
            }
            break;
        }

        let row = if content.starts_with("|-") {
            node(ORG_TABLE_RULE_ROW, [line.text_token()])
        } else {
            table_standard_row_node(line)?
        };
        rows.push(row);
        offset = end;
    }

    let (rest, post_blank) = blank_lines(input.slice(offset..))?;
    rows.extend(post_blank);
    Ok((rest, node(ORG_TABLE, rows)))
}
/// Builds an `ORG_TABLE_STANDARD_ROW` node from one table line.
///
/// After optional leading blanks, the line is read as repeated groups of
/// `|`, whitespace, and text up to the next `|`. Non-empty text becomes an
/// `ORG_TABLE_CELL`; whitespace trailing the cell text is emitted after the
/// cell rather than inside it.
///
/// Returns the finished row, or the error reported by the leading-blank
/// parser or by the group iterator.
fn table_standard_row_node(input: Input) -> Result<GreenElement, nom::Err<()>> {
    let (rest, indent) = space0(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);

    let mut groups = iterator(
        rest,
        tuple((pipe_token, multispace0, take_while(|c: char| c != '|'))),
    );

    for (pipe, leading, text) in &mut groups {
        builder.push(pipe);
        builder.ws(leading);

        // Nothing between this pipe and the next one (or the end of line).
        if text.is_empty() {
            continue;
        }

        let last_visible = text
            .as_bytes()
            .iter()
            .rposition(|byte| !byte.is_ascii_whitespace());

        match last_visible {
            Some(idx) => {
                // Split just past the last non-whitespace byte: the head is
                // the cell content, the tail is trailing whitespace.
                let (trailing, cell) = text.take_split(idx + 1);
                builder.push(node(ORG_TABLE_CELL, object_nodes(cell)));
                builder.ws(trailing);
            }
            None => {
                builder.push(node(ORG_TABLE_CELL, object_nodes(text)));
            }
        }
    }

    groups.finish()?;
    Ok(builder.finish(ORG_TABLE_STANDARD_ROW))
}
/// Parses a table.el table into a `TABLE_EL` node.
///
/// The first line, once trimmed, must start with `+-` and consist solely of
/// `+` and `-`. The table then extends over every following line whose
/// trimmed text starts with `|` or `+`. The whole table is stored as a single
/// text token, followed by whatever `blank_lines` collects.
fn table_el_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    // Offset just past the last line accepted as part of the table.
    let mut table_end = 0;

    for line_end in line_ends_iter(input.as_str()) {
        let text = input.s[table_end..line_end].trim();

        // Table.el tables start at lines beginning with "+-" string and followed by plus or minus signs
        if table_end == 0
            && (!text.starts_with("+-") || text.bytes().any(|c| c != b'+' && c != b'-'))
        {
            return Err(Err::Error(()));
        }

        // Table.el tables end at the first line not starting with either a vertical line or a plus sign.
        if !(text.starts_with('|') || text.starts_with('+')) {
            break;
        }

        table_end = line_end;
    }

    let (rest, table) = input.take_split(table_end);
    let (rest, post_blank) = blank_lines(rest)?;

    let mut children = vec![table.text_token()];
    children.extend(post_blank);
    Ok((rest, node(TABLE_EL, children)))
}
/// Public entry point for Org tables: runs `org_table_node_base` wrapped in
/// `debug_assert_lossless`.
// NOTE(review): presumably `debug_assert_lossless` checks, in debug builds,
// that the produced node covers exactly the consumed input — confirm in
// `combinator`.
pub fn org_table_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(org_table_node_base)(input)
}
/// Public entry point for table.el tables: runs `table_el_node_base` wrapped
/// in `debug_assert_lossless`.
// NOTE(review): presumably `debug_assert_lossless` checks, in debug builds,
// that the produced node covers exactly the consumed input — confirm in
// `combinator`.
pub fn table_el_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(table_el_node_base)(input)
}
// Snapshot tests for `org_table_node`.
#[test]
fn parse_org_table() {
use crate::{ast::OrgTable, tests::to_ast};
let to_org_table = to_ast::<OrgTable>(org_table_node);
// Smallest table: a single pipe yields one standard row with no cell.
insta::assert_debug_snapshot!(
to_org_table("|").syntax,
@r###"
ORG_TABLE@0..1
ORG_TABLE_STANDARD_ROW@0..1
PIPE@0..1 "|"
"###
);
// Mix of standard rows and `|-` rule rows; rule rows keep the whole line,
// including its newline, as one TEXT token.
insta::assert_debug_snapshot!(
to_org_table(
r#"|
|-
|a
|-
| a |
"#
).syntax,
@r###"
ORG_TABLE@0..20
ORG_TABLE_STANDARD_ROW@0..2
PIPE@0..1 "|"
WHITESPACE@1..2 "\n"
ORG_TABLE_RULE_ROW@2..5
TEXT@2..5 "|-\n"
ORG_TABLE_STANDARD_ROW@5..8
PIPE@5..6 "|"
ORG_TABLE_CELL@6..7
TEXT@6..7 "a"
WHITESPACE@7..8 "\n"
ORG_TABLE_RULE_ROW@8..11
TEXT@8..11 "|-\n"
ORG_TABLE_STANDARD_ROW@11..20
PIPE@11..12 "|"
WHITESPACE@12..15 " "
ORG_TABLE_CELL@15..16
TEXT@15..16 "a"
WHITESPACE@16..18 " "
PIPE@18..19 "|"
WHITESPACE@19..20 "\n"
"###
);
}
// Tests for `table_el_node`: one snapshot of an accepted table, then inputs
// that must be rejected.
#[test]
fn parse_table_el() {
use crate::{ast::TableEl, tests::to_ast, ParseConfig};
let to_table_el = to_ast::<TableEl>(table_el_node);
// The table body is kept as a single TEXT token, followed by blank lines.
insta::assert_debug_snapshot!(
to_table_el(
r#" +---+
| |
+---+
"#
).syntax,
@r###"
TABLE_EL@0..37
TEXT@0..32 " +---+\n | |\n ..."
BLANK_LINE@32..33
NEW_LINE@32..33 "\n"
BLANK_LINE@33..37
WHITESPACE@33..37 " "
"###
);
let config = &ParseConfig::default();
// Rejected: empty input.
assert!(table_el_node(("", config).into()).is_err());
// Rejected: the first line contains a character other than `+` and `-`.
assert!(table_el_node(("+----|---", config).into()).is_err());
}

64
src/syntax/target.rs Normal file
View file

@ -0,0 +1,64 @@
use nom::{
bytes::complete::take_while,
combinator::{map, verify},
sequence::tuple,
IResult,
};
use super::{
combinator::{debug_assert_lossless, l_angle2_token, node, r_angle2_token, GreenElement},
input::Input,
SyntaxKind::*,
};
/// Parses a target of the form `<<TARGET>>` into a `TARGET` node.
///
/// `TARGET` may contain any character except `<`, `>` and a newline. It must
/// be non-empty and must not start or end with a blank (space or tab), as
/// Org forbids whitespace at either border of a target.
///
/// Returns the remaining input and the node (`<<`, the target text, `>>`) on
/// success, and an error otherwise.
pub fn target_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert_lossless(map(
        tuple((
            l_angle2_token,
            verify(
                take_while(|c: char| c != '<' && c != '\n' && c != '>'),
                |s: &Input| {
                    let text = s.as_str();
                    // A border character is anything but a space or a tab.
                    // Both checks are false for an empty string, so an empty
                    // target is rejected as well.
                    let is_border = |c: char| c != ' ' && c != '\t';
                    text.starts_with(is_border) && text.ends_with(is_border)
                },
            ),
            r_angle2_token,
        )),
        |(l_angle2, target, r_angle2)| node(TARGET, [l_angle2, target.text_token(), r_angle2]),
    ))(input)
}
// Tests for `target_node`: two accepted inputs are compared against inline
// syntax-tree snapshots, then malformed inputs are checked to fail.
#[test]
fn parse() {
use crate::{ast::Target, tests::to_ast, ParseConfig};
let to_target = to_ast::<Target>(target_node);
// A plain target between `<<` and `>>`.
insta::assert_debug_snapshot!(
to_target("<<target>>").syntax,
@r###"
TARGET@0..10
L_ANGLE2@0..2 "<<"
TEXT@2..8 "target"
R_ANGLE2@8..10 ">>"
"###
);
// A space in the middle of the target text is accepted.
insta::assert_debug_snapshot!(
to_target("<<tar get>>").syntax,
@r###"
TARGET@0..11
L_ANGLE2@0..2 "<<"
TEXT@2..9 "tar get"
R_ANGLE2@9..11 ">>"
"###
);
let config = &ParseConfig::default();
// Rejected: a space directly before the closing or after the opening delimiter.
assert!(target_node(("<<target >>", config).into()).is_err());
assert!(target_node(("<< target>>", config).into()).is_err());
// Rejected: `<`, `>` or a newline inside the target text.
assert!(target_node(("<<ta<get>>", config).into()).is_err());
assert!(target_node(("<<ta>get>>", config).into()).is_err());
assert!(target_node(("<<ta\nget>>", config).into()).is_err());
// Rejected: closing delimiter is one `>` short.
assert!(target_node(("<<target>", config).into()).is_err());
}

326
src/syntax/timestamp.rs Normal file
View file

@ -0,0 +1,326 @@
use nom::{
bytes::complete::{take, take_till, take_while},
character::complete::{space0, space1},
combinator::{map, opt, verify},
sequence::tuple,
IResult,
};
use super::{
combinator::{
colon_token, debug_assert_lossless, l_angle_token, l_bracket_token, l_parens_token,
minus2_token, minus_token, node, percent2_token, r_angle_token, r_bracket_token,
r_parens_token, GreenElement, NodeBuilder,
},
input::Input,
SyntaxKind::*,
};
pub fn timestamp_diary_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(map(
tuple((
l_angle_token,
percent2_token,
l_parens_token,
take_till(|c| c == ')' || c == '>' || c == '\n'),
r_parens_token,
r_angle_token,
)),
|(l_angle, percent2, l_paren, value, r_paren, r_angle)| {
node(
TIMESTAMP_DIARY,
[
l_angle,
percent2,
l_paren,
value.text_token(),
r_paren,
r_angle,
],
)
},
))(input)
}
/// Returns `true` when every byte of `s` is an ASCII digit (vacuously true
/// for an empty input).
fn is_digit_str(s: &Input) -> bool {
    s.as_str().as_bytes().iter().all(u8::is_ascii_digit)
}
fn date(i: Input) -> IResult<Input, [GreenElement; 7], ()> {
map(
tuple((
verify(take(4usize), is_digit_str),
minus_token,
verify(take(2usize), is_digit_str),
minus_token,
verify(take(2usize), is_digit_str),
space1,
take_while(|c: char| {
!c.is_ascii_whitespace()
&& !c.is_ascii_digit()
&& c != '+'
&& c != '-'
&& c != ']'
&& c != '>'
}),
)),
|(year, minus, month, minus_, day, ws, dayname)| {
[
year.token(TIMESTAMP_YEAR),
minus,
month.token(TIMESTAMP_MONTH),
minus_,
day.token(TIMESTAMP_DAY),
ws.ws_token(),
dayname.token(TIMESTAMP_DAYNAME),
]
},
)(i)
}
fn time(i: Input) -> IResult<Input, [GreenElement; 3], ()> {
map(
tuple((
verify(take(2usize), is_digit_str),
colon_token,
verify(take(2usize), is_digit_str),
)),
|(hour, colon, minute)| {
[
hour.token(TIMESTAMP_HOUR),
colon,
minute.token(TIMESTAMP_MINUTE),
]
},
)(i)
}
fn timestamp_active_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, l_angle) = l_angle_token(input)?;
let (input, start_date) = date(input)?;
let (input, start_time) = opt(tuple((space1, time)))(input)?;
let mut b = NodeBuilder::new();
b.push(l_angle);
b.children.extend(start_date);
if input.as_str().starts_with('-') {
let (ws, start_time) = match start_time {
Some(start_time) => start_time,
None => return Err(nom::Err::Error(())),
};
let (input, minus) = minus_token(input)?;
let (input, end_time) = time(input)?;
let (input, space) = space0(input)?;
// TODO: delay-or-repeater
let (input, r_angle) = r_angle_token(input)?;
b.ws(ws);
b.children.extend(start_time);
b.push(minus);
b.children.extend(end_time);
b.ws(space);
b.push(r_angle);
return Ok((input, b.finish(TIMESTAMP_ACTIVE)));
}
let (input, space) = space0(input)?;
let (input, r_angle) = r_angle_token(input)?;
if let Some((ws, start_time)) = start_time {
b.ws(ws);
b.children.extend(start_time);
}
b.ws(space);
b.push(r_angle);
if input.as_str().starts_with("--<") {
let (input, minus2) = minus2_token(input)?;
let (input, l_angle) = l_angle_token(input)?;
let (input, end_date) = date(input)?;
let (input, end_time) = opt(tuple((space1, time)))(input)?;
let (input, space_) = space0(input)?;
// TODO: delay-or-repeater
let (input, r_angle) = r_angle_token(input)?;
b.children.extend([minus2, l_angle]);
b.children.extend(end_date);
if let Some((ws, end_time)) = end_time {
b.ws(ws);
b.children.extend(end_time);
}
b.ws(space_);
b.push(r_angle);
Ok((input, b.finish(TIMESTAMP_ACTIVE)))
} else {
Ok((input, b.finish(TIMESTAMP_ACTIVE)))
}
}
fn timestamp_inactive_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, l_bracket) = l_bracket_token(input)?;
let (input, start_date) = date(input)?;
let (input, start_time) = opt(tuple((space1, time)))(input)?;
let mut b = NodeBuilder::new();
b.push(l_bracket);
b.children.extend(start_date);
if input.s.starts_with('-') {
let (ws, start_time) = match start_time {
Some(start_time) => start_time,
None => return Err(nom::Err::Error(())),
};
let (input, minus) = minus_token(input)?;
let (input, end_time) = time(input)?;
let (input, space) = space0(input)?;
// TODO: delay-or-repeater
let (input, r_bracket) = r_bracket_token(input)?;
b.ws(ws);
b.children.extend(start_time);
b.push(minus);
b.children.extend(end_time);
b.ws(space);
b.push(r_bracket);
return Ok((input, b.finish(TIMESTAMP_INACTIVE)));
}
let (input, space) = space0(input)?;
let (input, r_bracket) = r_bracket_token(input)?;
if let Some((ws, start_time)) = start_time {
b.ws(ws);
b.children.extend(start_time);
}
b.ws(space);
b.push(r_bracket);
if input.s.starts_with("--[") {
let (input, minus2) = minus2_token(input)?;
let (input, l_bracket) = l_bracket_token(input)?;
let (input, end_date) = date(input)?;
let (input, end_time) = opt(tuple((space1, time)))(input)?;
let (input, space_) = space0(input)?;
// TODO: delay-or-repeater
let (input, r_bracket) = r_bracket_token(input)?;
b.children.extend([minus2, l_bracket]);
b.children.extend(end_date);
if let Some((ws, end_time)) = end_time {
b.ws(ws);
b.children.extend(end_time);
}
b.ws(space_);
b.push(r_bracket);
Ok((input, b.finish(TIMESTAMP_INACTIVE)))
} else {
Ok((input, b.finish(TIMESTAMP_INACTIVE)))
}
}
/// Public entry point for active timestamps: runs
/// `timestamp_active_node_base` wrapped in `debug_assert_lossless`.
// NOTE(review): presumably `debug_assert_lossless` checks, in debug builds,
// that the produced node covers exactly the consumed input — confirm in
// `combinator`.
pub fn timestamp_active_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(timestamp_active_node_base)(input)
}
/// Public entry point for inactive timestamps: runs
/// `timestamp_inactive_node_base` wrapped in `debug_assert_lossless`.
// NOTE(review): presumably `debug_assert_lossless` checks, in debug builds,
// that the produced node covers exactly the consumed input — confirm in
// `combinator`.
pub fn timestamp_inactive_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert_lossless(timestamp_inactive_node_base)(input)
}
// Snapshot tests for `timestamp_inactive_node`, covering a single date, a
// date range and a same-day time range, together with `is_range()`.
#[test]
fn parse() {
use crate::{ast::Timestamp, tests::to_ast};
let to_timestamp = to_ast::<Timestamp>(timestamp_inactive_node);
// Single date with a day name: not a range.
let ts = to_timestamp("[2003-09-16 Tue]");
assert!(!ts.is_range());
insta::assert_debug_snapshot!(
ts.syntax,
@r###"
TIMESTAMP_INACTIVE@0..16
L_BRACKET@0..1 "["
TIMESTAMP_YEAR@1..5 "2003"
MINUS@5..6 "-"
TIMESTAMP_MONTH@6..8 "09"
MINUS@8..9 "-"
TIMESTAMP_DAY@9..11 "16"
WHITESPACE@11..12 " "
TIMESTAMP_DAYNAME@12..15 "Tue"
R_BRACKET@15..16 "]"
"###
);
// Two bracketed timestamps joined by `--`: a date range.
let ts = to_timestamp("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]");
assert!(ts.is_range());
insta::assert_debug_snapshot!(
ts.syntax,
@r###"
TIMESTAMP_INACTIVE@0..46
L_BRACKET@0..1 "["
TIMESTAMP_YEAR@1..5 "2003"
MINUS@5..6 "-"
TIMESTAMP_MONTH@6..8 "09"
MINUS@8..9 "-"
TIMESTAMP_DAY@9..11 "16"
WHITESPACE@11..12 " "
TIMESTAMP_DAYNAME@12..15 "Tue"
WHITESPACE@15..16 " "
TIMESTAMP_HOUR@16..18 "09"
COLON@18..19 ":"
TIMESTAMP_MINUTE@19..21 "39"
R_BRACKET@21..22 "]"
MINUS2@22..24 "--"
L_BRACKET@24..25 "["
TIMESTAMP_YEAR@25..29 "2003"
MINUS@29..30 "-"
TIMESTAMP_MONTH@30..32 "09"
MINUS@32..33 "-"
TIMESTAMP_DAY@33..35 "16"
WHITESPACE@35..36 " "
TIMESTAMP_DAYNAME@36..39 "Tue"
WHITESPACE@39..40 " "
TIMESTAMP_HOUR@40..42 "10"
COLON@42..43 ":"
TIMESTAMP_MINUTE@43..45 "39"
R_BRACKET@45..46 "]"
"###
);
// Start and end time inside one pair of brackets: a same-day time range.
let ts = to_timestamp("[2003-09-16 Tue 09:39-10:39]");
assert!(ts.is_range());
insta::assert_debug_snapshot!(
ts.syntax,
@r###"
TIMESTAMP_INACTIVE@0..28
L_BRACKET@0..1 "["
TIMESTAMP_YEAR@1..5 "2003"
MINUS@5..6 "-"
TIMESTAMP_MONTH@6..8 "09"
MINUS@8..9 "-"
TIMESTAMP_DAY@9..11 "16"
WHITESPACE@11..12 " "
TIMESTAMP_DAYNAME@12..15 "Tue"
WHITESPACE@15..16 " "
TIMESTAMP_HOUR@16..18 "09"
COLON@18..19 ":"
TIMESTAMP_MINUTE@19..21 "39"
MINUS@21..22 "-"
TIMESTAMP_HOUR@22..24 "10"
COLON@24..25 ":"
TIMESTAMP_MINUTE@25..27 "39"
R_BRACKET@27..28 "]"
"###
);
}