194 lines
5.3 KiB
Rust
194 lines
5.3 KiB
Rust
use nom::{AsBytes, IResult, InputLength, InputTake};
|
|
|
|
use super::{
|
|
combinator::GreenElement,
|
|
cookie::cookie_node,
|
|
emphasis::{bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node},
|
|
fn_ref::fn_ref_node,
|
|
inline_call::inline_call_node,
|
|
inline_src::inline_src_node,
|
|
input::Input,
|
|
link::link_node,
|
|
macros::macros_node,
|
|
radio_target::radio_target_node,
|
|
snippet::snippet_node,
|
|
target::target_node,
|
|
timestamp::{timestamp_active_node, timestamp_diary_node, timestamp_inactive_node},
|
|
};
|
|
|
|
/// Iterator over byte offsets of a buffer at which an inline object may start.
///
/// The offsets are derived from a fixed set of marker bytes; see the
/// `Iterator` implementation for the exact rules.
#[derive(Debug, Clone)]
pub struct InlinePositions<'a> {
    /// Buffer being scanned.
    bytes: &'a [u8],
    /// Offset at which the next marker search resumes.
    pos: usize,
    /// Offset queued to be yielded by the next call before searching again.
    next: Option<usize>,
}
|
|
|
|
impl InlinePositions<'_> {
|
|
pub fn new(bytes: &[u8]) -> InlinePositions {
|
|
InlinePositions {
|
|
bytes,
|
|
pos: 0,
|
|
next: Some(0),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Iterator for InlinePositions<'_> {
|
|
type Item = usize;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.next.take().or_else(|| {
|
|
jetscii::bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n')
|
|
.find(&self.bytes[self.pos..])
|
|
.map(|i| {
|
|
self.pos += i + 1;
|
|
|
|
match self.bytes[self.pos - 1] {
|
|
b'{' => {
|
|
self.next = Some(self.pos);
|
|
self.pos - 1
|
|
}
|
|
b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos,
|
|
_ => self.pos - 1,
|
|
}
|
|
})
|
|
})
|
|
}
|
|
}
|
|
|
|
pub fn object_nodes(input: Input) -> Vec<GreenElement> {
|
|
// debug_assert!(!input.is_empty());
|
|
let nodes = object_nodes_base(input);
|
|
debug_assert_eq!(
|
|
input.as_str(),
|
|
nodes.iter().fold(String::new(), |s, i| s + &i.to_string()),
|
|
"parser must be lossless"
|
|
);
|
|
nodes
|
|
}
|
|
|
|
fn object_nodes_base(input: Input) -> Vec<GreenElement> {
|
|
let mut children = vec![];
|
|
|
|
let mut i = input;
|
|
'l: loop {
|
|
for (input, head) in InlinePositions::new(i.as_bytes()).map(|idx| i.take_split(idx)) {
|
|
if let Ok((input, node)) = object_node(input) {
|
|
if !head.is_empty() {
|
|
children.push(head.text_token())
|
|
}
|
|
children.push(node);
|
|
i = input;
|
|
continue 'l;
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
if !i.is_empty() {
|
|
children.push(i.text_token());
|
|
}
|
|
|
|
children
|
|
}
|
|
|
|
fn object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
|
if i.input_len() < 3 {
|
|
return Err(nom::Err::Error(()));
|
|
}
|
|
|
|
match &i.as_bytes()[0] {
|
|
b'*' => bold_node(i),
|
|
b'+' => strike_node(i),
|
|
b'/' => italic_node(i),
|
|
b'_' => underline_node(i),
|
|
b'=' => verbatim_node(i),
|
|
b'~' => code_node(i),
|
|
b'@' => snippet_node(i),
|
|
b'{' => macros_node(i),
|
|
b'<' => radio_target_node(i)
|
|
.or_else(|_| target_node(i))
|
|
.or_else(|_| timestamp_diary_node(i))
|
|
.or_else(|_| timestamp_active_node(i)),
|
|
b'[' => cookie_node(i)
|
|
.or_else(|_| link_node(i))
|
|
.or_else(|_| fn_ref_node(i))
|
|
.or_else(|_| timestamp_inactive_node(i)),
|
|
b'c' => inline_call_node(i),
|
|
b's' => inline_src_node(i),
|
|
_ => Err(nom::Err::Error(())),
|
|
}
|
|
}
|
|
|
|
// Snapshot test: runs `object_nodes` on two sample inputs, wraps the result
// in a PARAGRAPH node and compares its debug tree with the inline snapshots.
#[test]
|
|
fn parse() {
|
|
use crate::{
|
|
syntax::{combinator::node, SyntaxKind, SyntaxNode},
|
|
ParseConfig,
|
|
};
|
|
|
|
// Helper: parse `input` with the default config and wrap the resulting
// children in a PARAGRAPH root node.
let t = |input: &str| {
|
|
let config = &ParseConfig::default();
|
|
let children = object_nodes((input, config).into());
|
|
SyntaxNode::new_root(node(SyntaxKind::PARAGRAPH, children).into_node().unwrap())
|
|
};
|
|
|
|
// Code span directly followed by a footnote reference whose body itself
// contains a code span and a verbatim span.
insta::assert_debug_snapshot!(
|
|
t("~org-inlinetask-min-level~[fn:oiml:The default value of \n~org-inlinetask-min-level~ is =15=.]"),
|
|
@r###"
|
|
PARAGRAPH@0..93
|
|
CODE@0..26
|
|
TILDE@0..1 "~"
|
|
TEXT@1..25 "org-inlinetask-min-level"
|
|
TILDE@25..26 "~"
|
|
FN_REF@26..93
|
|
L_BRACKET@26..27 "["
|
|
TEXT@27..29 "fn"
|
|
COLON@29..30 ":"
|
|
TEXT@30..34 "oiml"
|
|
COLON@34..35 ":"
|
|
TEXT@35..57 "The default value of \n"
|
|
CODE@57..83
|
|
TILDE@57..58 "~"
|
|
TEXT@58..82 "org-inlinetask-min-level"
|
|
TILDE@82..83 "~"
|
|
TEXT@83..87 " is "
|
|
VERBATIM@87..91
|
|
EQUAL@87..88 "="
|
|
TEXT@88..90 "15"
|
|
EQUAL@90..91 "="
|
|
TEXT@91..92 "."
|
|
R_BRACKET@92..93 "]"
|
|
"###
|
|
);
|
|
|
|
// Multi-line text mixing italic, bold, verbatim and code spans with the
// plain text between them.
insta::assert_debug_snapshot!(
|
|
t(r#"Org is a /plaintext markup syntax/ developed with *Emacs* in 2003.
|
|
The canonical parser is =org-element.el=, which provides a number of
|
|
functions starting with ~org-element-~."#),
|
|
@r###"
|
|
PARAGRAPH@0..175
|
|
TEXT@0..9 "Org is a "
|
|
ITALIC@9..34
|
|
SLASH@9..10 "/"
|
|
TEXT@10..33 "plaintext markup syntax"
|
|
SLASH@33..34 "/"
|
|
TEXT@34..50 " developed with "
|
|
BOLD@50..57
|
|
STAR@50..51 "*"
|
|
TEXT@51..56 "Emacs"
|
|
STAR@56..57 "*"
|
|
TEXT@57..91 " in 2003.\nThe canonic ..."
|
|
VERBATIM@91..107
|
|
EQUAL@91..92 "="
|
|
TEXT@92..106 "org-element.el"
|
|
EQUAL@106..107 "="
|
|
TEXT@107..160 ", which provides a nu ..."
|
|
CODE@160..174
|
|
TILDE@160..161 "~"
|
|
TEXT@161..173 "org-element-"
|
|
TILDE@173..174 "~"
|
|
TEXT@174..175 "."
|
|
"###
|
|
);
|
|
}
|