feat: support entity

This commit is contained in:
PoiScript 2023-11-19 16:41:03 +08:00
parent 917fe2f5d2
commit 310664b05b
No known key found for this signature in database
GPG key ID: 22C2B1249D99985E
14 changed files with 805 additions and 11 deletions

120
src/syntax/entity.rs Normal file
View file

@ -0,0 +1,120 @@
use nom::{
branch::alt,
bytes::complete::{tag, take_while_m_n},
character::complete::alphanumeric1,
combinator::opt,
IResult,
};
use crate::{
entities::ENTITIES,
syntax::combinator::{backslash_token, node},
SyntaxKind,
};
use super::{combinator::GreenElement, input::Input};
/// Parses an entity object from input that begins with a backslash.
///
/// The `\NAME` form (`template1`) is attempted first, then the `\_SPACES`
/// form (`template2`); the combined parser is handed to `lossless_parser!`
/// together with the input.
pub fn entity_node(input: Input) -> IResult<Input, GreenElement, ()> {
    // This parser is only meant to be invoked on input starting with `\`.
    debug_assert!(input.s.starts_with('\\'));

    let mut entity = alt((template1, template2));
    crate::lossless_parser!(entity, input)
}
// \NAME POST or // \NAME{}
fn template1(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, backslash) = backslash_token(input)?;
let (input, name) = alphanumeric1(input)?;
if ENTITIES.iter().all(|i| i.0 != name.s) {
return Err(nom::Err::Error(()));
}
let (input, brackets) = opt(tag("{}"))(input)?;
if let Some(brackets) = brackets {
return Ok((
input,
node(
SyntaxKind::ENTITY,
[backslash, name.text_token(), brackets.text_token()],
),
));
}
if let Some(post) = input.bytes().next() {
if post.is_ascii_alphabetic() {
return Err(nom::Err::Error(()));
}
}
Ok((
input,
node(SyntaxKind::ENTITY, [backslash, name.text_token()]),
))
}
/// Parses the `\_SPACES` entity form: a backslash, an underscore, and then
/// between 1 and 20 space characters.
///
/// The resulting `ENTITY` node holds the backslash token, an `UNDERSCORE`
/// token and a text token for the spaces.
fn template2(input: Input) -> IResult<Input, GreenElement, ()> {
    let (rest, slash) = backslash_token(input)?;
    let (rest, underscore) = tag("_")(rest)?;
    let (rest, blanks) = take_while_m_n(1, 20, |c| c == ' ')(rest)?;

    let children = [
        slash,
        underscore.token(SyntaxKind::UNDERSCORE),
        blanks.text_token(),
    ];

    Ok((rest, node(SyntaxKind::ENTITY, children)))
}
/// Snapshot tests for both entity templates, plus one rejection case.
///
/// NOTE(review): the child-node indentation inside the inline snapshots and
/// the run of spaces in the `\_` case were reconstructed from the recorded
/// text ranges (`TEXT@2..21` is 19 bytes) — confirm with `cargo insta review`.
#[test]
fn parse() {
    use crate::{ast::Entity, tests::to_ast, ParseConfig};

    let to_entity = to_ast::<Entity>(entity_node);

    // `\NAME` at end of input.
    insta::assert_debug_snapshot!(
        to_entity("\\cent").syntax,
        @r###"
    ENTITY@0..5
      BACKSLASH@0..1 "\\"
      TEXT@1..5 "cent"
    "###
    );

    // Single-letter entity name.
    insta::assert_debug_snapshot!(
        to_entity("\\S").syntax,
        @r###"
    ENTITY@0..2
      BACKSLASH@0..1 "\\"
      TEXT@1..2 "S"
    "###
    );

    // `\NAME{}`: the braces are consumed, the trailing text is not.
    insta::assert_debug_snapshot!(
        to_entity("\\frac12{}test").syntax,
        @r###"
    ENTITY@0..9
      BACKSLASH@0..1 "\\"
      TEXT@1..7 "frac12"
      TEXT@7..9 "{}"
    "###
    );

    // `\_SPACES`: backslash, underscore and 19 spaces span bytes 0..21.
    insta::assert_debug_snapshot!(
        to_entity("\\_                   ").syntax,
        @r###"
    ENTITY@0..21
      BACKSLASH@0..1 "\\"
      UNDERSCORE@1..2 "_"
      TEXT@2..21 "                   "
    "###
    );

    // A name that is expected to be rejected by the parser.
    let c = ParseConfig::default();
    assert!(entity_node(("\\poi", &c).into()).is_err());
}

View file

@ -10,6 +10,7 @@ pub mod drawer;
pub mod dyn_block;
pub mod element;
pub mod emphasis;
pub mod entity;
pub mod fixed_width;
pub mod fn_def;
pub mod fn_ref;
@ -196,6 +197,7 @@ pub enum SyntaxKind {
UNDERLINE,
VERBATIM,
CODE,
ENTITY,
/* timestamp */
TIMESTAMP_ACTIVE,

View file

@ -4,6 +4,7 @@ use super::{
combinator::GreenElement,
cookie::cookie_node,
emphasis::{bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node},
entity::entity_node,
fn_ref::fn_ref_node,
inline_call::inline_call_node,
inline_src::inline_src_node,
@ -41,7 +42,7 @@ impl<'a> Iterator for ObjectPositions<'a> {
type Item = (Input<'a>, Input<'a>);
fn next(&mut self) -> Option<Self::Item> {
if self.input.input_len() < 3 || self.pos >= self.input.input_len() {
if self.input.input_len() < 2 || self.pos >= self.input.input_len() {
return None;
}
@ -56,7 +57,7 @@ impl<'a> Iterator for ObjectPositions<'a> {
let p = match bytes[i] {
b'{' => {
if self.input.s.len() - self.pos > 3 {
if self.input.s.len() - self.pos > 2 {
self.next = Some(self.pos);
}
self.pos - 1
@ -73,8 +74,8 @@ impl<'a> Iterator for ObjectPositions<'a> {
self.input.s.len()
);
// a valid object requires at least three characters
if self.input.s.len() - p < 3 {
// a valid object requires at least two characters
if self.input.s.len() - p < 2 {
return None;
}
@ -92,8 +93,8 @@ pub fn object_nodes(input: Input) -> Vec<GreenElement> {
'l: while !i.is_empty() {
for (input, head) in ObjectPositions::new(i) {
debug_assert!(
input.s.len() >= 3,
"object must have at least three characters: {:?}",
input.s.len() >= 2,
"object must have at least two characters: {:?}",
input.s
);
if let Ok((input, node)) = object_node(input) {
@ -146,7 +147,7 @@ fn object_node(i: Input) -> IResult<Input, GreenElement, ()> {
b'c' => inline_call_node(i),
b's' => inline_src_node(i),
b'$' => latex_fragment_node(i),
b'\\' => latex_fragment_node(i),
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
_ => Err(nom::Err::Error(())),
}
}
@ -155,9 +156,13 @@ fn object_node(i: Input) -> IResult<Input, GreenElement, ()> {
fn positions() {
let config = crate::ParseConfig::default();
let vec = ObjectPositions::new(("*{", &config).into()).collect::<Vec<_>>();
let vec = ObjectPositions::new(("*", &config).into()).collect::<Vec<_>>();
assert!(vec.is_empty());
let vec = ObjectPositions::new(("*{", &config).into()).collect::<Vec<_>>();
assert_eq!(vec.len(), 1);
assert_eq!(vec[0].0.s, "*{");
// https://github.com/PoiScript/orgize/issues/69
let vec = ObjectPositions::new(("{3}", &config).into()).collect::<Vec<_>>();
assert_eq!(vec.len(), 2);
@ -166,12 +171,13 @@ fn positions() {
assert_eq!(vec[1].0.s, "{3}");
let vec = ObjectPositions::new(("*{()}//s\nc<<", &config).into()).collect::<Vec<_>>();
assert_eq!(vec.len(), 5);
assert_eq!(vec.len(), 6);
assert_eq!(vec[0].0.s, "*{()}//s\nc<<");
assert_eq!(vec[1].0.s, "{()}//s\nc<<");
assert_eq!(vec[2].0.s, "()}//s\nc<<");
assert_eq!(vec[3].0.s, ")}//s\nc<<");
assert_eq!(vec[4].0.s, "c<<");
assert_eq!(vec[5].0.s, "<<");
}
#[test]