feat: support latex fragment parsing
This commit is contained in:
parent
2aec1768da
commit
4a3dd6aacb
6 changed files with 269 additions and 6 deletions
|
|
@ -58,6 +58,7 @@ token_parser!(minus2_token, "--", MINUS2);
|
|||
// token_parser!(percent_token, "%", PERCENT);
|
||||
token_parser!(percent2_token, "%%", PERCENT2);
|
||||
// token_parser!(slash_token, "/", SLASH);
|
||||
token_parser!(backslash_token, "\\", BACKSLASH);
|
||||
// token_parser!(underscore_token, "_", UNDERSCORE);
|
||||
// token_parser!(star_token, "*", STAR);
|
||||
token_parser!(plus_token, "+", PLUS);
|
||||
|
|
@ -65,6 +66,8 @@ token_parser!(minus_token, "-", MINUS);
|
|||
token_parser!(colon_token, ":", COLON);
|
||||
token_parser!(colon2_token, "::", COLON2);
|
||||
token_parser!(pipe_token, "|", PIPE);
|
||||
token_parser!(dollar_token, "$", DOLLAR);
|
||||
token_parser!(dollar2_token, "$$", DOLLAR2);
|
||||
// token_parser!(equal_token, "=", EQUAL);
|
||||
// token_parser!(tilde_token, "~", TILDE);
|
||||
token_parser!(hash_plus_token, "#+", HASH_PLUS);
|
||||
|
|
|
|||
196
src/syntax/latex_fragment.rs
Normal file
196
src/syntax/latex_fragment.rs
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{take_until1, take_while1},
|
||||
character::complete::alpha1,
|
||||
sequence::tuple,
|
||||
AsBytes, IResult, InputTake,
|
||||
};
|
||||
|
||||
use crate::SyntaxKind;
|
||||
|
||||
use super::{
|
||||
combinator::{
|
||||
backslash_token, dollar2_token, dollar_token, l_bracket_token, l_curly_token,
|
||||
l_parens_token, node, r_bracket_token, r_curly_token, r_parens_token, GreenElement,
|
||||
},
|
||||
input::Input,
|
||||
};
|
||||
|
||||
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
||||
pub fn latex_fragment_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
debug_assert!(input.s.starts_with(['\\', '$']));
|
||||
let mut parser = alt((template1, template2, template3, template4, template5));
|
||||
crate::lossless_parser!(parser, input)
|
||||
}
|
||||
|
||||
// \NAME[CONTENTS1] \NAME{CONTENTS1}
|
||||
fn template1(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let (input, (backslash, name)) = tuple((backslash_token, alpha1))(input)?;
|
||||
let (input, (l, content, r)) = alt((
|
||||
tuple((
|
||||
l_bracket_token,
|
||||
take_while1(|c| c != '{' && c != '}' && c != '[' && c != ']' && c != '\r' && c != '\n'),
|
||||
r_bracket_token,
|
||||
)),
|
||||
tuple((
|
||||
l_curly_token,
|
||||
take_while1(|c| c != '{' && c != '}' && c != '\r' && c != '\n'),
|
||||
r_curly_token,
|
||||
)),
|
||||
))(input)?;
|
||||
Ok((
|
||||
input,
|
||||
node(
|
||||
SyntaxKind::LATEX_FRAGMENT,
|
||||
[backslash, name.text_token(), l, content.text_token(), r],
|
||||
),
|
||||
))
|
||||
}
|
||||
|
||||
// \(CONTENTS\)
|
||||
fn template2(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let (input, (backslash1, l)) = tuple((backslash_token, l_parens_token))(input)?;
|
||||
if let Some(i) = jetscii::Substring::new("\\)").find(input.s) {
|
||||
let (input, content) = input.take_split(i);
|
||||
let (input, (backslash2, r)) = tuple((backslash_token, r_parens_token))(input)?;
|
||||
Ok((
|
||||
input,
|
||||
node(
|
||||
SyntaxKind::LATEX_FRAGMENT,
|
||||
[backslash1, l, content.text_token(), backslash2, r],
|
||||
),
|
||||
))
|
||||
} else {
|
||||
Err(nom::Err::Error(()))
|
||||
}
|
||||
}
|
||||
|
||||
// \[CONTENTS\]
|
||||
fn template3(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let (input, (backslash1, l)) = tuple((backslash_token, l_bracket_token))(input)?;
|
||||
if let Some(i) = jetscii::Substring::new("\\]").find(input.s) {
|
||||
let (input, content) = input.take_split(i);
|
||||
let (input, (backslash2, r)) = tuple((backslash_token, r_bracket_token))(input)?;
|
||||
Ok((
|
||||
input,
|
||||
node(
|
||||
SyntaxKind::LATEX_FRAGMENT,
|
||||
[backslash1, l, content.text_token(), backslash2, r],
|
||||
),
|
||||
))
|
||||
} else {
|
||||
Err(nom::Err::Error(()))
|
||||
}
|
||||
}
|
||||
|
||||
// $$CONTENTS$$
|
||||
fn template4(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let (input, l) = dollar2_token(input)?;
|
||||
let (input, content) = take_until1("$$")(input)?;
|
||||
let (input, r) = dollar2_token(input)?;
|
||||
Ok((
|
||||
input,
|
||||
node(SyntaxKind::LATEX_FRAGMENT, [l, content.text_token(), r]),
|
||||
))
|
||||
}
|
||||
|
||||
// $CONTENTS$
|
||||
fn template5(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let (input, l) = dollar_token(input)?;
|
||||
let (input, content) = take_until1("$")(input)?;
|
||||
let (input, r) = dollar_token(input)?;
|
||||
|
||||
let b = content.as_bytes()[0];
|
||||
if matches!(b, b'\r' | b'\n' | b' ' | b'\t' | b'.' | b',' | b';' | b'$') {
|
||||
return Err(nom::Err::Error(()));
|
||||
}
|
||||
|
||||
let b = content.as_bytes()[content.s.len() - 1];
|
||||
if matches!(b, b'\r' | b'\n' | b' ' | b'\t' | b'.' | b',' | b'$') {
|
||||
return Err(nom::Err::Error(()));
|
||||
}
|
||||
|
||||
let p = input.bytes().next();
|
||||
if let Some(p) = p {
|
||||
if !matches!(p, b')' | b'}' | b']' | b'\'' | b'"' | b' ' | b'\r' | b'\n') {
|
||||
return Err(nom::Err::Error(()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok((
|
||||
input,
|
||||
node(SyntaxKind::LATEX_FRAGMENT, [l, content.text_token(), r]),
|
||||
))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
use crate::{ast::LatexFragment, tests::to_ast, ParseConfig};
|
||||
|
||||
let to_fragment = to_ast::<LatexFragment>(latex_fragment_node);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_fragment("\\enlargethispage{2\\baselineskip}").syntax,
|
||||
@r###"
|
||||
LATEX_FRAGMENT@0..32
|
||||
BACKSLASH@0..1 "\\"
|
||||
TEXT@1..16 "enlargethispage"
|
||||
L_CURLY@16..17 "{"
|
||||
TEXT@17..31 "2\\baselineskip"
|
||||
R_CURLY@31..32 "}"
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_fragment("\\[a\\]").syntax,
|
||||
@r###"
|
||||
LATEX_FRAGMENT@0..5
|
||||
BACKSLASH@0..1 "\\"
|
||||
L_BRACKET@1..2 "["
|
||||
TEXT@2..3 "a"
|
||||
BACKSLASH@3..4 "\\"
|
||||
R_BRACKET@4..5 "]"
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_fragment("\\(e^{i \\pi}\\)").syntax,
|
||||
@r###"
|
||||
LATEX_FRAGMENT@0..13
|
||||
BACKSLASH@0..1 "\\"
|
||||
L_PARENS@1..2 "("
|
||||
TEXT@2..11 "e^{i \\pi}"
|
||||
BACKSLASH@11..12 "\\"
|
||||
R_PARENS@12..13 ")"
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_fragment("$\\frac{1}{3}$").syntax,
|
||||
@r###"
|
||||
LATEX_FRAGMENT@0..13
|
||||
DOLLAR@0..1 "$"
|
||||
TEXT@1..12 "\\frac{1}{3}"
|
||||
DOLLAR@12..13 "$"
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_fragment("$a\nb$").syntax,
|
||||
@r###"
|
||||
LATEX_FRAGMENT@0..5
|
||||
DOLLAR@0..1 "$"
|
||||
TEXT@1..4 "a\nb"
|
||||
DOLLAR@4..5 "$"
|
||||
"###
|
||||
);
|
||||
|
||||
let c = ParseConfig::default();
|
||||
|
||||
assert!(latex_fragment_node(("$ LaTeXxxx$", &c).into()).is_err());
|
||||
assert!(latex_fragment_node(("$LaTeXxxx $", &c).into()).is_err());
|
||||
assert!(latex_fragment_node(("$a.$", &c).into()).is_err());
|
||||
assert!(latex_fragment_node(("$a$a", &c).into()).is_err());
|
||||
assert!(latex_fragment_node(("$$b\nol\nd*", &c).into()).is_err());
|
||||
assert!(latex_fragment_node(("$b\nol\nd*", &c).into()).is_err());
|
||||
}
|
||||
|
|
@ -18,6 +18,7 @@ pub mod inline_call;
|
|||
pub mod inline_src;
|
||||
pub mod input;
|
||||
pub mod keyword;
|
||||
pub mod latex_fragment;
|
||||
pub mod link;
|
||||
pub mod list;
|
||||
pub mod macros;
|
||||
|
|
@ -85,6 +86,9 @@ pub enum SyntaxKind {
|
|||
PERCENT, // '%'
|
||||
PERCENT2, // '%%'
|
||||
SLASH, // '/'
|
||||
BACKSLASH, // '\'
|
||||
DOLLAR, // '$'
|
||||
DOLLAR2, // '$$'
|
||||
UNDERSCORE, // '_'
|
||||
STAR, // '*'
|
||||
PLUS, // '+'
|
||||
|
|
@ -168,6 +172,7 @@ pub enum SyntaxKind {
|
|||
BLOCK_BEGIN,
|
||||
BLOCK_END,
|
||||
BLOCK_CONTENT,
|
||||
LATEX_ENVIRONMENT,
|
||||
|
||||
//
|
||||
// objects
|
||||
|
|
@ -179,7 +184,7 @@ pub enum SyntaxKind {
|
|||
COOKIE,
|
||||
RADIO_TARGET,
|
||||
FN_REF,
|
||||
LATEX_ENVIRONMENT,
|
||||
LATEX_FRAGMENT,
|
||||
MACROS,
|
||||
MACROS_ARGUMENT,
|
||||
SNIPPET,
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ use super::{
|
|||
inline_call::inline_call_node,
|
||||
inline_src::inline_src_node,
|
||||
input::Input,
|
||||
latex_fragment::latex_fragment_node,
|
||||
link::link_node,
|
||||
macros::macros_node,
|
||||
radio_target::radio_target_node,
|
||||
|
|
@ -29,7 +30,9 @@ impl ObjectPositions<'_> {
|
|||
input,
|
||||
pos: 0,
|
||||
next: Some(0),
|
||||
finder: jetscii::bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n'),
|
||||
finder: jetscii::bytes!(
|
||||
b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n', b'\\', b'$'
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -142,6 +145,8 @@ fn object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
|||
.or_else(|_| timestamp_inactive_node(i)),
|
||||
b'c' => inline_call_node(i),
|
||||
b's' => inline_src_node(i),
|
||||
b'$' => latex_fragment_node(i),
|
||||
b'\\' => latex_fragment_node(i),
|
||||
_ => Err(nom::Err::Error(())),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue