From 00cfde6e905de5c8b213b21566a8e4a87b16df4b Mon Sep 17 00:00:00 2001
From: PoiScript
Date: Sun, 19 Nov 2023 11:57:31 +0800
Subject: [PATCH] feat: support latex environment parsing

---
Reviewer note: doc comments were added to src/syntax/latex_environment.rs
after this patch was generated, so the blob id on that file's `index` line
predates them.

 src/syntax/element.rs           |   4 +-
 src/syntax/latex_environment.rs | 144 ++++++++++++++++++++++++++++++++
 src/syntax/mod.rs               |   1 +
 3 files changed, 148 insertions(+), 1 deletion(-)
 create mode 100644 src/syntax/latex_environment.rs

diff --git a/src/syntax/element.rs b/src/syntax/element.rs
index 85d352f..b133f15 100644
--- a/src/syntax/element.rs
+++ b/src/syntax/element.rs
@@ -14,6 +14,7 @@ use super::{
     fn_def::fn_def_node,
     input::Input,
     keyword::{affiliated_keyword_nodes, keyword_node},
+    latex_environment::latex_environment_node,
     list::list_node,
     paragraph::{paragraph_node, paragraph_nodes},
     rule::rule_node,
@@ -96,6 +97,7 @@ pub fn element_node(input: Input) -> IResult<Input, GreenElement, ()> {
             .or_else(|_| keyword_node(input))
             .or_else(|_| dyn_block_node(input))
             .or_else(|_| comment_node(input)),
+        Some(b'\\') => latex_environment_node(input),
         _ => Err(nom::Err::Error(())),
     };
 
@@ -134,7 +136,7 @@ impl<'a> Iterator for ElementPositions<'a> {
 
             if matches!(
                 b,
-                b'[' | b'0'..=b'9' | b'*' | b'C' | b'-' | b':' | b'|' | b'+' | b'#'
+                b'[' | b'0'..=b'9' | b'*' | b'C' | b'-' | b':' | b'|' | b'+' | b'#' | b'\\'
             ) {
                 let previous = self.pos;
                 self.pos = iter
diff --git a/src/syntax/latex_environment.rs b/src/syntax/latex_environment.rs
new file mode 100644
index 0000000..2136585
--- /dev/null
+++ b/src/syntax/latex_environment.rs
@@ -0,0 +1,144 @@
+use nom::{
+    branch::alt,
+    bytes::complete::{tag, take_while1},
+    character::complete::{line_ending, space0},
+    combinator::eof,
+    sequence::tuple,
+    IResult, InputTake,
+};
+
+use crate::SyntaxKind;
+
+use super::{
+    combinator::{l_curly_token, line_starts_iter, node, r_curly_token, GreenElement},
+    input::Input,
+};
+
+/// Parses a LaTeX environment: `\begin{NAME}`, arbitrary contents, and a
+/// matching `\end{NAME}`, producing a `SyntaxKind::LATEX_ENVIRONMENT` node.
+///
+/// Thin wrapper that runs `latex_environment_node_base` through
+/// `crate::lossless_parser!` (defined elsewhere in the crate).
+#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
+pub fn latex_environment_node(input: Input) -> IResult<Input, GreenElement, ()> {
+    crate::lossless_parser!(latex_environment_node_base, input)
+}
+
+/// Parser body behind `latex_environment_node`.
+///
+/// Matches optional leading whitespace, `\begin`, and a braced name made of
+/// ASCII alphanumerics and `*`. It then probes every offset yielded by
+/// `line_starts_iter` over the remaining input for optional whitespace,
+/// `\end{NAME}` carrying the same name, optional trailing whitespace, and a
+/// line ending or end of input. The text before the first offset that matches
+/// becomes the contents token.
+///
+/// Fails when the opening does not match; returns `nom::Err::Error(())` when
+/// no closing `\end{NAME}` is found.
+fn latex_environment_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
+    let (input, (ws1, begin, l1, name1, r1)) = tuple((
+        space0,
+        tag("\\begin"),
+        l_curly_token,
+        take_while1(|c: char| c.is_ascii_alphanumeric() || c == '*'),
+        r_curly_token,
+    ))(input)?;
+
+    // Try each candidate split point; the first one where `\end{NAME}` parses wins.
+    for (input, contents) in line_starts_iter(input.s).map(|i| input.take_split(i)) {
+        if let Ok((input, (ws2, end, l2, name2, r2, ws3, nl))) = tuple((
+            space0,
+            tag("\\end"),
+            l_curly_token,
+            // Must repeat the exact name captured after `\begin`.
+            tag(name1.s),
+            r_curly_token,
+            space0,
+            alt((line_ending, eof)),
+        ))(input)
+        {
+            return Ok((
+                input,
+                node(
+                    SyntaxKind::LATEX_ENVIRONMENT,
+                    [
+                        ws1.ws_token(),
+                        begin.text_token(),
+                        l1,
+                        name1.text_token(),
+                        r1,
+                        contents.text_token(),
+                        ws2.ws_token(),
+                        end.text_token(),
+                        l2,
+                        name2.text_token(),
+                        r2,
+                        ws3.ws_token(),
+                        nl.nl_token(),
+                    ],
+                ),
+            ));
+        }
+    }
+
+    Err(nom::Err::Error(()))
+}
+
+#[test]
+fn parse() {
+    use crate::ast::LatexEnvironment;
+    use crate::config::ParseConfig;
+    use crate::tests::to_ast;
+
+    let to_latex = to_ast::<LatexEnvironment>(latex_environment_node);
+
+    insta::assert_debug_snapshot!(
+        to_latex(r#"\begin{NAME}\end{NAME}"#).syntax,
+        @r###"
+    LATEX_ENVIRONMENT@0..22
+      WHITESPACE@0..0 ""
+      TEXT@0..6 "\\begin"
+      L_CURLY@6..7 "{"
+      TEXT@7..11 "NAME"
+      R_CURLY@11..12 "}"
+      TEXT@12..12 ""
+      WHITESPACE@12..12 ""
+      TEXT@12..16 "\\end"
+      L_CURLY@16..17 "{"
+      TEXT@17..21 "NAME"
+      R_CURLY@21..22 "}"
+      WHITESPACE@22..22 ""
+      NEW_LINE@22..22 ""
+    "###
+    );
+
+    insta::assert_debug_snapshot!(
+        to_latex(
+            r#"\begin{align*}
+    2x - 5y &= 8 \\
+    3x + 9y &= -12
+    \end{align*}"#
+        ).syntax,
+        @r###"
+    LATEX_ENVIRONMENT@0..70
+      WHITESPACE@0..0 ""
+      TEXT@0..6 "\\begin"
+      L_CURLY@6..7 "{"
+      TEXT@7..13 "align*"
+      R_CURLY@13..14 "}"
+      TEXT@14..54 "\n    2x - 5y &= 8 \\\\\n ..."
+      WHITESPACE@54..58 "    "
+      TEXT@58..62 "\\end"
+      L_CURLY@62..63 "{"
+      TEXT@63..69 "align*"
+      R_CURLY@69..70 "}"
+      WHITESPACE@70..70 ""
+      NEW_LINE@70..70 ""
+    "###
+    );
+
+    let c = ParseConfig::default();
+
+    assert!(latex_environment_node((r#"\begin{equation}\end{align}"#, &c).into()).is_err());
+    assert!(latex_environment_node((r#"\begin{_}\end{_}"#, &c).into()).is_err());
+}
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index 7df9a45..df57fe5 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -18,6 +18,7 @@ pub mod inline_call;
 pub mod inline_src;
 pub mod input;
 pub mod keyword;
+pub mod latex_environment;
 pub mod latex_fragment;
 pub mod link;
 pub mod list;