feat: support subscript and superscript
This commit is contained in:
parent
58dfb022c2
commit
8b5c545d4b
12 changed files with 346 additions and 63 deletions
|
|
@ -264,6 +264,14 @@ const nodes = [
|
|||
struct: "LineBreak",
|
||||
kind: ["LINE_BREAK"],
|
||||
},
|
||||
{
|
||||
struct: "Superscript",
|
||||
kind: ["SUPERSCRIPT"],
|
||||
},
|
||||
{
|
||||
struct: "Subscript",
|
||||
kind: ["SUBSCRIPT"],
|
||||
},
|
||||
];
|
||||
|
||||
let content = `//! generated file, do not modify it directly
|
||||
|
|
|
|||
|
|
@ -1716,3 +1716,53 @@ impl LineBreak {
|
|||
self.syntax.text_range().end().into()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Superscript {
|
||||
pub(crate) syntax: SyntaxNode,
|
||||
}
|
||||
impl AstNode for Superscript {
|
||||
type Language = OrgLanguage;
|
||||
fn can_cast(kind: SyntaxKind) -> bool {
|
||||
kind == SUPERSCRIPT
|
||||
}
|
||||
fn cast(node: SyntaxNode) -> Option<Superscript> {
|
||||
Self::can_cast(node.kind()).then(|| Superscript { syntax: node })
|
||||
}
|
||||
fn syntax(&self) -> &SyntaxNode {
|
||||
&self.syntax
|
||||
}
|
||||
}
|
||||
impl Superscript {
|
||||
pub fn begin(&self) -> u32 {
|
||||
self.syntax.text_range().start().into()
|
||||
}
|
||||
pub fn end(&self) -> u32 {
|
||||
self.syntax.text_range().end().into()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Subscript {
|
||||
pub(crate) syntax: SyntaxNode,
|
||||
}
|
||||
impl AstNode for Subscript {
|
||||
type Language = OrgLanguage;
|
||||
fn can_cast(kind: SyntaxKind) -> bool {
|
||||
kind == SUBSCRIPT
|
||||
}
|
||||
fn cast(node: SyntaxNode) -> Option<Subscript> {
|
||||
Self::can_cast(node.kind()).then(|| Subscript { syntax: node })
|
||||
}
|
||||
fn syntax(&self) -> &SyntaxNode {
|
||||
&self.syntax
|
||||
}
|
||||
}
|
||||
impl Subscript {
|
||||
pub fn begin(&self) -> u32 {
|
||||
self.syntax.text_range().start().into()
|
||||
}
|
||||
pub fn end(&self) -> u32 {
|
||||
self.syntax.text_range().end().into()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@
|
|||
/// special_block quote_block center_block verse_block comment_block example_block export_block
|
||||
/// source_block babel_call clock cookie radio_target drawer dyn_block fn_def fn_ref macros
|
||||
/// snippet timestamp target fixed_width org_table org_table_row org_table_cell latex_fragment
|
||||
/// latex_environment entity line_break
|
||||
/// latex_environment entity line_break superscript subscript
|
||||
/// }
|
||||
/// }
|
||||
///
|
||||
|
|
@ -203,6 +203,12 @@ macro_rules! forward_handler {
|
|||
(@method $handler:ty, line_break) => {
|
||||
forward_handler!(@method $handler, line_break, WalkEvent<&$crate::ast::LineBreak>);
|
||||
};
|
||||
(@method $handler:ty, superscript) => {
|
||||
forward_handler!(@method $handler, superscript, WalkEvent<&$crate::ast::Superscript>);
|
||||
};
|
||||
(@method $handler:ty, subscript) => {
|
||||
forward_handler!(@method $handler, subscript, WalkEvent<&$crate::ast::Subscript>);
|
||||
};
|
||||
(@method $handler:ty, $x:ident) => {
|
||||
std::compile_error!(std::concat!(std::stringify!($x), " is not a method"));
|
||||
};
|
||||
|
|
|
|||
|
|
@ -518,4 +518,20 @@ impl Traverser for HtmlExport {
|
|||
ctx.skip();
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip(self, _ctx))]
|
||||
fn subscript(&mut self, event: WalkEvent<&Subscript>, _ctx: &mut TraversalContext) {
|
||||
match event {
|
||||
WalkEvent::Enter(_) => self.output += "<sub>",
|
||||
WalkEvent::Leave(_) => self.output += "</sub>",
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip(self, _ctx))]
|
||||
fn superscript(&mut self, event: WalkEvent<&Superscript>, _ctx: &mut TraversalContext) {
|
||||
match event {
|
||||
WalkEvent::Enter(_) => self.output += "<sup>",
|
||||
WalkEvent::Leave(_) => self.output += "</sup>",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -141,6 +141,8 @@ pub trait Traverser {
|
|||
LATEX_ENVIRONMENT => traverse!(LatexEnvironment, latex_environment),
|
||||
ENTITY => traverse!(Entity, entity),
|
||||
LINE_BREAK => traverse!(LineBreak, line_break),
|
||||
SUPERSCRIPT => traverse!(Superscript, superscript),
|
||||
SUBSCRIPT => traverse!(Subscript, subscript),
|
||||
|
||||
BLOCK_CONTENT | LIST_ITEM_CONTENT => traverse_children!(node),
|
||||
|
||||
|
|
@ -252,4 +254,8 @@ pub trait Traverser {
|
|||
fn entity(&mut self, event: WalkEvent<&Entity>, ctx: &mut TraversalContext);
|
||||
/// Called when entering or leaving `LineBreak` node
|
||||
fn line_break(&mut self, event: WalkEvent<&LineBreak>, ctx: &mut TraversalContext);
|
||||
/// Called when entering or leaving `Superscript` node
|
||||
fn superscript(&mut self, event: WalkEvent<&Superscript>, ctx: &mut TraversalContext);
|
||||
/// Called when entering or leaving `Subscript` node
|
||||
fn subscript(&mut self, event: WalkEvent<&Subscript>, ctx: &mut TraversalContext);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ token_parser!(minus2_token, "--", MINUS2);
|
|||
token_parser!(percent2_token, "%%", PERCENT2);
|
||||
// token_parser!(slash_token, "/", SLASH);
|
||||
token_parser!(backslash_token, "\\", BACKSLASH);
|
||||
// token_parser!(underscore_token, "_", UNDERSCORE);
|
||||
token_parser!(underscore_token, "_", UNDERSCORE);
|
||||
// token_parser!(star_token, "*", STAR);
|
||||
token_parser!(plus_token, "+", PLUS);
|
||||
token_parser!(minus_token, "-", MINUS);
|
||||
|
|
@ -71,6 +71,7 @@ token_parser!(dollar2_token, "$$", DOLLAR2);
|
|||
// token_parser!(equal_token, "=", EQUAL);
|
||||
// token_parser!(tilde_token, "~", TILDE);
|
||||
token_parser!(hash_plus_token, "#+", HASH_PLUS);
|
||||
token_parser!(caret_token, "^", CARET);
|
||||
token_parser!(hash_token, "#", HASH);
|
||||
token_parser!(double_arrow_token, "=>", DOUBLE_ARROW);
|
||||
|
||||
|
|
|
|||
|
|
@ -112,6 +112,16 @@ fn validate_marker(pos: usize, text: Input) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
/// Decides whether the text preceding a markup marker allows the marker to
/// open an object.
///
/// `input` is the text before the marker. An empty `input` is accepted;
/// otherwise the last byte must be one of tab, space, `-`, `(`, `{`, `\`,
/// `"`, carriage return or line feed.
///
/// NOTE(review): the Org syntax document lists `'` among the allowed
/// preceding characters, while this set contains `\` instead — confirm this
/// is intentional.
pub fn verify_pre(input: &str) -> bool {
    match input.as_bytes().last() {
        // nothing precedes the marker
        None => true,
        Some(&byte) => matches!(
            byte,
            b'\t' | b' ' | b'-' | b'(' | b'{' | b'\\' | b'"' | b'\r' | b'\n'
        ),
    }
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
use crate::{ast::Bold, tests::to_ast, ParseConfig};
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ pub mod planning;
|
|||
pub mod radio_target;
|
||||
pub mod rule;
|
||||
pub mod snippet;
|
||||
pub mod subscript_superscript;
|
||||
pub mod table;
|
||||
pub mod target;
|
||||
pub mod timestamp;
|
||||
|
|
@ -106,6 +107,7 @@ pub enum SyntaxKind {
|
|||
DOUBLE_ARROW, // '=>'
|
||||
PIPE, // '|'
|
||||
COMMA, // ','
|
||||
CARET, // '^'
|
||||
NEW_LINE, // '\n' or '\r\n' or '\r'
|
||||
WHITESPACE, // ' ' or '\t'
|
||||
BLANK_LINE,
|
||||
|
|
@ -200,6 +202,8 @@ pub enum SyntaxKind {
|
|||
VERBATIM,
|
||||
CODE,
|
||||
ENTITY,
|
||||
SUPERSCRIPT,
|
||||
SUBSCRIPT,
|
||||
|
||||
/* timestamp */
|
||||
TIMESTAMP_ACTIVE,
|
||||
|
|
@ -241,6 +245,8 @@ impl SyntaxKind {
|
|||
| SyntaxKind::MACROS
|
||||
| SyntaxKind::RADIO_TARGET
|
||||
| SyntaxKind::COOKIE
|
||||
| SyntaxKind::SUPERSCRIPT
|
||||
| SyntaxKind::SUBSCRIPT
|
||||
| SyntaxKind::ORG_TABLE_CELL
|
||||
| SyntaxKind::TIMESTAMP_ACTIVE
|
||||
| SyntaxKind::TIMESTAMP_INACTIVE
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@ use nom::{AsBytes, IResult, InputLength, InputTake};
|
|||
use super::{
|
||||
combinator::GreenElement,
|
||||
cookie::cookie_node,
|
||||
emphasis::{bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node},
|
||||
emphasis::{
|
||||
self, bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node,
|
||||
},
|
||||
entity::entity_node,
|
||||
fn_ref::fn_ref_node,
|
||||
inline_call::inline_call_node,
|
||||
|
|
@ -15,6 +17,7 @@ use super::{
|
|||
macros::macros_node,
|
||||
radio_target::radio_target_node,
|
||||
snippet::snippet_node,
|
||||
subscript_superscript::{self, subscript_node, superscript_node},
|
||||
target::target_node,
|
||||
timestamp::{timestamp_active_node, timestamp_diary_node, timestamp_inactive_node},
|
||||
};
|
||||
|
|
@ -22,7 +25,6 @@ use super::{
|
|||
struct ObjectPositions<'a> {
|
||||
input: Input<'a>,
|
||||
pos: usize,
|
||||
next: Option<usize>,
|
||||
finder: jetscii::BytesConst,
|
||||
}
|
||||
|
||||
|
|
@ -31,10 +33,17 @@ impl ObjectPositions<'_> {
|
|||
ObjectPositions {
|
||||
input,
|
||||
pos: 0,
|
||||
next: Some(0),
|
||||
finder: jetscii::bytes!(
|
||||
b' ', b'(', b'{', b'\'', b'"', b'\n', /* */
|
||||
b'\\', b'$', b'@', b'<', b'['
|
||||
b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */
|
||||
b'@', /* snippet */
|
||||
b'<', /* timestamp, target, radio target */
|
||||
b'[', /* link, cookie, fn_ref, timestamp */
|
||||
b'c', /* inline call */
|
||||
b's', /* inline source */
|
||||
b'\\', b'$', /* latex & entity */
|
||||
b'{', /* macros */
|
||||
b'^', /* superscript */
|
||||
b'_' /* subscript */
|
||||
),
|
||||
}
|
||||
}
|
||||
|
|
@ -43,10 +52,11 @@ impl ObjectPositions<'_> {
|
|||
ObjectPositions {
|
||||
input,
|
||||
pos: 0,
|
||||
next: Some(0),
|
||||
finder: jetscii::bytes!(
|
||||
b' ', b'(', b'{', b'\'', b'"', b'\n', /* */
|
||||
b'\\', b'$'
|
||||
b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */
|
||||
b'\\', b'$', /* latex & entity */
|
||||
b'^', /* superscript */
|
||||
b'_' /* subscript */
|
||||
),
|
||||
}
|
||||
}
|
||||
|
|
@ -60,25 +70,12 @@ impl<'a> Iterator for ObjectPositions<'a> {
|
|||
return None;
|
||||
}
|
||||
|
||||
if let Some(p) = self.next.take() {
|
||||
return Some(self.input.take_split(p));
|
||||
}
|
||||
|
||||
let bytes = &self.input.as_bytes()[self.pos..];
|
||||
let previous = self.pos;
|
||||
let i = self.finder.find(bytes)?;
|
||||
self.pos += i + 1;
|
||||
|
||||
let p = match bytes[i] {
|
||||
b'{' => {
|
||||
if self.input.s.len() - self.pos > 2 {
|
||||
self.next = Some(self.pos);
|
||||
}
|
||||
self.pos - 1
|
||||
}
|
||||
b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos,
|
||||
_ => self.pos - 1,
|
||||
};
|
||||
let p = self.pos - 1;
|
||||
|
||||
debug_assert!(
|
||||
previous < self.pos && self.pos <= self.input.s.len(),
|
||||
|
|
@ -112,10 +109,10 @@ impl<'a> Iterator for ObjectPositions<'a> {
|
|||
/// - Timestamps
|
||||
/// - Text Markup (bold code strike verbatim underline italic)
|
||||
/// - Line Breaks
|
||||
/// - Subscript and Superscript
|
||||
///
|
||||
/// // todo:
|
||||
/// - Citations
|
||||
/// - Subscript and Superscript
|
||||
pub fn object_nodes(input: Input) -> Vec<GreenElement> {
|
||||
// TODO:
|
||||
// debug_assert!(!input.is_empty());
|
||||
|
|
@ -125,11 +122,11 @@ pub fn object_nodes(input: Input) -> Vec<GreenElement> {
|
|||
|
||||
'l: while !i.is_empty() {
|
||||
for (input, head) in ObjectPositions::standard(i) {
|
||||
if let Ok((input, node)) = standard_object_node(input) {
|
||||
if let Ok((input, pre)) = standard_object_node(input, head) {
|
||||
if !head.is_empty() {
|
||||
nodes.push(head.text_token())
|
||||
}
|
||||
nodes.push(node);
|
||||
nodes.push(pre);
|
||||
debug_assert!(
|
||||
input.input_len() < i.input_len(),
|
||||
"{} < {}",
|
||||
|
|
@ -157,8 +154,6 @@ pub fn object_nodes(input: Input) -> Vec<GreenElement> {
|
|||
/// - LaTeX fragments ('\\')
|
||||
/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/')
|
||||
/// - Entities ('\\')
|
||||
///
|
||||
/// // todo:
|
||||
/// - Superscripts and Subscripts
|
||||
pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
|
||||
let mut i = input;
|
||||
|
|
@ -166,11 +161,11 @@ pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
|
|||
|
||||
'l: while !i.is_empty() {
|
||||
for (input, head) in ObjectPositions::minimal(i) {
|
||||
if let Ok((input, node)) = minimal_object_node(input) {
|
||||
if let Ok((input, pre)) = minimal_object_node(input, head) {
|
||||
if !head.is_empty() {
|
||||
nodes.push(head.text_token())
|
||||
}
|
||||
nodes.push(node);
|
||||
nodes.push(pre);
|
||||
debug_assert!(
|
||||
input.input_len() < i.input_len(),
|
||||
"{} < {}",
|
||||
|
|
@ -195,7 +190,7 @@ pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
|
|||
}
|
||||
|
||||
/// parse an object from standard sets
|
||||
fn standard_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
||||
fn standard_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
|
||||
debug_assert!(
|
||||
i.s.len() >= 2,
|
||||
"object must have at least two characters: {:?}",
|
||||
|
|
@ -203,12 +198,12 @@ fn standard_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
|||
);
|
||||
|
||||
match &i.as_bytes()[0] {
|
||||
b'*' => bold_node(i),
|
||||
b'+' => strike_node(i),
|
||||
b'/' => italic_node(i),
|
||||
b'_' => underline_node(i),
|
||||
b'=' => verbatim_node(i),
|
||||
b'~' => code_node(i),
|
||||
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
|
||||
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
|
||||
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
|
||||
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
|
||||
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
|
||||
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
|
||||
b'@' => snippet_node(i),
|
||||
b'{' => macros_node(i),
|
||||
b'<' => radio_target_node(i)
|
||||
|
|
@ -219,31 +214,38 @@ fn standard_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
|||
.or_else(|_| link_node(i))
|
||||
.or_else(|_| fn_ref_node(i))
|
||||
.or_else(|_| timestamp_inactive_node(i)),
|
||||
b'c' => inline_call_node(i),
|
||||
b's' => inline_src_node(i),
|
||||
// NOTE: although not specified in document, inline call and inline src follows the
|
||||
// same pre tokens rule as text markup
|
||||
b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
|
||||
b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
|
||||
b'$' => latex_fragment_node(i),
|
||||
b'\\' => {
|
||||
if i.as_bytes()[1] == b'\\' {
|
||||
line_break_node(i)
|
||||
} else {
|
||||
entity_node(i).or_else(|_| latex_fragment_node(i))
|
||||
}
|
||||
}
|
||||
b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i),
|
||||
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
|
||||
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
|
||||
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
|
||||
_ => Err(nom::Err::Error(())),
|
||||
}
|
||||
}
|
||||
|
||||
/// parse an object from minimal sets
|
||||
fn minimal_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
||||
fn minimal_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
|
||||
debug_assert!(
|
||||
i.s.len() >= 2,
|
||||
"object must have at least two characters: {:?}",
|
||||
i.s
|
||||
);
|
||||
|
||||
match &i.as_bytes()[0] {
|
||||
b'*' => bold_node(i),
|
||||
b'+' => strike_node(i),
|
||||
b'/' => italic_node(i),
|
||||
b'_' => underline_node(i),
|
||||
b'=' => verbatim_node(i),
|
||||
b'~' => code_node(i),
|
||||
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
|
||||
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
|
||||
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
|
||||
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
|
||||
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
|
||||
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
|
||||
b'$' => latex_fragment_node(i),
|
||||
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
|
||||
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
|
||||
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
|
||||
_ => Err(nom::Err::Error(())),
|
||||
}
|
||||
}
|
||||
|
|
@ -261,19 +263,18 @@ fn positions() {
|
|||
|
||||
// https://github.com/PoiScript/orgize/issues/69
|
||||
let vec = ObjectPositions::standard(("{3}", &config).into()).collect::<Vec<_>>();
|
||||
assert_eq!(vec.len(), 2);
|
||||
assert_eq!(vec.len(), 1);
|
||||
assert_eq!(vec[0].0.s, "{3}");
|
||||
// FIXME:
|
||||
assert_eq!(vec[1].0.s, "{3}");
|
||||
|
||||
let vec = ObjectPositions::standard(("*{()}//s\nc<<", &config).into()).collect::<Vec<_>>();
|
||||
assert_eq!(vec.len(), 6);
|
||||
assert_eq!(vec.len(), 7);
|
||||
assert_eq!(vec[0].0.s, "*{()}//s\nc<<");
|
||||
assert_eq!(vec[1].0.s, "{()}//s\nc<<");
|
||||
assert_eq!(vec[2].0.s, "()}//s\nc<<");
|
||||
assert_eq!(vec[3].0.s, ")}//s\nc<<");
|
||||
assert_eq!(vec[4].0.s, "c<<");
|
||||
assert_eq!(vec[5].0.s, "<<");
|
||||
assert_eq!(vec[2].0.s, "//s\nc<<");
|
||||
assert_eq!(vec[3].0.s, "/s\nc<<");
|
||||
assert_eq!(vec[4].0.s, "s\nc<<");
|
||||
assert_eq!(vec[5].0.s, "c<<");
|
||||
assert_eq!(vec[6].0.s, "<<");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -347,4 +348,15 @@ functions starting with ~org-element-~."#),
|
|||
TEXT@174..175 "."
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
t("a^abc"),
|
||||
@r###"
|
||||
PARAGRAPH@0..5
|
||||
TEXT@0..1 "a"
|
||||
SUPERSCRIPT@1..5
|
||||
CARET@1..2 "^"
|
||||
TEXT@2..5 "abc"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
|
|
|||
162
src/syntax/subscript_superscript.rs
Normal file
162
src/syntax/subscript_superscript.rs
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
use memchr::memchr2_iter;
|
||||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag, take_while1},
|
||||
combinator::opt,
|
||||
AsBytes, IResult, InputTake,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
syntax::{
|
||||
combinator::{caret_token, underscore_token},
|
||||
object::object_nodes,
|
||||
},
|
||||
SyntaxKind,
|
||||
};
|
||||
|
||||
use super::{
|
||||
combinator::{l_curly_token, node, r_curly_token, GreenElement},
|
||||
input::Input,
|
||||
};
|
||||
|
||||
pub fn superscript_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let (input, caret) = caret_token(input)?;
|
||||
|
||||
let mut children = vec![caret];
|
||||
|
||||
if let Ok((input, star)) = tag::<&str, Input, ()>("*")(input) {
|
||||
children.push(star.text_token());
|
||||
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
|
||||
} else if let Ok((input, (l, contents, r))) = template1(input) {
|
||||
children.push(l);
|
||||
children.extend(object_nodes(contents));
|
||||
children.push(r);
|
||||
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
|
||||
} else if let Ok((input, (sign, contents))) = template2(input) {
|
||||
if let Some(s) = sign {
|
||||
children.push(s)
|
||||
}
|
||||
children.push(contents);
|
||||
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
|
||||
} else {
|
||||
Err(nom::Err::Error(()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn subscript_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let (input, underscore) = underscore_token(input)?;
|
||||
|
||||
let mut children = vec![underscore];
|
||||
|
||||
if let Ok((input, star)) = tag::<&str, Input, ()>("*")(input) {
|
||||
children.push(star.text_token());
|
||||
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
|
||||
} else if let Ok((input, (l, contents, r))) = template1(input) {
|
||||
children.push(l);
|
||||
children.extend(object_nodes(contents));
|
||||
children.push(r);
|
||||
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
|
||||
} else if let Ok((input, (sign, contents))) = template2(input) {
|
||||
if let Some(s) = sign {
|
||||
children.push(s)
|
||||
}
|
||||
children.push(contents);
|
||||
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
|
||||
} else {
|
||||
Err(nom::Err::Error(()))
|
||||
}
|
||||
}
|
||||
|
||||
fn template1(input: Input) -> IResult<Input, (GreenElement, Input, GreenElement), ()> {
|
||||
let (input, l) = l_curly_token(input)?;
|
||||
let (input, contents) = balanced_brackets(input)?;
|
||||
let (input, r) = r_curly_token(input)?;
|
||||
Ok((input, (l, contents, r)))
|
||||
}
|
||||
|
||||
fn template2(input: Input) -> IResult<Input, (Option<GreenElement>, GreenElement), ()> {
|
||||
let (input, sign) = opt(alt((tag("+"), tag("-"))))(input)?;
|
||||
|
||||
let (input, contents) =
|
||||
take_while1(|c: char| c.is_alphanumeric() || c == ',' || c == '\\' || c == '.')(input)?;
|
||||
|
||||
if contents.s.ends_with(|c: char| !c.is_alphanumeric()) {
|
||||
return Err(nom::Err::Error(()));
|
||||
}
|
||||
|
||||
Ok((input, (sign.map(|x| x.text_token()), contents.text_token())))
|
||||
}
|
||||
|
||||
fn balanced_brackets(input: Input) -> IResult<Input, Input, ()> {
|
||||
let mut pairs = 1;
|
||||
let bytes = input.as_bytes();
|
||||
for i in memchr2_iter(b'{', b'}', bytes) {
|
||||
if bytes[i] == b'{' {
|
||||
pairs += 1;
|
||||
} else if pairs != 1 {
|
||||
pairs -= 1;
|
||||
} else {
|
||||
return Ok(input.take_split(i));
|
||||
}
|
||||
}
|
||||
Err(nom::Err::Error(()))
|
||||
}
|
||||
|
||||
/// Decides whether the text preceding a `^` or `_` marker allows it to start
/// a superscript or subscript.
///
/// `s` is the text before the marker. Returns `false` when `s` is empty or
/// when its last byte is a space or a tab, and `true` otherwise.
///
/// NOTE(review): the Org syntax document requires the preceding character to
/// be non-whitespace; line breaks are currently accepted here — confirm
/// whether that is intended.
pub fn verify_pre(s: &str) -> bool {
    match s.as_bytes().last() {
        // a marker with nothing before it cannot be a script
        None => false,
        Some(&last) => last != b' ' && last != b'\t',
    }
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
use crate::ast::Subscript;
|
||||
use crate::tests::to_ast;
|
||||
|
||||
let to_subscript = to_ast::<Subscript>(subscript_node);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_subscript("_*").syntax,
|
||||
@r###"
|
||||
SUBSCRIPT@0..2
|
||||
UNDERSCORE@0..1 "_"
|
||||
TEXT@1..2 "*"
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_subscript("_{*bo\nld*}").syntax,
|
||||
@r###"
|
||||
SUBSCRIPT@0..10
|
||||
UNDERSCORE@0..1 "_"
|
||||
L_CURLY@1..2 "{"
|
||||
BOLD@2..9
|
||||
STAR@2..3 "*"
|
||||
TEXT@3..8 "bo\nld"
|
||||
STAR@8..9 "*"
|
||||
R_CURLY@9..10 "}"
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_subscript("_+123").syntax,
|
||||
@r###"
|
||||
SUBSCRIPT@0..5
|
||||
UNDERSCORE@0..1 "_"
|
||||
TEXT@1..2 "+"
|
||||
TEXT@2..5 "123"
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_subscript("_abc").syntax,
|
||||
@r###"
|
||||
SUBSCRIPT@0..4
|
||||
UNDERSCORE@0..1 "_"
|
||||
TEXT@1..4 "abc"
|
||||
"###
|
||||
);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue