feat: support subscript and superscript
This commit is contained in:
parent
58dfb022c2
commit
8b5c545d4b
12 changed files with 346 additions and 63 deletions
|
|
@ -59,7 +59,7 @@ token_parser!(minus2_token, "--", MINUS2);
|
|||
token_parser!(percent2_token, "%%", PERCENT2);
|
||||
// token_parser!(slash_token, "/", SLASH);
|
||||
token_parser!(backslash_token, "\\", BACKSLASH);
|
||||
// token_parser!(underscore_token, "_", UNDERSCORE);
|
||||
token_parser!(underscore_token, "_", UNDERSCORE);
|
||||
// token_parser!(star_token, "*", STAR);
|
||||
token_parser!(plus_token, "+", PLUS);
|
||||
token_parser!(minus_token, "-", MINUS);
|
||||
|
|
@ -71,6 +71,7 @@ token_parser!(dollar2_token, "$$", DOLLAR2);
|
|||
// token_parser!(equal_token, "=", EQUAL);
|
||||
// token_parser!(tilde_token, "~", TILDE);
|
||||
token_parser!(hash_plus_token, "#+", HASH_PLUS);
|
||||
token_parser!(caret_token, "^", CARET);
|
||||
token_parser!(hash_token, "#", HASH);
|
||||
token_parser!(double_arrow_token, "=>", DOUBLE_ARROW);
|
||||
|
||||
|
|
|
|||
|
|
@ -112,6 +112,16 @@ fn validate_marker(pos: usize, text: Input) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
/// Decides whether the text preceding an emphasis marker permits the marker
/// to open markup.
///
/// Returns `true` when `input` is empty, or when its final byte is one of:
/// tab, space, `-`, `(`, `{`, `\`, `"`, carriage return or line feed.
/// Any other final byte yields `false`.
pub fn verify_pre(input: &str) -> bool {
    // An empty prefix means the marker sits at the very start of the text.
    input.as_bytes().last().map_or(true, |&last| {
        matches!(
            last,
            b'\t' | b' ' | b'-' | b'(' | b'{' | b'\\' | b'"' | b'\r' | b'\n'
        )
    })
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
use crate::{ast::Bold, tests::to_ast, ParseConfig};
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ pub mod planning;
|
|||
pub mod radio_target;
|
||||
pub mod rule;
|
||||
pub mod snippet;
|
||||
pub mod subscript_superscript;
|
||||
pub mod table;
|
||||
pub mod target;
|
||||
pub mod timestamp;
|
||||
|
|
@ -106,6 +107,7 @@ pub enum SyntaxKind {
|
|||
DOUBLE_ARROW, // '=>'
|
||||
PIPE, // '|'
|
||||
COMMA, // ','
|
||||
CARET, // '^'
|
||||
NEW_LINE, // '\n' or '\r\n' or '\r'
|
||||
WHITESPACE, // ' ' or '\t'
|
||||
BLANK_LINE,
|
||||
|
|
@ -200,6 +202,8 @@ pub enum SyntaxKind {
|
|||
VERBATIM,
|
||||
CODE,
|
||||
ENTITY,
|
||||
SUPERSCRIPT,
|
||||
SUBSCRIPT,
|
||||
|
||||
/* timestamp */
|
||||
TIMESTAMP_ACTIVE,
|
||||
|
|
@ -241,6 +245,8 @@ impl SyntaxKind {
|
|||
| SyntaxKind::MACROS
|
||||
| SyntaxKind::RADIO_TARGET
|
||||
| SyntaxKind::COOKIE
|
||||
| SyntaxKind::SUPERSCRIPT
|
||||
| SyntaxKind::SUBSCRIPT
|
||||
| SyntaxKind::ORG_TABLE_CELL
|
||||
| SyntaxKind::TIMESTAMP_ACTIVE
|
||||
| SyntaxKind::TIMESTAMP_INACTIVE
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@ use nom::{AsBytes, IResult, InputLength, InputTake};
|
|||
use super::{
|
||||
combinator::GreenElement,
|
||||
cookie::cookie_node,
|
||||
emphasis::{bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node},
|
||||
emphasis::{
|
||||
self, bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node,
|
||||
},
|
||||
entity::entity_node,
|
||||
fn_ref::fn_ref_node,
|
||||
inline_call::inline_call_node,
|
||||
|
|
@ -15,6 +17,7 @@ use super::{
|
|||
macros::macros_node,
|
||||
radio_target::radio_target_node,
|
||||
snippet::snippet_node,
|
||||
subscript_superscript::{self, subscript_node, superscript_node},
|
||||
target::target_node,
|
||||
timestamp::{timestamp_active_node, timestamp_diary_node, timestamp_inactive_node},
|
||||
};
|
||||
|
|
@ -22,7 +25,6 @@ use super::{
|
|||
struct ObjectPositions<'a> {
|
||||
input: Input<'a>,
|
||||
pos: usize,
|
||||
next: Option<usize>,
|
||||
finder: jetscii::BytesConst,
|
||||
}
|
||||
|
||||
|
|
@ -31,10 +33,17 @@ impl ObjectPositions<'_> {
|
|||
ObjectPositions {
|
||||
input,
|
||||
pos: 0,
|
||||
next: Some(0),
|
||||
finder: jetscii::bytes!(
|
||||
b' ', b'(', b'{', b'\'', b'"', b'\n', /* */
|
||||
b'\\', b'$', b'@', b'<', b'['
|
||||
b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */
|
||||
b'@', /* snippet */
|
||||
b'<', /* timestamp, target, radio target */
|
||||
b'[', /* link, cookie, fn_ref, timestamp */
|
||||
b'c', /* inline call */
|
||||
b's', /* inline source */
|
||||
b'\\', b'$', /* latex & entity */
|
||||
b'{', /* macros */
|
||||
b'^', /* superscript */
|
||||
b'_' /* subscript */
|
||||
),
|
||||
}
|
||||
}
|
||||
|
|
@ -43,10 +52,11 @@ impl ObjectPositions<'_> {
|
|||
ObjectPositions {
|
||||
input,
|
||||
pos: 0,
|
||||
next: Some(0),
|
||||
finder: jetscii::bytes!(
|
||||
b' ', b'(', b'{', b'\'', b'"', b'\n', /* */
|
||||
b'\\', b'$'
|
||||
b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */
|
||||
b'\\', b'$', /* latex & entity */
|
||||
b'^', /* superscript */
|
||||
b'_' /* subscript */
|
||||
),
|
||||
}
|
||||
}
|
||||
|
|
@ -60,25 +70,12 @@ impl<'a> Iterator for ObjectPositions<'a> {
|
|||
return None;
|
||||
}
|
||||
|
||||
if let Some(p) = self.next.take() {
|
||||
return Some(self.input.take_split(p));
|
||||
}
|
||||
|
||||
let bytes = &self.input.as_bytes()[self.pos..];
|
||||
let previous = self.pos;
|
||||
let i = self.finder.find(bytes)?;
|
||||
self.pos += i + 1;
|
||||
|
||||
let p = match bytes[i] {
|
||||
b'{' => {
|
||||
if self.input.s.len() - self.pos > 2 {
|
||||
self.next = Some(self.pos);
|
||||
}
|
||||
self.pos - 1
|
||||
}
|
||||
b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos,
|
||||
_ => self.pos - 1,
|
||||
};
|
||||
let p = self.pos - 1;
|
||||
|
||||
debug_assert!(
|
||||
previous < self.pos && self.pos <= self.input.s.len(),
|
||||
|
|
@ -112,10 +109,10 @@ impl<'a> Iterator for ObjectPositions<'a> {
|
|||
/// - Timestamps
|
||||
/// - Text Markup (bold code strike verbatim underline italic)
|
||||
/// - Line Breaks
|
||||
/// - Subscript and Superscript
|
||||
///
|
||||
/// // todo:
|
||||
/// - Citations
|
||||
/// - Subscript and Superscript
|
||||
pub fn object_nodes(input: Input) -> Vec<GreenElement> {
|
||||
// TODO:
|
||||
// debug_assert!(!input.is_empty());
|
||||
|
|
@ -125,11 +122,11 @@ pub fn object_nodes(input: Input) -> Vec<GreenElement> {
|
|||
|
||||
'l: while !i.is_empty() {
|
||||
for (input, head) in ObjectPositions::standard(i) {
|
||||
if let Ok((input, node)) = standard_object_node(input) {
|
||||
if let Ok((input, pre)) = standard_object_node(input, head) {
|
||||
if !head.is_empty() {
|
||||
nodes.push(head.text_token())
|
||||
}
|
||||
nodes.push(node);
|
||||
nodes.push(pre);
|
||||
debug_assert!(
|
||||
input.input_len() < i.input_len(),
|
||||
"{} < {}",
|
||||
|
|
@ -157,8 +154,6 @@ pub fn object_nodes(input: Input) -> Vec<GreenElement> {
|
|||
/// - LaTeX fragments ('\\')
|
||||
/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/')
|
||||
/// - Entities ('\\')
|
||||
///
|
||||
/// // todo:
|
||||
/// - Superscripts and Subscripts
|
||||
pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
|
||||
let mut i = input;
|
||||
|
|
@ -166,11 +161,11 @@ pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
|
|||
|
||||
'l: while !i.is_empty() {
|
||||
for (input, head) in ObjectPositions::minimal(i) {
|
||||
if let Ok((input, node)) = minimal_object_node(input) {
|
||||
if let Ok((input, pre)) = minimal_object_node(input, head) {
|
||||
if !head.is_empty() {
|
||||
nodes.push(head.text_token())
|
||||
}
|
||||
nodes.push(node);
|
||||
nodes.push(pre);
|
||||
debug_assert!(
|
||||
input.input_len() < i.input_len(),
|
||||
"{} < {}",
|
||||
|
|
@ -195,7 +190,7 @@ pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
|
|||
}
|
||||
|
||||
/// parse an object from standard sets
|
||||
fn standard_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
||||
fn standard_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
|
||||
debug_assert!(
|
||||
i.s.len() >= 2,
|
||||
"object must have at least two characters: {:?}",
|
||||
|
|
@ -203,12 +198,12 @@ fn standard_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
|||
);
|
||||
|
||||
match &i.as_bytes()[0] {
|
||||
b'*' => bold_node(i),
|
||||
b'+' => strike_node(i),
|
||||
b'/' => italic_node(i),
|
||||
b'_' => underline_node(i),
|
||||
b'=' => verbatim_node(i),
|
||||
b'~' => code_node(i),
|
||||
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
|
||||
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
|
||||
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
|
||||
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
|
||||
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
|
||||
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
|
||||
b'@' => snippet_node(i),
|
||||
b'{' => macros_node(i),
|
||||
b'<' => radio_target_node(i)
|
||||
|
|
@ -219,31 +214,38 @@ fn standard_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
|||
.or_else(|_| link_node(i))
|
||||
.or_else(|_| fn_ref_node(i))
|
||||
.or_else(|_| timestamp_inactive_node(i)),
|
||||
b'c' => inline_call_node(i),
|
||||
b's' => inline_src_node(i),
|
||||
// NOTE: although not specified in document, inline call and inline src follows the
|
||||
// same pre tokens rule as text markup
|
||||
b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
|
||||
b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
|
||||
b'$' => latex_fragment_node(i),
|
||||
b'\\' => {
|
||||
if i.as_bytes()[1] == b'\\' {
|
||||
line_break_node(i)
|
||||
} else {
|
||||
entity_node(i).or_else(|_| latex_fragment_node(i))
|
||||
}
|
||||
}
|
||||
b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i),
|
||||
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
|
||||
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
|
||||
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
|
||||
_ => Err(nom::Err::Error(())),
|
||||
}
|
||||
}
|
||||
|
||||
/// parse an object from minimal sets
|
||||
fn minimal_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
|
||||
fn minimal_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
|
||||
debug_assert!(
|
||||
i.s.len() >= 2,
|
||||
"object must have at least two characters: {:?}",
|
||||
i.s
|
||||
);
|
||||
|
||||
match &i.as_bytes()[0] {
|
||||
b'*' => bold_node(i),
|
||||
b'+' => strike_node(i),
|
||||
b'/' => italic_node(i),
|
||||
b'_' => underline_node(i),
|
||||
b'=' => verbatim_node(i),
|
||||
b'~' => code_node(i),
|
||||
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
|
||||
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
|
||||
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
|
||||
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
|
||||
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
|
||||
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
|
||||
b'$' => latex_fragment_node(i),
|
||||
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
|
||||
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
|
||||
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
|
||||
_ => Err(nom::Err::Error(())),
|
||||
}
|
||||
}
|
||||
|
|
@ -261,19 +263,18 @@ fn positions() {
|
|||
|
||||
// https://github.com/PoiScript/orgize/issues/69
|
||||
let vec = ObjectPositions::standard(("{3}", &config).into()).collect::<Vec<_>>();
|
||||
assert_eq!(vec.len(), 2);
|
||||
assert_eq!(vec.len(), 1);
|
||||
assert_eq!(vec[0].0.s, "{3}");
|
||||
// FIXME:
|
||||
assert_eq!(vec[1].0.s, "{3}");
|
||||
|
||||
let vec = ObjectPositions::standard(("*{()}//s\nc<<", &config).into()).collect::<Vec<_>>();
|
||||
assert_eq!(vec.len(), 6);
|
||||
assert_eq!(vec.len(), 7);
|
||||
assert_eq!(vec[0].0.s, "*{()}//s\nc<<");
|
||||
assert_eq!(vec[1].0.s, "{()}//s\nc<<");
|
||||
assert_eq!(vec[2].0.s, "()}//s\nc<<");
|
||||
assert_eq!(vec[3].0.s, ")}//s\nc<<");
|
||||
assert_eq!(vec[4].0.s, "c<<");
|
||||
assert_eq!(vec[5].0.s, "<<");
|
||||
assert_eq!(vec[2].0.s, "//s\nc<<");
|
||||
assert_eq!(vec[3].0.s, "/s\nc<<");
|
||||
assert_eq!(vec[4].0.s, "s\nc<<");
|
||||
assert_eq!(vec[5].0.s, "c<<");
|
||||
assert_eq!(vec[6].0.s, "<<");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -347,4 +348,15 @@ functions starting with ~org-element-~."#),
|
|||
TEXT@174..175 "."
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
t("a^abc"),
|
||||
@r###"
|
||||
PARAGRAPH@0..5
|
||||
TEXT@0..1 "a"
|
||||
SUPERSCRIPT@1..5
|
||||
CARET@1..2 "^"
|
||||
TEXT@2..5 "abc"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
|
|
|||
162
src/syntax/subscript_superscript.rs
Normal file
162
src/syntax/subscript_superscript.rs
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
use memchr::memchr2_iter;
|
||||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag, take_while1},
|
||||
combinator::opt,
|
||||
AsBytes, IResult, InputTake,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
syntax::{
|
||||
combinator::{caret_token, underscore_token},
|
||||
object::object_nodes,
|
||||
},
|
||||
SyntaxKind,
|
||||
};
|
||||
|
||||
use super::{
|
||||
combinator::{l_curly_token, node, r_curly_token, GreenElement},
|
||||
input::Input,
|
||||
};
|
||||
|
||||
pub fn superscript_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let (input, caret) = caret_token(input)?;
|
||||
|
||||
let mut children = vec![caret];
|
||||
|
||||
if let Ok((input, star)) = tag::<&str, Input, ()>("*")(input) {
|
||||
children.push(star.text_token());
|
||||
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
|
||||
} else if let Ok((input, (l, contents, r))) = template1(input) {
|
||||
children.push(l);
|
||||
children.extend(object_nodes(contents));
|
||||
children.push(r);
|
||||
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
|
||||
} else if let Ok((input, (sign, contents))) = template2(input) {
|
||||
if let Some(s) = sign {
|
||||
children.push(s)
|
||||
}
|
||||
children.push(contents);
|
||||
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
|
||||
} else {
|
||||
Err(nom::Err::Error(()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn subscript_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let (input, underscore) = underscore_token(input)?;
|
||||
|
||||
let mut children = vec![underscore];
|
||||
|
||||
if let Ok((input, star)) = tag::<&str, Input, ()>("*")(input) {
|
||||
children.push(star.text_token());
|
||||
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
|
||||
} else if let Ok((input, (l, contents, r))) = template1(input) {
|
||||
children.push(l);
|
||||
children.extend(object_nodes(contents));
|
||||
children.push(r);
|
||||
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
|
||||
} else if let Ok((input, (sign, contents))) = template2(input) {
|
||||
if let Some(s) = sign {
|
||||
children.push(s)
|
||||
}
|
||||
children.push(contents);
|
||||
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
|
||||
} else {
|
||||
Err(nom::Err::Error(()))
|
||||
}
|
||||
}
|
||||
|
||||
fn template1(input: Input) -> IResult<Input, (GreenElement, Input, GreenElement), ()> {
|
||||
let (input, l) = l_curly_token(input)?;
|
||||
let (input, contents) = balanced_brackets(input)?;
|
||||
let (input, r) = r_curly_token(input)?;
|
||||
Ok((input, (l, contents, r)))
|
||||
}
|
||||
|
||||
fn template2(input: Input) -> IResult<Input, (Option<GreenElement>, GreenElement), ()> {
|
||||
let (input, sign) = opt(alt((tag("+"), tag("-"))))(input)?;
|
||||
|
||||
let (input, contents) =
|
||||
take_while1(|c: char| c.is_alphanumeric() || c == ',' || c == '\\' || c == '.')(input)?;
|
||||
|
||||
if contents.s.ends_with(|c: char| !c.is_alphanumeric()) {
|
||||
return Err(nom::Err::Error(()));
|
||||
}
|
||||
|
||||
Ok((input, (sign.map(|x| x.text_token()), contents.text_token())))
|
||||
}
|
||||
|
||||
fn balanced_brackets(input: Input) -> IResult<Input, Input, ()> {
|
||||
let mut pairs = 1;
|
||||
let bytes = input.as_bytes();
|
||||
for i in memchr2_iter(b'{', b'}', bytes) {
|
||||
if bytes[i] == b'{' {
|
||||
pairs += 1;
|
||||
} else if pairs != 1 {
|
||||
pairs -= 1;
|
||||
} else {
|
||||
return Ok(input.take_split(i));
|
||||
}
|
||||
}
|
||||
Err(nom::Err::Error(()))
|
||||
}
|
||||
|
||||
/// Decides whether the text preceding a `^` / `_` marker permits a
/// superscript or subscript to start there.
///
/// Returns `false` when `s` is empty or when its final byte is a space or a
/// tab; returns `true` for any other final byte.
pub fn verify_pre(s: &str) -> bool {
    // The leftover `dbg!(&s)` was removed: it wrote to stderr on every call.
    match s.as_bytes().last() {
        None => false,
        Some(&last) => last != b' ' && last != b'\t',
    }
}
|
||||
|
||||
// Inline-snapshot tests for `subscript_node`. The four inputs cover the `_*`
// form, the braced form containing bold markup that spans a newline, the
// signed form (`_+123`) and the plain alphanumeric form (`_abc`).
#[test]
|
||||
fn parse() {
|
||||
use crate::ast::Subscript;
|
||||
use crate::tests::to_ast;
|
||||
|
||||
let to_subscript = to_ast::<Subscript>(subscript_node);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_subscript("_*").syntax,
|
||||
@r###"
|
||||
SUBSCRIPT@0..2
|
||||
UNDERSCORE@0..1 "_"
|
||||
TEXT@1..2 "*"
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_subscript("_{*bo\nld*}").syntax,
|
||||
@r###"
|
||||
SUBSCRIPT@0..10
|
||||
UNDERSCORE@0..1 "_"
|
||||
L_CURLY@1..2 "{"
|
||||
BOLD@2..9
|
||||
STAR@2..3 "*"
|
||||
TEXT@3..8 "bo\nld"
|
||||
STAR@8..9 "*"
|
||||
R_CURLY@9..10 "}"
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_subscript("_+123").syntax,
|
||||
@r###"
|
||||
SUBSCRIPT@0..5
|
||||
UNDERSCORE@0..1 "_"
|
||||
TEXT@1..2 "+"
|
||||
TEXT@2..5 "123"
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_debug_snapshot!(
|
||||
to_subscript("_abc").syntax,
|
||||
@r###"
|
||||
SUBSCRIPT@0..4
|
||||
UNDERSCORE@0..1 "_"
|
||||
TEXT@1..4 "abc"
|
||||
"###
|
||||
);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue