feat: support subscript and superscript

This commit is contained in:
PoiScript 2023-11-21 18:41:17 +08:00
parent 58dfb022c2
commit 8b5c545d4b
No known key found for this signature in database
GPG key ID: 22C2B1249D99985E
12 changed files with 346 additions and 63 deletions

View file

@ -50,7 +50,7 @@ impl Traverser for MyHtmlHandler {
special_block quote_block center_block verse_block comment_block example_block export_block
source_block babel_call clock cookie radio_target drawer dyn_block fn_def fn_ref macros
snippet timestamp target fixed_width org_table org_table_row org_table_cell latex_fragment
latex_environment entity line_break
latex_environment entity line_break superscript subscript
}
}

View file

@ -264,6 +264,14 @@ const nodes = [
struct: "LineBreak",
kind: ["LINE_BREAK"],
},
{
struct: "Superscript",
kind: ["SUPERSCRIPT"],
},
{
struct: "Subscript",
kind: ["SUBSCRIPT"],
},
];
let content = `//! generated file, do not modify it directly

View file

@ -1716,3 +1716,53 @@ impl LineBreak {
self.syntax.text_range().end().into()
}
}
/// Typed AST wrapper around a syntax node whose kind is `SUPERSCRIPT`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Superscript {
    pub(crate) syntax: SyntaxNode,
}

impl AstNode for Superscript {
    type Language = OrgLanguage;

    /// Only `SUPERSCRIPT` nodes can be viewed as a `Superscript`.
    fn can_cast(kind: SyntaxKind) -> bool {
        SUPERSCRIPT == kind
    }

    /// Wraps `node` when its kind matches, otherwise yields `None`.
    fn cast(node: SyntaxNode) -> Option<Superscript> {
        if Self::can_cast(node.kind()) {
            Some(Superscript { syntax: node })
        } else {
            None
        }
    }

    /// Borrows the underlying syntax node.
    fn syntax(&self) -> &SyntaxNode {
        &self.syntax
    }
}

impl Superscript {
    /// Start offset of this node's text range.
    pub fn begin(&self) -> u32 {
        let range = self.syntax.text_range();
        u32::from(range.start())
    }

    /// End offset of this node's text range.
    pub fn end(&self) -> u32 {
        let range = self.syntax.text_range();
        u32::from(range.end())
    }
}
/// Typed AST wrapper around a syntax node whose kind is `SUBSCRIPT`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Subscript {
    pub(crate) syntax: SyntaxNode,
}

impl AstNode for Subscript {
    type Language = OrgLanguage;

    /// Only `SUBSCRIPT` nodes can be viewed as a `Subscript`.
    fn can_cast(kind: SyntaxKind) -> bool {
        SUBSCRIPT == kind
    }

    /// Wraps `node` when its kind matches, otherwise yields `None`.
    fn cast(node: SyntaxNode) -> Option<Subscript> {
        if Self::can_cast(node.kind()) {
            Some(Subscript { syntax: node })
        } else {
            None
        }
    }

    /// Borrows the underlying syntax node.
    fn syntax(&self) -> &SyntaxNode {
        &self.syntax
    }
}

impl Subscript {
    /// Start offset of this node's text range.
    pub fn begin(&self) -> u32 {
        let range = self.syntax.text_range();
        u32::from(range.start())
    }

    /// End offset of this node's text range.
    pub fn end(&self) -> u32 {
        let range = self.syntax.text_range();
        u32::from(range.end())
    }
}

View file

@ -49,7 +49,7 @@
/// special_block quote_block center_block verse_block comment_block example_block export_block
/// source_block babel_call clock cookie radio_target drawer dyn_block fn_def fn_ref macros
/// snippet timestamp target fixed_width org_table org_table_row org_table_cell latex_fragment
/// latex_environment entity line_break
/// latex_environment entity line_break superscript subscript
/// }
/// }
///
@ -203,6 +203,12 @@ macro_rules! forward_handler {
(@method $handler:ty, line_break) => {
forward_handler!(@method $handler, line_break, WalkEvent<&$crate::ast::LineBreak>);
};
(@method $handler:ty, superscript) => {
forward_handler!(@method $handler, superscript, WalkEvent<&$crate::ast::Superscript>);
};
(@method $handler:ty, subscript) => {
forward_handler!(@method $handler, subscript, WalkEvent<&$crate::ast::Subscript>);
};
(@method $handler:ty, $x:ident) => {
std::compile_error!(std::concat!(std::stringify!($x), " is not a method"));
};

View file

@ -518,4 +518,20 @@ impl Traverser for HtmlExport {
ctx.skip();
}
}
/// Emits the opening `<sub>` tag on enter and the closing `</sub>` tag on leave.
#[tracing::instrument(skip(self, _ctx))]
fn subscript(&mut self, event: WalkEvent<&Subscript>, _ctx: &mut TraversalContext) {
    let markup = match event {
        WalkEvent::Enter(_) => "<sub>",
        WalkEvent::Leave(_) => "</sub>",
    };
    self.output += markup;
}
/// Emits the opening `<sup>` tag on enter and the closing `</sup>` tag on leave.
#[tracing::instrument(skip(self, _ctx))]
fn superscript(&mut self, event: WalkEvent<&Superscript>, _ctx: &mut TraversalContext) {
    let markup = match event {
        WalkEvent::Enter(_) => "<sup>",
        WalkEvent::Leave(_) => "</sup>",
    };
    self.output += markup;
}
}

View file

@ -141,6 +141,8 @@ pub trait Traverser {
LATEX_ENVIRONMENT => traverse!(LatexEnvironment, latex_environment),
ENTITY => traverse!(Entity, entity),
LINE_BREAK => traverse!(LineBreak, line_break),
SUPERSCRIPT => traverse!(Superscript, superscript),
SUBSCRIPT => traverse!(Subscript, subscript),
BLOCK_CONTENT | LIST_ITEM_CONTENT => traverse_children!(node),
@ -252,4 +254,8 @@ pub trait Traverser {
fn entity(&mut self, event: WalkEvent<&Entity>, ctx: &mut TraversalContext);
/// Called when entering or leaving `LineBreak` node
fn line_break(&mut self, event: WalkEvent<&LineBreak>, ctx: &mut TraversalContext);
/// Called when entering or leaving `Superscript` node
fn superscript(&mut self, event: WalkEvent<&Superscript>, ctx: &mut TraversalContext);
/// Called when entering or leaving `Subscript` node
fn subscript(&mut self, event: WalkEvent<&Subscript>, ctx: &mut TraversalContext);
}

View file

@ -59,7 +59,7 @@ token_parser!(minus2_token, "--", MINUS2);
token_parser!(percent2_token, "%%", PERCENT2);
// token_parser!(slash_token, "/", SLASH);
token_parser!(backslash_token, "\\", BACKSLASH);
// token_parser!(underscore_token, "_", UNDERSCORE);
token_parser!(underscore_token, "_", UNDERSCORE);
// token_parser!(star_token, "*", STAR);
token_parser!(plus_token, "+", PLUS);
token_parser!(minus_token, "-", MINUS);
@ -71,6 +71,7 @@ token_parser!(dollar2_token, "$$", DOLLAR2);
// token_parser!(equal_token, "=", EQUAL);
// token_parser!(tilde_token, "~", TILDE);
token_parser!(hash_plus_token, "#+", HASH_PLUS);
token_parser!(caret_token, "^", CARET);
token_parser!(hash_token, "#", HASH);
token_parser!(double_arrow_token, "=>", DOUBLE_ARROW);

View file

@ -112,6 +112,16 @@ fn validate_marker(pos: usize, text: Input) -> bool {
}
}
/// Checks whether a text-markup marker is allowed to start right after `input`.
///
/// `input` is the text that precedes the marker. The marker is accepted when
/// there is no preceding text at all, or when the last byte of that text is a
/// space, tab, line break, `-`, `(`, `{`, `'` or `"` (Org's PRE set for
/// emphasis markers).
///
/// Only the final byte is inspected; the trailing byte of a multi-byte UTF-8
/// character is never ASCII, so such text is rejected.
pub fn verify_pre(input: &str) -> bool {
    input.as_bytes().last().map_or(true, |&last| {
        matches!(
            last,
            b'\t' | b' ' | b'-' | b'(' | b'{' | b'\'' | b'"' | b'\r' | b'\n'
            // NOTE(review): a backslash is not part of Org's PRE set; it is kept
            // only so that input accepted before this fix is still accepted.
            // Confirm whether it was ever intended.
            | b'\\'
        )
    })
}
#[test]
fn parse() {
use crate::{ast::Bold, tests::to_ast, ParseConfig};

View file

@ -31,6 +31,7 @@ pub mod planning;
pub mod radio_target;
pub mod rule;
pub mod snippet;
pub mod subscript_superscript;
pub mod table;
pub mod target;
pub mod timestamp;
@ -106,6 +107,7 @@ pub enum SyntaxKind {
DOUBLE_ARROW, // '=>'
PIPE, // '|'
COMMA, // ','
CARET, // '^'
NEW_LINE, // '\n' or '\r\n' or '\r'
WHITESPACE, // ' ' or '\t'
BLANK_LINE,
@ -200,6 +202,8 @@ pub enum SyntaxKind {
VERBATIM,
CODE,
ENTITY,
SUPERSCRIPT,
SUBSCRIPT,
/* timestamp */
TIMESTAMP_ACTIVE,
@ -241,6 +245,8 @@ impl SyntaxKind {
| SyntaxKind::MACROS
| SyntaxKind::RADIO_TARGET
| SyntaxKind::COOKIE
| SyntaxKind::SUPERSCRIPT
| SyntaxKind::SUBSCRIPT
| SyntaxKind::ORG_TABLE_CELL
| SyntaxKind::TIMESTAMP_ACTIVE
| SyntaxKind::TIMESTAMP_INACTIVE

View file

@ -3,7 +3,9 @@ use nom::{AsBytes, IResult, InputLength, InputTake};
use super::{
combinator::GreenElement,
cookie::cookie_node,
emphasis::{bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node},
emphasis::{
self, bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node,
},
entity::entity_node,
fn_ref::fn_ref_node,
inline_call::inline_call_node,
@ -15,6 +17,7 @@ use super::{
macros::macros_node,
radio_target::radio_target_node,
snippet::snippet_node,
subscript_superscript::{self, subscript_node, superscript_node},
target::target_node,
timestamp::{timestamp_active_node, timestamp_diary_node, timestamp_inactive_node},
};
@ -22,7 +25,6 @@ use super::{
struct ObjectPositions<'a> {
input: Input<'a>,
pos: usize,
next: Option<usize>,
finder: jetscii::BytesConst,
}
@ -31,10 +33,17 @@ impl ObjectPositions<'_> {
ObjectPositions {
input,
pos: 0,
next: Some(0),
finder: jetscii::bytes!(
b' ', b'(', b'{', b'\'', b'"', b'\n', /* */
b'\\', b'$', b'@', b'<', b'['
b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */
b'@', /* snippet */
b'<', /* timestamp, target, radio target */
b'[', /* link, cookie, fn_ref, timestamp */
b'c', /* inline call */
b's', /* inline source */
b'\\', b'$', /* latex & entity */
b'{', /* macros */
b'^', /* superscript */
b'_' /* subscript */
),
}
}
@ -43,10 +52,11 @@ impl ObjectPositions<'_> {
ObjectPositions {
input,
pos: 0,
next: Some(0),
finder: jetscii::bytes!(
b' ', b'(', b'{', b'\'', b'"', b'\n', /* */
b'\\', b'$'
b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */
b'\\', b'$', /* latex & entity */
b'^', /* superscript */
b'_' /* subscript */
),
}
}
@ -60,25 +70,12 @@ impl<'a> Iterator for ObjectPositions<'a> {
return None;
}
if let Some(p) = self.next.take() {
return Some(self.input.take_split(p));
}
let bytes = &self.input.as_bytes()[self.pos..];
let previous = self.pos;
let i = self.finder.find(bytes)?;
self.pos += i + 1;
let p = match bytes[i] {
b'{' => {
if self.input.s.len() - self.pos > 2 {
self.next = Some(self.pos);
}
self.pos - 1
}
b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos,
_ => self.pos - 1,
};
let p = self.pos - 1;
debug_assert!(
previous < self.pos && self.pos <= self.input.s.len(),
@ -112,10 +109,10 @@ impl<'a> Iterator for ObjectPositions<'a> {
/// - Timestamps
/// - Text Markup (bold code strike verbatim underline italic)
/// - Line Breaks
/// - Subscript and Superscript
///
/// // todo:
/// - Citations
/// - Subscript and Superscript
pub fn object_nodes(input: Input) -> Vec<GreenElement> {
// TODO:
// debug_assert!(!input.is_empty());
@ -125,11 +122,11 @@ pub fn object_nodes(input: Input) -> Vec<GreenElement> {
'l: while !i.is_empty() {
for (input, head) in ObjectPositions::standard(i) {
if let Ok((input, node)) = standard_object_node(input) {
if let Ok((input, pre)) = standard_object_node(input, head) {
if !head.is_empty() {
nodes.push(head.text_token())
}
nodes.push(node);
nodes.push(pre);
debug_assert!(
input.input_len() < i.input_len(),
"{} < {}",
@ -157,8 +154,6 @@ pub fn object_nodes(input: Input) -> Vec<GreenElement> {
/// - LaTeX fragments ('\\')
/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/')
/// - Entities ('\\')
///
/// // todo:
/// - Superscripts and Subscripts
pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
let mut i = input;
@ -166,11 +161,11 @@ pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
'l: while !i.is_empty() {
for (input, head) in ObjectPositions::minimal(i) {
if let Ok((input, node)) = minimal_object_node(input) {
if let Ok((input, pre)) = minimal_object_node(input, head) {
if !head.is_empty() {
nodes.push(head.text_token())
}
nodes.push(node);
nodes.push(pre);
debug_assert!(
input.input_len() < i.input_len(),
"{} < {}",
@ -195,7 +190,7 @@ pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
}
/// parse an object from standard sets
fn standard_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
fn standard_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
debug_assert!(
i.s.len() >= 2,
"object must have at least two characters: {:?}",
@ -203,12 +198,12 @@ fn standard_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
);
match &i.as_bytes()[0] {
b'*' => bold_node(i),
b'+' => strike_node(i),
b'/' => italic_node(i),
b'_' => underline_node(i),
b'=' => verbatim_node(i),
b'~' => code_node(i),
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'@' => snippet_node(i),
b'{' => macros_node(i),
b'<' => radio_target_node(i)
@ -219,31 +214,38 @@ fn standard_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
.or_else(|_| link_node(i))
.or_else(|_| fn_ref_node(i))
.or_else(|_| timestamp_inactive_node(i)),
b'c' => inline_call_node(i),
b's' => inline_src_node(i),
// NOTE: although not specified in document, inline call and inline src follows the
// same pre tokens rule as text markup
b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
b'$' => latex_fragment_node(i),
b'\\' => {
if i.as_bytes()[1] == b'\\' {
line_break_node(i)
} else {
entity_node(i).or_else(|_| latex_fragment_node(i))
}
}
b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i),
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
}
}
/// parse an object from minimal sets
fn minimal_object_node(i: Input) -> IResult<Input, GreenElement, ()> {
fn minimal_object_node<'a>(i: Input<'a>, pre: Input<'a>) -> IResult<Input<'a>, GreenElement, ()> {
debug_assert!(
i.s.len() >= 2,
"object must have at least two characters: {:?}",
i.s
);
match &i.as_bytes()[0] {
b'*' => bold_node(i),
b'+' => strike_node(i),
b'/' => italic_node(i),
b'_' => underline_node(i),
b'=' => verbatim_node(i),
b'~' => code_node(i),
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'$' => latex_fragment_node(i),
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
}
}
@ -261,19 +263,18 @@ fn positions() {
// https://github.com/PoiScript/orgize/issues/69
let vec = ObjectPositions::standard(("{3}", &config).into()).collect::<Vec<_>>();
assert_eq!(vec.len(), 2);
assert_eq!(vec.len(), 1);
assert_eq!(vec[0].0.s, "{3}");
// FIXME:
assert_eq!(vec[1].0.s, "{3}");
let vec = ObjectPositions::standard(("*{()}//s\nc<<", &config).into()).collect::<Vec<_>>();
assert_eq!(vec.len(), 6);
assert_eq!(vec.len(), 7);
assert_eq!(vec[0].0.s, "*{()}//s\nc<<");
assert_eq!(vec[1].0.s, "{()}//s\nc<<");
assert_eq!(vec[2].0.s, "()}//s\nc<<");
assert_eq!(vec[3].0.s, ")}//s\nc<<");
assert_eq!(vec[4].0.s, "c<<");
assert_eq!(vec[5].0.s, "<<");
assert_eq!(vec[2].0.s, "//s\nc<<");
assert_eq!(vec[3].0.s, "/s\nc<<");
assert_eq!(vec[4].0.s, "s\nc<<");
assert_eq!(vec[5].0.s, "c<<");
assert_eq!(vec[6].0.s, "<<");
}
#[test]
@ -347,4 +348,15 @@ functions starting with ~org-element-~."#),
TEXT@174..175 "."
"###
);
insta::assert_debug_snapshot!(
t("a^abc"),
@r###"
PARAGRAPH@0..5
TEXT@0..1 "a"
SUPERSCRIPT@1..5
CARET@1..2 "^"
TEXT@2..5 "abc"
"###
);
}

View file

@ -0,0 +1,162 @@
use memchr::memchr2_iter;
use nom::{
branch::alt,
bytes::complete::{tag, take_while1},
combinator::opt,
AsBytes, IResult, InputTake,
};
use crate::{
syntax::{
combinator::{caret_token, underscore_token},
object::object_nodes,
},
SyntaxKind,
};
use super::{
combinator::{l_curly_token, node, r_curly_token, GreenElement},
input::Input,
};
pub fn superscript_node(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, caret) = caret_token(input)?;
let mut children = vec![caret];
if let Ok((input, star)) = tag::<&str, Input, ()>("*")(input) {
children.push(star.text_token());
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
} else if let Ok((input, (l, contents, r))) = template1(input) {
children.push(l);
children.extend(object_nodes(contents));
children.push(r);
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
} else if let Ok((input, (sign, contents))) = template2(input) {
if let Some(s) = sign {
children.push(s)
}
children.push(contents);
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
} else {
Err(nom::Err::Error(()))
}
}
pub fn subscript_node(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, underscore) = underscore_token(input)?;
let mut children = vec![underscore];
if let Ok((input, star)) = tag::<&str, Input, ()>("*")(input) {
children.push(star.text_token());
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
} else if let Ok((input, (l, contents, r))) = template1(input) {
children.push(l);
children.extend(object_nodes(contents));
children.push(r);
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
} else if let Ok((input, (sign, contents))) = template2(input) {
if let Some(s) = sign {
children.push(s)
}
children.push(contents);
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
} else {
Err(nom::Err::Error(()))
}
}
/// Parses a curly-bracket group and returns its three parts: the opening
/// token, the raw (not yet parsed) contents, and the closing token.
fn template1(input: Input) -> IResult<Input, (GreenElement, Input, GreenElement), ()> {
    let (rest, open) = l_curly_token(input)?;
    let (rest, body) = balanced_brackets(rest)?;
    r_curly_token(rest).map(|(rest, close)| (rest, (open, body, close)))
}
/// Parses the bare script form: an optional `+`/`-` sign followed by a run of
/// alphanumeric characters, commas, backslashes and dots that ends on an
/// alphanumeric character.
///
/// Trailing punctuation is not part of the script. It is left in the
/// remaining input rather than causing the whole match to fail, so `2.`
/// yields the script `2` with `.` left over. An error is returned when the
/// run contains no alphanumeric character at all.
///
/// Returns the remaining input, the sign as a text token (if any) and the
/// script characters as a text token.
fn template2(input: Input) -> IResult<Input, (Option<GreenElement>, GreenElement), ()> {
    let (input, sign) = opt(alt((tag("+"), tag("-"))))(input)?;

    // Longest run of characters that are allowed to appear inside the script.
    let (_, run) =
        take_while1(|c: char| c.is_alphanumeric() || c == ',' || c == '\\' || c == '.')(input)?;

    // Give trailing commas, backslashes and dots back to the caller so that the
    // script ends on an alphanumeric character.
    let len = run
        .s
        .trim_end_matches(|c: char| !c.is_alphanumeric())
        .len();
    if len == 0 {
        return Err(nom::Err::Error(()));
    }
    let (input, contents) = input.take_split(len);

    Ok((input, (sign.map(|x| x.text_token()), contents.text_token())))
}
/// Splits `input` at the `}` that closes a `{` consumed by the caller.
///
/// Nested `{ ... }` pairs inside the contents are skipped. On success the
/// first element of the pair is the input starting at the closing `}` and the
/// second is everything before it. Fails when no matching `}` exists.
fn balanced_brackets(input: Input) -> IResult<Input, Input, ()> {
    let bytes = input.as_bytes();
    // Number of nested `{` that are still waiting for their `}`.
    let mut depth = 0;

    for pos in memchr2_iter(b'{', b'}', bytes) {
        match bytes[pos] {
            b'{' => depth += 1,
            _ if depth == 0 => return Ok(input.take_split(pos)),
            _ => depth -= 1,
        }
    }

    Err(nom::Err::Error(()))
}
/// Checks whether a `^` or `_` marker following `s` may start a superscript
/// or subscript.
///
/// `s` is the text that precedes the marker. A script has to be attached to a
/// preceding non-whitespace character, so this returns `false` when `s` is
/// empty or when its last byte is ASCII whitespace (space, tab, line feed,
/// form feed or carriage return), and `true` otherwise.
pub fn verify_pre(s: &str) -> bool {
    s.as_bytes()
        .last()
        .map_or(false, |last| !last.is_ascii_whitespace())
}
// Snapshot tests for `subscript_node`, one per accepted script form.
#[test]
fn parse() {
use crate::ast::Subscript;
use crate::tests::to_ast;
// Helper that runs `subscript_node` on a string and yields a `Subscript`.
let to_subscript = to_ast::<Subscript>(subscript_node);
// A single asterisk is kept as a plain text token.
insta::assert_debug_snapshot!(
to_subscript("_*").syntax,
@r###"
SUBSCRIPT@0..2
UNDERSCORE@0..1 "_"
TEXT@1..2 "*"
"###
);
// A curly-bracket group: its contents are parsed as objects (bold here),
// and may span a line break.
insta::assert_debug_snapshot!(
to_subscript("_{*bo\nld*}").syntax,
@r###"
SUBSCRIPT@0..10
UNDERSCORE@0..1 "_"
L_CURLY@1..2 "{"
BOLD@2..9
STAR@2..3 "*"
TEXT@3..8 "bo\nld"
STAR@8..9 "*"
R_CURLY@9..10 "}"
"###
);
// A signed bare script: the sign and the characters become separate text tokens.
insta::assert_debug_snapshot!(
to_subscript("_+123").syntax,
@r###"
SUBSCRIPT@0..5
UNDERSCORE@0..1 "_"
TEXT@1..2 "+"
TEXT@2..5 "123"
"###
);
// An unsigned bare script.
insta::assert_debug_snapshot!(
to_subscript("_abc").syntax,
@r###"
SUBSCRIPT@0..4
UNDERSCORE@0..1 "_"
TEXT@1..4 "abc"
"###
);
}

View file

@ -313,6 +313,12 @@ $$
Entity
\\alpha\\_ \\rightarrow{}\\_ \\beta
-----
Subscript & superscript & line break
E= mc^2 \\\\
Fe_{_3_}O_4
`);
editor.session.on("change", () => render());