feat: initial support for org-fc cloze syntax

This commit is contained in:
PoiScript 2024-05-09 13:04:49 +08:00
parent 8a29a46095
commit 6c4513d857
No known key found for this signature in database
GPG key ID: 22C2B1249D99985E
8 changed files with 299 additions and 1 deletions

View file

@ -21,6 +21,7 @@ default = []
indexmap = ["dep:indexmap"]
chrono = ["dep:chrono"]
tracing = ["dep:tracing"]
syntax-org-fc = []
[dependencies]
bytecount = "0.6"

111
src/ast/cloze.rs Normal file
View file

@ -0,0 +1,111 @@
use crate::{syntax::OrgLanguage, SyntaxElement, SyntaxKind, SyntaxNode};
use rowan::{ast::AstNode, TextRange, TextSize};
use super::Token;
/// Typed AST wrapper for an org-fc cloze, i.e. a syntax node of kind `SyntaxKind::CLOZE`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Cloze {
// Underlying syntax node; `cast` only wraps nodes whose kind is `SyntaxKind::CLOZE`.
pub(crate) syntax: SyntaxNode,
}
/// Typed-AST glue that lets a `CLOZE` syntax node be viewed as a [`Cloze`].
impl AstNode for Cloze {
    type Language = OrgLanguage;

    /// Returns `true` only for the `CLOZE` syntax kind.
    fn can_cast(kind: SyntaxKind) -> bool {
        matches!(kind, SyntaxKind::CLOZE)
    }

    /// Wraps `node` when its kind is `CLOZE`; any other kind yields `None`.
    fn cast(node: SyntaxNode) -> Option<Cloze> {
        if Self::can_cast(node.kind()) {
            Some(Cloze { syntax: node })
        } else {
            None
        }
    }

    /// Borrows the wrapped syntax node.
    fn syntax(&self) -> &SyntaxNode {
        &self.syntax
    }
}
impl Cloze {
/// Beginning position of this element
pub fn start(&self) -> TextSize {
self.syntax.text_range().start()
}
/// Ending position of this element
pub fn end(&self) -> TextSize {
self.syntax.text_range().end()
}
/// Range of this element
pub fn text_range(&self) -> TextRange {
self.syntax.text_range()
}
/// Raw text of this element
pub fn raw(&self) -> String {
self.syntax.to_string()
}
pub fn text(&self) -> impl Iterator<Item = SyntaxElement> {
self.syntax
.children_with_tokens()
.skip(1)
.take_while(|n| n.kind() != SyntaxKind::R_CURLY)
}
/// ```rust
/// use orgize::{Org, ast::Cloze};
///
/// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.text_raw(), "text");
/// let cloze = Org::parse("{{$\\frac{1}{2}$}{}@id}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.text_raw(), "$\\frac{1}{2}$");
/// let cloze = Org::parse("{{ [[file:my_image.png]] }{hint}}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.text_raw(), " [[file:my_image.png]] ");
/// ```
pub fn text_raw(&self) -> String {
self.text()
.fold(String::new(), |acc, e| acc + &e.to_string())
}
/// ```rust
/// use orgize::{Org, ast::Cloze};
///
/// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
/// assert!(cloze.hint().is_none());
/// let cloze = Org::parse("{{text}{}@id}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.hint().unwrap(), "");
/// let cloze = Org::parse("{{text}{hint}}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.hint().unwrap(), "hint");
/// ```
pub fn hint(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.skip_while(|n| n.kind() != SyntaxKind::L_CURLY)
.nth(1)
.and_then(|e| {
debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
Some(Token(e.into_token()?))
})
}
/// ```rust
/// use orgize::{Org, ast::Cloze};
///
/// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
/// assert!(cloze.id().is_none());
/// let cloze = Org::parse("{{text}@}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.id().unwrap(), "");
/// let cloze = Org::parse("{{text}@id}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.id().unwrap(), "id");
/// ```
pub fn id(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.skip_while(|n| n.kind() != SyntaxKind::AT)
.nth(1)
.and_then(|e| {
debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
Some(Token(e.into_token()?))
})
}
}

View file

@ -3,6 +3,8 @@ mod generated;
mod affiliated_keyword;
mod block;
mod clock;
#[cfg(feature = "syntax-org-fc")]
mod cloze;
mod comment;
mod document;
mod drawer;
@ -20,6 +22,8 @@ mod snippet;
mod table;
mod timestamp;
#[cfg(feature = "syntax-org-fc")]
pub use cloze::*;
pub use generated::*;
pub use headline::*;
pub use rowan::ast::support::*;

View file

@ -67,4 +67,7 @@ pub enum Event {
LatexFragment(LatexFragment),
LatexEnvironment(LatexEnvironment),
Entity(Entity),
#[cfg(feature = "syntax-org-fc")]
Cloze(Cloze),
}

162
src/syntax/cloze.rs Normal file
View file

@ -0,0 +1,162 @@
use nom::{bytes::complete::take_until, combinator::opt, sequence::tuple, IResult, InputTake};
use crate::syntax::{
combinator::{at_token, l_curly2_token, l_curly_token, r_curly_token},
object::standard_object_nodes,
};
use super::{
combinator::{GreenElement, NodeBuilder},
input::Input,
SyntaxKind,
};
/// Parses an org-fc cloze object at the start of `input`: `{{text}`, optionally followed
/// by `{hint}` and/or `@id`, then a final `}`.
///
/// Delegates to `cloze_node_base` through the crate's `lossless_parser!` macro.
/// NOTE(review): the macro is defined elsewhere; presumably it verifies that the parser
/// does not drop any input — confirm against its definition.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn cloze_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(cloze_node_base, input)
}
fn cloze_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, l_curly2) = l_curly2_token(input)?;
let mut inside_latex = false;
let mut text_end = 0;
for (index, byte) in input.bytes().enumerate() {
match byte {
b'}' if !inside_latex => {
text_end = index;
break;
}
b'$' => {
inside_latex = !inside_latex;
}
_ => {}
}
}
if text_end == 0 {
return Err(nom::Err::Error(()));
}
let (input, text) = input.take_split(text_end);
let (input, r_curly) = r_curly_token(input)?;
let (input, hint) = opt(tuple((l_curly_token, take_until("}"), r_curly_token)))(input)?;
let (input, id) = opt(tuple((at_token, take_until("}"))))(input)?;
let (input, r_curly_) = r_curly_token(input)?;
let mut b = NodeBuilder::new();
b.push(l_curly2);
b.children.extend(standard_object_nodes(text));
b.push(r_curly);
if let Some((l_curly, hint, r_curly)) = hint {
b.push(l_curly);
b.token(SyntaxKind::TEXT, hint);
b.push(r_curly);
}
if let Some((at, id)) = id {
b.push(at);
b.token(SyntaxKind::TEXT, id);
}
b.push(r_curly_);
Ok((input, b.finish(SyntaxKind::CLOZE)))
}
// Snapshot tests for the cloze parser: text only, text + id, text + hint,
// text + hint + id, LaTeX text containing braces, then inputs that must be rejected.
#[test]
fn parse() {
use crate::ast::Cloze;
use crate::config::ParseConfig;
use crate::tests::to_ast;
let to_cloze = to_ast::<Cloze>(cloze_node);
// Text only.
insta::assert_debug_snapshot!(
to_cloze("{{text}}").syntax,
@r###"
CLOZE@0..8
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
R_CURLY@7..8 "}"
"###
);
// Text followed by an id.
insta::assert_debug_snapshot!(
to_cloze("{{text}@id}").syntax,
@r###"
CLOZE@0..11
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
AT@7..8 "@"
TEXT@8..10 "id"
R_CURLY@10..11 "}"
"###
);
// Text followed by a hint.
insta::assert_debug_snapshot!(
to_cloze("{{text}{hint}}").syntax,
@r###"
CLOZE@0..14
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
L_CURLY@7..8 "{"
TEXT@8..12 "hint"
R_CURLY@12..13 "}"
R_CURLY@13..14 "}"
"###
);
// Text followed by both a hint and an id.
insta::assert_debug_snapshot!(
to_cloze("{{text}{hint}@id}").syntax,
@r###"
CLOZE@0..17
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
L_CURLY@7..8 "{"
TEXT@8..12 "hint"
R_CURLY@12..13 "}"
AT@13..14 "@"
TEXT@14..16 "id"
R_CURLY@16..17 "}"
"###
);
// Braces inside `$…$` must not terminate the cloze text.
insta::assert_debug_snapshot!(
to_cloze("{{$\\frac{a}{b}$}{fractions}}").syntax,
@r###"
CLOZE@0..28
L_CURLY2@0..2 "{{"
LATEX_FRAGMENT@2..15
DOLLAR@2..3 "$"
TEXT@3..14 "\\frac{a}{b}"
DOLLAR@14..15 "$"
R_CURLY@15..16 "}"
L_CURLY@16..17 "{"
TEXT@17..26 "fractions"
R_CURLY@26..27 "}"
R_CURLY@27..28 "}"
"###
);
// Rejected inputs: empty text, missing final `}`, single opening brace,
// hint without the final `}`, and stray text where only a hint/id/`}` may follow.
let config = &ParseConfig::default();
assert!(cloze_node(("{{}}", config).into()).is_err());
assert!(cloze_node(("{{text}", config).into()).is_err());
assert!(cloze_node(("{text}}", config).into()).is_err());
assert!(cloze_node(("{{text}{}", config).into()).is_err());
assert!(cloze_node(("{{text}a}", config).into()).is_err());
}

View file

@ -42,6 +42,8 @@ token_parser!(r_parens_token, ")", R_PARENS);
token_parser!(l_angle_token, "<", L_ANGLE);
token_parser!(r_angle_token, ">", R_ANGLE);
token_parser!(l_curly_token, "{", L_CURLY);
// "{{" opener consumed by the org-fc cloze parser; only compiled when the
// `syntax-org-fc` feature is enabled.
#[cfg(feature = "syntax-org-fc")]
token_parser!(l_curly2_token, "{{", L_CURLY2);
token_parser!(r_curly_token, "}", R_CURLY);
token_parser!(l_curly3_token, "{{{", L_CURLY3);
token_parser!(r_curly3_token, "}}}", R_CURLY3);

View file

@ -2,6 +2,8 @@
pub mod block;
pub mod clock;
#[cfg(feature = "syntax-org-fc")]
pub mod cloze;
pub mod combinator;
pub mod comment;
pub mod cookie;
@ -79,6 +81,7 @@ pub enum SyntaxKind {
R_ANGLE, // '>'
L_CURLY, // '{'
R_CURLY, // '}'
L_CURLY2, // '{{'
L_CURLY3, // '{{{'
R_CURLY3, // '}}}'
L_ANGLE2, // '<<'
@ -226,6 +229,9 @@ pub enum SyntaxKind {
TIMESTAMP_DELAY_MARK,
TIMESTAMP_VALUE,
TIMESTAMP_UNIT,
#[cfg(feature = "syntax-org-fc")]
CLOZE,
}
impl From<SyntaxKind> for rowan::SyntaxKind {

View file

@ -152,6 +152,7 @@ pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
/// - Text Markup (bold code strike verbatim underline italic)
/// - Line Breaks
/// - Subscript and Superscript
/// - Cloze (if `syntax-org-fc` is enabled)
///
/// // todo:
/// - Citations
@ -166,7 +167,15 @@ pub fn standard_object_nodes(input: Input) -> Vec<GreenElement> {
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'@' => snippet_node(i),
b'{' => macros_node(i),
b'{' => {
cfg_if::cfg_if! {
if #[cfg(feature = "syntax-org-fc")] {
macros_node(i).or_else(|_| super::cloze::cloze_node(i))
} else {
macros_node(i)
}
}
}
b'<' => radio_target_node(i)
.or_else(|_| target_node(i))
.or_else(|_| timestamp_diary_node(i))