From 6c4513d857402fa53a0ba55dd793e32bb6579881 Mon Sep 17 00:00:00 2001
From: PoiScript
Date: Thu, 9 May 2024 13:04:49 +0800
Subject: [PATCH] feat: initial support for org-fc cloze syntax

---
 Cargo.toml               |   1 +
 src/ast/cloze.rs         | 119 +++++++++++++++++++++++++++++
 src/ast/mod.rs           |   4 +
 src/export/event.rs      |   3 +
 src/syntax/cloze.rs      | 162 +++++++++++++++++++++++++++++++++++++++
 src/syntax/combinator.rs |   2 +
 src/syntax/mod.rs        |   6 ++
 src/syntax/object.rs     |  11 ++-
 8 files changed, 307 insertions(+), 1 deletion(-)
 create mode 100644 src/ast/cloze.rs
 create mode 100644 src/syntax/cloze.rs

diff --git a/Cargo.toml b/Cargo.toml
index 4493689..92ca702 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,6 +21,7 @@ default = []
 indexmap = ["dep:indexmap"]
 chrono = ["dep:chrono"]
 tracing = ["dep:tracing"]
+syntax-org-fc = []
 
 [dependencies]
 bytecount = "0.6"
diff --git a/src/ast/cloze.rs b/src/ast/cloze.rs
new file mode 100644
index 0000000..f85484b
--- /dev/null
+++ b/src/ast/cloze.rs
@@ -0,0 +1,119 @@
+use crate::{syntax::OrgLanguage, SyntaxElement, SyntaxKind, SyntaxNode};
+use rowan::{ast::AstNode, TextRange, TextSize};
+
+use super::Token;
+
+/// An org-fc cloze object, e.g. `{{text}{hint}@id}`
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Cloze {
+    pub(crate) syntax: SyntaxNode,
+}
+
+impl AstNode for Cloze {
+    type Language = OrgLanguage;
+
+    fn can_cast(kind: SyntaxKind) -> bool {
+        kind == SyntaxKind::CLOZE
+    }
+
+    fn cast(node: SyntaxNode) -> Option<Self> {
+        Self::can_cast(node.kind()).then(|| Cloze { syntax: node })
+    }
+
+    fn syntax(&self) -> &SyntaxNode {
+        &self.syntax
+    }
+}
+
+impl Cloze {
+    /// Beginning position of this element
+    pub fn start(&self) -> TextSize {
+        self.syntax.text_range().start()
+    }
+
+    /// Ending position of this element
+    pub fn end(&self) -> TextSize {
+        self.syntax.text_range().end()
+    }
+
+    /// Range of this element
+    pub fn text_range(&self) -> TextRange {
+        self.syntax.text_range()
+    }
+
+    /// Raw text of this element
+    pub fn raw(&self) -> String {
+        self.syntax.to_string()
+    }
+
+    /// Syntax elements of the cloze text: every child after the first one,
+    /// up to (but not including) the first direct-child `}` token
+    pub fn text(&self) -> impl Iterator<Item = SyntaxElement> {
+        self.syntax
+            .children_with_tokens()
+            .skip(1)
+            .take_while(|n| n.kind() != SyntaxKind::R_CURLY)
+    }
+
+    /// Raw text of the cloze text, i.e. the concatenation of [`Cloze::text`]
+    ///
+    /// ```rust
+    /// use orgize::{Org, ast::Cloze};
+    ///
+    /// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.text_raw(), "text");
+    /// let cloze = Org::parse("{{$\\frac{1}{2}$}{}@id}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.text_raw(), "$\\frac{1}{2}$");
+    /// let cloze = Org::parse("{{ [[file:my_image.png]] }{hint}}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.text_raw(), " [[file:my_image.png]] ");
+    /// ```
+    pub fn text_raw(&self) -> String {
+        self.text().map(|e| e.to_string()).collect()
+    }
+
+    /// Hint of this cloze: the token right after the first direct-child `{`
+    ///
+    /// ```rust
+    /// use orgize::{Org, ast::Cloze};
+    ///
+    /// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
+    /// assert!(cloze.hint().is_none());
+    /// let cloze = Org::parse("{{text}{}@id}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.hint().unwrap(), "");
+    /// let cloze = Org::parse("{{text}{hint}}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.hint().unwrap(), "hint");
+    /// ```
+    pub fn hint(&self) -> Option<Token> {
+        self.syntax
+            .children_with_tokens()
+            .skip_while(|n| n.kind() != SyntaxKind::L_CURLY)
+            .nth(1)
+            .and_then(|e| {
+                debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
+                Some(Token(e.into_token()?))
+            })
+    }
+
+    /// Id of this cloze: the token right after the first direct-child `@`
+    ///
+    /// ```rust
+    /// use orgize::{Org, ast::Cloze};
+    ///
+    /// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
+    /// assert!(cloze.id().is_none());
+    /// let cloze = Org::parse("{{text}@}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.id().unwrap(), "");
+    /// let cloze = Org::parse("{{text}@id}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.id().unwrap(), "id");
+    /// ```
+    pub fn id(&self) -> Option<Token> {
+        self.syntax
+            .children_with_tokens()
+            .skip_while(|n| n.kind() != SyntaxKind::AT)
+            .nth(1)
+            .and_then(|e| {
+                debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
+                Some(Token(e.into_token()?))
+            })
+    }
+}
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index a4c7355..9cd7817 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -3,6 +3,8 @@ mod generated;
 mod affiliated_keyword;
 mod block;
 mod clock;
+#[cfg(feature = "syntax-org-fc")]
+mod cloze;
 mod comment;
 mod document;
 mod drawer;
@@ -20,6 +22,8 @@ mod snippet;
 mod table;
 mod timestamp;
 
+#[cfg(feature = "syntax-org-fc")]
+pub use cloze::*;
 pub use generated::*;
 pub use headline::*;
 pub use rowan::ast::support::*;
diff --git a/src/export/event.rs b/src/export/event.rs
index 8b0d92a..46f65c6 100644
--- a/src/export/event.rs
+++ b/src/export/event.rs
@@ -67,4 +67,7 @@ pub enum Event {
     LatexFragment(LatexFragment),
     LatexEnvironment(LatexEnvironment),
     Entity(Entity),
+
+    #[cfg(feature = "syntax-org-fc")]
+    Cloze(Cloze),
 }
diff --git a/src/syntax/cloze.rs b/src/syntax/cloze.rs
new file mode 100644
index 0000000..20645a4
--- /dev/null
+++ b/src/syntax/cloze.rs
@@ -0,0 +1,162 @@
+use nom::{bytes::complete::take_until, combinator::opt, sequence::tuple, IResult, InputTake};
+
+use crate::syntax::{
+    combinator::{at_token, l_curly2_token, l_curly_token, r_curly_token},
+    object::standard_object_nodes,
+};
+
+use super::{
+    combinator::{GreenElement, NodeBuilder},
+    input::Input,
+    SyntaxKind,
+};
+
+/// Parses an org-fc cloze object: `{{text}}`, `{{text}{hint}}`,
+/// `{{text}@id}` or `{{text}{hint}@id}`.
+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
+)]
+pub fn cloze_node(input: Input) -> IResult<Input, GreenElement, ()> {
+    crate::lossless_parser!(cloze_node_base, input)
+}
+
+/// Matches `{{text}`, then an optional `{hint}`, then an optional `@id`, then
+/// the final `}`. A `}` between two `$` does not end the text.
+fn cloze_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
+    let (input, l_curly2) = l_curly2_token(input)?;
+
+    // Find the `}` that closes the text, skipping any `}` inside `$...$`.
+    let mut inside_latex = false;
+    let text_end = input.bytes().position(|byte| {
+        if byte == b'$' {
+            inside_latex = !inside_latex;
+        }
+        byte == b'}' && !inside_latex
+    });
+
+    // Reject a missing closing `}` as well as an empty text (`{{}}`).
+    let text_end = match text_end {
+        Some(end) if end > 0 => end,
+        _ => return Err(nom::Err::Error(())),
+    };
+
+    let (input, text) = input.take_split(text_end);
+
+    let (input, r_curly) = r_curly_token(input)?;
+
+    let (input, hint) = opt(tuple((l_curly_token, take_until("}"), r_curly_token)))(input)?;
+
+    let (input, id) = opt(tuple((at_token, take_until("}"))))(input)?;
+
+    let (input, r_curly_) = r_curly_token(input)?;
+
+    let mut b = NodeBuilder::new();
+
+    b.push(l_curly2);
+    b.children.extend(standard_object_nodes(text));
+    b.push(r_curly);
+
+    if let Some((l_curly, hint, r_curly)) = hint {
+        b.push(l_curly);
+        b.token(SyntaxKind::TEXT, hint);
+        b.push(r_curly);
+    }
+
+    if let Some((at, id)) = id {
+        b.push(at);
+        b.token(SyntaxKind::TEXT, id);
+    }
+
+    b.push(r_curly_);
+
+    Ok((input, b.finish(SyntaxKind::CLOZE)))
+}
+
+#[test]
+fn parse() {
+    use crate::ast::Cloze;
+    use crate::config::ParseConfig;
+    use crate::tests::to_ast;
+
+    let to_cloze = to_ast::<Cloze>(cloze_node);
+
+    insta::assert_debug_snapshot!(
+        to_cloze("{{text}}").syntax,
+        @r###"
+    CLOZE@0..8
+      L_CURLY2@0..2 "{{"
+      TEXT@2..6 "text"
+      R_CURLY@6..7 "}"
+      R_CURLY@7..8 "}"
+    "###
+    );
+
+    insta::assert_debug_snapshot!(
+        to_cloze("{{text}@id}").syntax,
+        @r###"
+    CLOZE@0..11
+      L_CURLY2@0..2 "{{"
+      TEXT@2..6 "text"
+      R_CURLY@6..7 "}"
+      AT@7..8 "@"
+      TEXT@8..10 "id"
+      R_CURLY@10..11 "}"
+    "###
+    );
+
+    insta::assert_debug_snapshot!(
+        to_cloze("{{text}{hint}}").syntax,
+        @r###"
+    CLOZE@0..14
+      L_CURLY2@0..2 "{{"
+      TEXT@2..6 "text"
+      R_CURLY@6..7 "}"
+      L_CURLY@7..8 "{"
+      TEXT@8..12 "hint"
+      R_CURLY@12..13 "}"
+      R_CURLY@13..14 "}"
+    "###
+    );
+
+    insta::assert_debug_snapshot!(
+        to_cloze("{{text}{hint}@id}").syntax,
+        @r###"
+    CLOZE@0..17
+      L_CURLY2@0..2 "{{"
+      TEXT@2..6 "text"
+      R_CURLY@6..7 "}"
+      L_CURLY@7..8 "{"
+      TEXT@8..12 "hint"
+      R_CURLY@12..13 "}"
+      AT@13..14 "@"
+      TEXT@14..16 "id"
+      R_CURLY@16..17 "}"
+    "###
+    );
+
+    insta::assert_debug_snapshot!(
+        to_cloze("{{$\\frac{a}{b}$}{fractions}}").syntax,
+        @r###"
+    CLOZE@0..28
+      L_CURLY2@0..2 "{{"
+      LATEX_FRAGMENT@2..15
+        DOLLAR@2..3 "$"
+        TEXT@3..14 "\\frac{a}{b}"
+        DOLLAR@14..15 "$"
+      R_CURLY@15..16 "}"
+      L_CURLY@16..17 "{"
+      TEXT@17..26 "fractions"
+      R_CURLY@26..27 "}"
+      R_CURLY@27..28 "}"
+    "###
+    );
+
+    let config = &ParseConfig::default();
+
+    assert!(cloze_node(("{{}}", config).into()).is_err());
+    assert!(cloze_node(("{{text}", config).into()).is_err());
+    assert!(cloze_node(("{text}}", config).into()).is_err());
+    assert!(cloze_node(("{{text}{}", config).into()).is_err());
+    assert!(cloze_node(("{{text}a}", config).into()).is_err());
+}
diff --git a/src/syntax/combinator.rs b/src/syntax/combinator.rs
index a06478d..bd69797 100644
--- a/src/syntax/combinator.rs
+++ b/src/syntax/combinator.rs
@@ -42,6 +42,8 @@ token_parser!(r_parens_token, ")", R_PARENS);
 token_parser!(l_angle_token, "<", L_ANGLE);
 token_parser!(r_angle_token, ">", R_ANGLE);
 token_parser!(l_curly_token, "{", L_CURLY);
+#[cfg(feature = "syntax-org-fc")]
+token_parser!(l_curly2_token, "{{", L_CURLY2);
 token_parser!(r_curly_token, "}", R_CURLY);
 token_parser!(l_curly3_token, "{{{", L_CURLY3);
 token_parser!(r_curly3_token, "}}}", R_CURLY3);
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index 64a008c..4b0a620 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -2,6 +2,8 @@
 
 pub mod block;
 pub mod clock;
+#[cfg(feature = "syntax-org-fc")]
+pub mod cloze;
 pub mod combinator;
 pub mod comment;
 pub mod cookie;
@@ -79,6 +81,7 @@ pub enum SyntaxKind {
     R_ANGLE, // '>'
     L_CURLY, // '{'
     R_CURLY, // '}'
+    L_CURLY2, // '{{'
     L_CURLY3, // '{{{'
     R_CURLY3, // '}}}'
     L_ANGLE2, // '<<'
@@ -226,6 +229,9 @@ pub enum SyntaxKind {
     TIMESTAMP_DELAY_MARK,
     TIMESTAMP_VALUE,
     TIMESTAMP_UNIT,
+
+    #[cfg(feature = "syntax-org-fc")]
+    CLOZE,
 }
 
 impl From<SyntaxKind> for rowan::SyntaxKind {
diff --git a/src/syntax/object.rs b/src/syntax/object.rs
index 26e63fd..a76cc67 100644
--- a/src/syntax/object.rs
+++ b/src/syntax/object.rs
@@ -152,6 +152,7 @@ pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
 /// - Text Markup (bold code strike verbatim underline italic)
 /// - Line Breaks
 /// - Subscript and Superscript
+/// - Cloze (if `syntax-org-fc` is enabled)
 ///
 /// // todo:
 /// - Citations
@@ -166,7 +167,15 @@ pub fn standard_object_nodes(input: Input) -> Vec<GreenElement> {
             b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
             b'~' if emphasis::verify_pre(pre.s) => code_node(i),
             b'@' => snippet_node(i),
-            b'{' => macros_node(i),
+            b'{' => {
+                cfg_if::cfg_if! {
+                    if #[cfg(feature = "syntax-org-fc")] {
+                        macros_node(i).or_else(|_| super::cloze::cloze_node(i))
+                    } else {
+                        macros_node(i)
+                    }
+                }
+            }
             b'<' => radio_target_node(i)
                 .or_else(|_| target_node(i))
                 .or_else(|_| timestamp_diary_node(i))