diff --git a/examples/html-slugify.rs b/examples/html-slugify.rs
index 769699e..f1b7595 100644
--- a/examples/html-slugify.rs
+++ b/examples/html-slugify.rs
@@ -50,7 +50,7 @@ impl Traverser for MyHtmlHandler {
         special_block quote_block center_block verse_block comment_block example_block export_block
         source_block babel_call clock cookie radio_target drawer dyn_block fn_def fn_ref macros
         snippet timestamp target fixed_width org_table org_table_row org_table_cell latex_fragment
-        latex_environment
+        latex_environment entity
     }
 }
 
diff --git a/src/ast/entity.rs b/src/ast/entity.rs
new file mode 100644
index 0000000..7306bfd
--- /dev/null
+++ b/src/ast/entity.rs
@@ -0,0 +1,168 @@
+use crate::{entities::ENTITIES, SyntaxKind};
+
+use super::{filter_token, Entity};
+
+impl Entity {
+    /// Finds the `ENTITIES` row whose name equals the text of the first child
+    /// token accepted by `filter_token(SyntaxKind::TEXT)`.
+    ///
+    /// Returns `None` when no such token exists or no row has that name.
+    fn entity(&self) -> Option<&(&str, &str, bool, &str, &str, &str, &str)> {
+        let token = self
+            .syntax
+            .children_with_tokens()
+            .find_map(filter_token(SyntaxKind::TEXT))?;
+        let token = token.text();
+
+        ENTITIES.iter().find(|i| i.0 == token)
+    }
+
+    /// Entity name
+    ///
+    /// On a failed lookup this trips `debug_assert!`; without debug assertions it returns `""`.
+    ///
+    /// ```rust
+    /// use orgize::{ast::Entity, Org};
+    ///
+    /// let e = Org::parse("\\alpha{}").first_node::<Entity>().unwrap();
+    /// assert_eq!(e.name(), "alpha");
+    /// let e = Org::parse("\\_ ").first_node::<Entity>().unwrap();
+    /// assert_eq!(e.name(), " ");
+    /// ```
+    pub fn name(&self) -> &str {
+        self.entity().map(|e| e.0).unwrap_or_else(|| {
+            debug_assert!(false, "entity lookup failed");
+            ""
+        })
+    }
+
+    /// Entity LaTeX representation
+    ///
+    /// On a failed lookup this trips `debug_assert!`; without debug assertions it returns `""`.
+    ///
+    /// ```rust
+    /// use orgize::{ast::Entity, Org};
+    ///
+    /// let e = Org::parse("\\middot").first_node::<Entity>().unwrap();
+    /// assert_eq!(e.latex(), "\\textperiodcentered{}");
+    /// ```
+    pub fn latex(&self) -> &str {
+        self.entity().map(|e| e.1).unwrap_or_else(|| {
+            debug_assert!(false, "entity lookup failed");
+            ""
+        })
+    }
+
+    /// Whether entity needs to be in math mode
+    ///
+    /// On a failed lookup this trips `debug_assert!`; without debug assertions it returns `false`.
+    ///
+    /// ```rust
+    /// use orgize::{ast::Entity, Org};
+    ///
+    /// let e = Org::parse("\\middot").first_node::<Entity>().unwrap();
+    /// assert!(!e.is_latex_math());
+    /// let e = Org::parse("\\alefsym").first_node::<Entity>().unwrap();
+    /// assert!(e.is_latex_math());
+    /// ```
+    pub fn is_latex_math(&self) -> bool {
+        self.entity().map(|e| e.2).unwrap_or_else(|| {
+            debug_assert!(false, "entity lookup failed");
+            false
+        })
+    }
+
+    /// Entity HTML representation
+    ///
+    /// On a failed lookup this trips `debug_assert!`; without debug assertions it returns `""`.
+    ///
+    /// ```rust
+    /// use orgize::{ast::Entity, Org};
+    ///
+    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
+    /// assert_eq!(e.html(), "§");
+    /// ```
+    pub fn html(&self) -> &str {
+        self.entity().map(|e| e.3).unwrap_or_else(|| {
+            debug_assert!(false, "entity lookup failed");
+            ""
+        })
+    }
+
+    /// Entity ASCII representation
+    ///
+    /// On a failed lookup this trips `debug_assert!`; without debug assertions it returns `""`.
+    ///
+    /// ```rust
+    /// use orgize::{ast::Entity, Org};
+    ///
+    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
+    /// assert_eq!(e.ascii(), "section");
+    /// ```
+    pub fn ascii(&self) -> &str {
+        self.entity().map(|e| e.4).unwrap_or_else(|| {
+            debug_assert!(false, "entity lookup failed");
+            ""
+        })
+    }
+
+    /// Entity Latin1 encoding representation
+    ///
+    /// On a failed lookup this trips `debug_assert!`; without debug assertions it returns `""`.
+    ///
+    /// ```rust
+    /// use orgize::{ast::Entity, Org};
+    ///
+    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
+    /// assert_eq!(e.latin1(), "§");
+    /// let e = Org::parse("\\rsaquo").first_node::<Entity>().unwrap();
+    /// assert_eq!(e.latin1(), ">");
+    /// ```
+    pub fn latin1(&self) -> &str {
+        self.entity().map(|e| e.5).unwrap_or_else(|| {
+            debug_assert!(false, "entity lookup failed");
+            ""
+        })
+    }
+
+    /// Entity UTF-8 encoding representation
+    ///
+    /// On a failed lookup this trips `debug_assert!`; without debug assertions it returns `""`.
+    ///
+    /// ```rust
+    /// use orgize::{ast::Entity, Org};
+    ///
+    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
+    /// assert_eq!(e.utf8(), "§");
+    /// let e = Org::parse("\\rsaquo").first_node::<Entity>().unwrap();
+    /// assert_eq!(e.utf8(), "›");
+    /// ```
+    pub fn utf8(&self) -> &str {
+        self.entity().map(|e| e.6).unwrap_or_else(|| {
+            debug_assert!(false, "entity lookup failed");
+            ""
+        })
+    }
+
+    /// Entity contains optional brackets
+    ///
+    /// True when the node has at least two children of kind `TEXT`.
+    ///
+    /// ```rust
+    /// use orgize::{ast::Entity, Org};
+    ///
+    /// let e = Org::parse("\\beta").first_node::<Entity>().unwrap();
+    /// assert!(!e.is_use_brackets());
+    /// let e = Org::parse("\\S{}").first_node::<Entity>().unwrap();
+    /// assert!(e.is_use_brackets());
+    /// let e = Org::parse("\\_ ").first_node::<Entity>().unwrap();
+    /// assert!(!e.is_use_brackets());
+    /// ```
+    pub fn is_use_brackets(&self) -> bool {
+        self.syntax
+            .children_with_tokens()
+            .filter(|n| n.kind() == SyntaxKind::TEXT)
+            .nth(1)
+            .is_some()
+    }
+}
diff --git a/src/ast/generate.js b/src/ast/generate.js
index 5a3c96e..bdd4fbc 100644
--- a/src/ast/generate.js
+++ b/src/ast/generate.js
@@ -263,6 +263,10 @@ const nodes = [
     struct: "LatexFragment",
     kind: ["LATEX_FRAGMENT"],
   },
+  {
+    struct: "Entity",
+    kind: ["ENTITY"],
+  },
 ];
 
 let content = `//! generated file, do not modify it directly
diff --git a/src/ast/generated.rs b/src/ast/generated.rs
index b435c3e..f2f1246 100644
--- a/src/ast/generated.rs
+++ b/src/ast/generated.rs
@@ -1700,3 +1700,28 @@ impl LatexFragment {
         self.syntax.text_range().end().into()
     }
 }
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Entity {
+    pub(crate) syntax: SyntaxNode,
+}
+impl AstNode for Entity {
+    type Language = OrgLanguage;
+    fn can_cast(kind: SyntaxKind) -> bool {
+        kind == ENTITY
+    }
+    fn cast(node: SyntaxNode) -> Option<Self> {
+        Self::can_cast(node.kind()).then(|| Entity { syntax: node })
+    }
+    fn syntax(&self) -> &SyntaxNode {
+        &self.syntax
+    }
+}
+impl Entity {
+    pub fn begin(&self) -> u32 {
+        self.syntax.text_range().start().into()
+    }
+    pub fn end(&self) -> u32 {
+        self.syntax.text_range().end().into()
+    }
+}
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 1bfdf94..18fc23e 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -4,6 +4,7 @@ mod generated;
 
 mod affiliated_keyword;
 mod drawer;
+mod entity;
 mod headline;
 mod inline_call;
 mod link;
diff --git a/src/entities.rs b/src/entities.rs
new file mode 100644
index 0000000..5f1faf9
--- /dev/null
+++ b/src/entities.rs
@@ -0,0 +1,468 @@
+// https://git.sr.ht/~bzg/org-mode/tree/bfa4f9d5aa3e5c94974cae7a459cb5e5b4b15f52/item/lisp/org-entities.el#L85
+// nil -> false
+// t -> true
+// \x00A0 -> \\x00A0
+#[rustfmt::skip]
+pub const
ENTITIES: &[(&str, &str, bool, &str, &str, &str, &str)] = &[ +// ("* Letters" +// Latin +("Agrave", "\\`{A}", false, "À", "A", "À", "À"), +("agrave", "\\`{a}", false, "à", "a", "à", "à"), +("Aacute", "\\'{A}", false, "Á", "A", "Á", "Á"), +("aacute", "\\'{a}", false, "á", "a", "á", "á"), +("Acirc", "\\^{A}", false, "Â", "A", "Â", "Â"), +("acirc", "\\^{a}", false, "â", "a", "â", "â"), +("Amacr", "\\={A}", false, "Ā", "A", "Ã", "Ã"), +("amacr", "\\={a}", false, "ā", "a", "ã", "ã"), +("Atilde", "\\~{A}", false, "Ã", "A", "Ã", "Ã"), +("atilde", "\\~{a}", false, "ã", "a", "ã", "ã"), +("Auml", "\\\"{A}", false, "Ä", "Ae", "Ä", "Ä"), +("auml", "\\\"{a}", false, "ä", "ae", "ä", "ä"), +("Aring", "\\AA{}", false, "Å", "A", "Å", "Å"), +("AA", "\\AA{}", false, "Å", "A", "Å", "Å"), +("aring", "\\aa{}", false, "å", "a", "å", "å"), +("AElig", "\\AE{}", false, "Æ", "AE", "Æ", "Æ"), +("aelig", "\\ae{}", false, "æ", "ae", "æ", "æ"), +("Ccedil", "\\c{C}", false, "Ç", "C", "Ç", "Ç"), +("ccedil", "\\c{c}", false, "ç", "c", "ç", "ç"), +("Egrave", "\\`{E}", false, "È", "E", "È", "È"), +("egrave", "\\`{e}", false, "è", "e", "è", "è"), +("Eacute", "\\'{E}", false, "É", "E", "É", "É"), +("eacute", "\\'{e}", false, "é", "e", "é", "é"), +("Ecirc", "\\^{E}", false, "Ê", "E", "Ê", "Ê"), +("ecirc", "\\^{e}", false, "ê", "e", "ê", "ê"), +("Euml", "\\\"{E}", false, "Ë", "E", "Ë", "Ë"), +("euml", "\\\"{e}", false, "ë", "e", "ë", "ë"), +("Igrave", "\\`{I}", false, "Ì", "I", "Ì", "Ì"), +("igrave", "\\`{i}", false, "ì", "i", "ì", "ì"), +("Iacute", "\\'{I}", false, "Í", "I", "Í", "Í"), +("iacute", "\\'{i}", false, "í", "i", "í", "í"), +("Idot", "\\.{I}", false, "&idot;", "I", "İ", "İ"), +("inodot", "\\i", false, "ı", "i", "ı", "ı"), +("Icirc", "\\^{I}", false, "Î", "I", "Î", "Î"), +("icirc", "\\^{i}", false, "î", "i", "î", "î"), +("Iuml", "\\\"{I}", false, "Ï", "I", "Ï", "Ï"), +("iuml", "\\\"{i}", false, "ï", "i", "ï", "ï"), +("Ntilde", "\\~{N}", false, "Ñ", "N", "Ñ", "Ñ"), +("ntilde", "\\~{n}", false, 
"ñ", "n", "ñ", "ñ"), +("Ograve", "\\`{O}", false, "Ò", "O", "Ò", "Ò"), +("ograve", "\\`{o}", false, "ò", "o", "ò", "ò"), +("Oacute", "\\'{O}", false, "Ó", "O", "Ó", "Ó"), +("oacute", "\\'{o}", false, "ó", "o", "ó", "ó"), +("Ocirc", "\\^{O}", false, "Ô", "O", "Ô", "Ô"), +("ocirc", "\\^{o}", false, "ô", "o", "ô", "ô"), +("Otilde", "\\~{O}", false, "Õ", "O", "Õ", "Õ"), +("otilde", "\\~{o}", false, "õ", "o", "õ", "õ"), +("Ouml", "\\\"{O}", false, "Ö", "Oe", "Ö", "Ö"), +("ouml", "\\\"{o}", false, "ö", "oe", "ö", "ö"), +("Oslash", "\\O", false, "Ø", "O", "Ø", "Ø"), +("oslash", "\\o{}", false, "ø", "o", "ø", "ø"), +("OElig", "\\OE{}", false, "Œ", "OE", "OE", "Œ"), +("oelig", "\\oe{}", false, "œ", "oe", "oe", "œ"), +("Scaron", "\\v{S}", false, "Š", "S", "S", "Š"), +("scaron", "\\v{s}", false, "š", "s", "s", "š"), +("szlig", "\\ss{}", false, "ß", "ss", "ß", "ß"), +("Ugrave", "\\`{U}", false, "Ù", "U", "Ù", "Ù"), +("ugrave", "\\`{u}", false, "ù", "u", "ù", "ù"), +("Uacute", "\\'{U}", false, "Ú", "U", "Ú", "Ú"), +("uacute", "\\'{u}", false, "ú", "u", "ú", "ú"), +("Ucirc", "\\^{U}", false, "Û", "U", "Û", "Û"), +("ucirc", "\\^{u}", false, "û", "u", "û", "û"), +("Uuml", "\\\"{U}", false, "Ü", "Ue", "Ü", "Ü"), +("uuml", "\\\"{u}", false, "ü", "ue", "ü", "ü"), +("Yacute", "\\'{Y}", false, "Ý", "Y", "Ý", "Ý"), +("yacute", "\\'{y}", false, "ý", "y", "ý", "ý"), +("Yuml", "\\\"{Y}", false, "Ÿ", "Y", "Y", "Ÿ"), +("yuml", "\\\"{y}", false, "ÿ", "y", "ÿ", "ÿ"), + +// Latin (special face) +("fnof", "\\textit{f}", false, "ƒ", "f", "f", "ƒ"), +("real", "\\Re", true, "ℜ", "R", "R", "ℜ"), +("image", "\\Im", true, "ℑ", "I", "I", "ℑ"), +("weierp", "\\wp", true, "℘", "P", "P", "℘"), +("ell", "\\ell", true, "ℓ", "ell", "ell", "ℓ"), +("imath", "\\imath", true, "ı", "[dotless i]", "dotless i", "ı"), +("jmath", "\\jmath", true, "ȷ", "[dotless j]", "dotless j", "ȷ"), + +// Greek +("Alpha", "A", false, "Α", "Alpha", "Alpha", "Α"), +("alpha", "\\alpha", true, "α", "alpha", "alpha", "α"), +("Beta", "B", 
false, "Β", "Beta", "Beta", "Β"), +("beta", "\\beta", true, "β", "beta", "beta", "β"), +("Gamma", "\\Gamma", true, "Γ", "Gamma", "Gamma", "Γ"), +("gamma", "\\gamma", true, "γ", "gamma", "gamma", "γ"), +("Delta", "\\Delta", true, "Δ", "Delta", "Delta", "Δ"), +("delta", "\\delta", true, "δ", "delta", "delta", "δ"), +("Epsilon", "E", false, "Ε", "Epsilon", "Epsilon", "Ε"), +("epsilon", "\\epsilon", true, "ε", "epsilon", "epsilon", "ε"), +("varepsilon", "\\varepsilon", true, "ε", "varepsilon", "varepsilon", "ε"), +("Zeta", "Z", false, "Ζ", "Zeta", "Zeta", "Ζ"), +("zeta", "\\zeta", true, "ζ", "zeta", "zeta", "ζ"), +("Eta", "H", false, "Η", "Eta", "Eta", "Η"), +("eta", "\\eta", true, "η", "eta", "eta", "η"), +("Theta", "\\Theta", true, "Θ", "Theta", "Theta", "Θ"), +("theta", "\\theta", true, "θ", "theta", "theta", "θ"), +("thetasym", "\\vartheta", true, "ϑ", "theta", "theta", "ϑ"), +("vartheta", "\\vartheta", true, "ϑ", "theta", "theta", "ϑ"), +("Iota", "I", false, "Ι", "Iota", "Iota", "Ι"), +("iota", "\\iota", true, "ι", "iota", "iota", "ι"), +("Kappa", "K", false, "Κ", "Kappa", "Kappa", "Κ"), +("kappa", "\\kappa", true, "κ", "kappa", "kappa", "κ"), +("Lambda", "\\Lambda", true, "Λ", "Lambda", "Lambda", "Λ"), +("lambda", "\\lambda", true, "λ", "lambda", "lambda", "λ"), +("Mu", "M", false, "Μ", "Mu", "Mu", "Μ"), +("mu", "\\mu", true, "μ", "mu", "mu", "μ"), +("nu", "\\nu", true, "ν", "nu", "nu", "ν"), +("Nu", "N", false, "Ν", "Nu", "Nu", "Ν"), +("Xi", "\\Xi", true, "Ξ", "Xi", "Xi", "Ξ"), +("xi", "\\xi", true, "ξ", "xi", "xi", "ξ"), +("Omicron", "O", false, "Ο", "Omicron", "Omicron", "Ο"), +("omicron", "\\textit{o}", false, "ο", "omicron", "omicron", "ο"), +("Pi", "\\Pi", true, "Π", "Pi", "Pi", "Π"), +("pi", "\\pi", true, "π", "pi", "pi", "π"), +("Rho", "P", false, "Ρ", "Rho", "Rho", "Ρ"), +("rho", "\\rho", true, "ρ", "rho", "rho", "ρ"), +("Sigma", "\\Sigma", true, "Σ", "Sigma", "Sigma", "Σ"), +("sigma", "\\sigma", true, "σ", "sigma", "sigma", "σ"), +("sigmaf", 
"\\varsigma", true, "ς", "sigmaf", "sigmaf", "ς"), +("varsigma", "\\varsigma", true, "ς", "varsigma", "varsigma", "ς"), +("Tau", "T", false, "Τ", "Tau", "Tau", "Τ"), +("Upsilon", "\\Upsilon", true, "Υ", "Upsilon", "Upsilon", "Υ"), +("upsih", "\\Upsilon", true, "ϒ", "upsilon", "upsilon", "ϒ"), +("upsilon", "\\upsilon", true, "υ", "upsilon", "upsilon", "υ"), +("Phi", "\\Phi", true, "Φ", "Phi", "Phi", "Φ"), +("phi", "\\phi", true, "φ", "phi", "phi", "ɸ"), +("varphi", "\\varphi", true, "ϕ", "varphi", "varphi", "φ"), +("Chi", "X", false, "Χ", "Chi", "Chi", "Χ"), +("chi", "\\chi", true, "χ", "chi", "chi", "χ"), +("acutex", "\\acute x", true, "´x", "'x", "'x", "𝑥́"), +("Psi", "\\Psi", true, "Ψ", "Psi", "Psi", "Ψ"), +("psi", "\\psi", true, "ψ", "psi", "psi", "ψ"), +("tau", "\\tau", true, "τ", "tau", "tau", "τ"), +("Omega", "\\Omega", true, "Ω", "Omega", "Omega", "Ω"), +("omega", "\\omega", true, "ω", "omega", "omega", "ω"), +("piv", "\\varpi", true, "ϖ", "omega-pi", "omega-pi", "ϖ"), +("varpi", "\\varpi", true, "ϖ", "omega-pi", "omega-pi", "ϖ"), +("partial", "\\partial", true, "∂", "[partial differential]", "[partial differential]", "∂"), + +// Hebrew +("alefsym", "\\aleph", true, "ℵ", "aleph", "aleph", "ℵ"), +("aleph", "\\aleph", true, "ℵ", "aleph", "aleph", "ℵ"), +("gimel", "\\gimel", true, "ℷ", "gimel", "gimel", "ℷ"), +("beth", "\\beth", true, "ℶ", "beth", "beth", "ב"), +("dalet", "\\daleth", true, "ℸ", "dalet", "dalet", "ד"), + +// Icelandic +("ETH", "\\DH{}", false, "Ð", "D", "Ð", "Ð"), +("eth", "\\dh{}", false, "ð", "dh", "ð", "ð"), +("THORN", "\\TH{}", false, "Þ", "TH", "Þ", "Þ"), +("thorn", "\\th{}", false, "þ", "th", "þ", "þ"), + + //, "* Punctuation", +// Dots and Marks +("dots", "\\dots{}", false, "…", "...", "...", "…"), +("cdots", "\\cdots{}", true, "⋯", "...", "...", "⋯"), +("hellip", "\\dots{}", false, "…", "...", "...", "…"), +("middot", "\\textperiodcentered{}", false, "·", ".", "·", "·"), +("iexcl", "!`", false, "¡", "!", "¡", "¡"), +("iquest", "?`", 
false, "¿", "?", "¿", "¿"), + +// Dash-like +("shy", "\\-", false, "­", "", "", ""), +("ndash", "--", false, "–", "-", "-", "–"), +("mdash", "---", false, "—", "--", "--", "—"), + +// Quotations +("quot", "\\textquotedbl{}", false, """, "\"", "\"", "\""), +("acute", "\\textasciiacute{}", false, "´", "'", "´", "´"), +("ldquo", "\\textquotedblleft{}", false, "“", "\"", "\"", "“"), +("rdquo", "\\textquotedblright{}", false, "”", "\"", "\"", "”"), +("bdquo", "\\quotedblbase{}", false, "„", "\"", "\"", "„"), +("lsquo", "\\textquoteleft{}", false, "‘", "`", "`", "‘"), +("rsquo", "\\textquoteright{}", false, "’", "'", "'", "’"), +("sbquo", "\\quotesinglbase{}", false, "‚", ", ", ", ", "‚"), +("laquo", "\\guillemotleft{}", false, "«", "<<", "«", "«"), +("raquo", "\\guillemotright{}", false, "»", ">>", "»", "»"), +("lsaquo", "\\guilsinglleft{}", false, "‹", "<", "<", "‹"), +("rsaquo", "\\guilsinglright{}", false, "›", ">", ">", "›"), + +//, "* Other", +// Misc. (often used) +("circ", "\\^{}", false, "ˆ", "^", "^", "∘"), +("vert", "\\vert{}", true, "|", "|", "|", "|"), +("vbar", "|", false, "|", "|", "|", "|"), +("brvbar", "\\textbrokenbar{}", false, "¦", "|", "¦", "¦"), +("S", "\\S", false, "§", "section", "§", "§"), +("sect", "\\S", false, "§", "section", "§", "§"), +("P", "\\P{}", false, "¶", "paragraph", "¶", "¶"), +("para", "\\P{}", false, "¶", "paragraph", "¶", "¶"), +("amp", "\\&", false, "&", "&", "&", "&"), +("lt", "\\textless{}", false, "<", "<", "<", "<"), +("gt", "\\textgreater{}", false, ">", ">", ">", ">"), +("tilde", "\\textasciitilde{}", false, "~", "~", "~", "~"), +("slash", "/", false, "/", "/", "/", "/"), +("plus", "+", false, "+", "+", "+", "+"), +("under", "\\_", false, "_", "_", "_", "_"), +("equal", "=", false, "=", "=", "=", "="), +("asciicirc", "\\textasciicircum{}", false, "^", "^", "^", "^"), +("dagger", "\\textdagger{}", false, "†", "[dagger]", "[dagger]", "†"), +("dag", "\\dag{}", false, "†", "[dagger]", "[dagger]", "†"), +("Dagger", 
"\\textdaggerdbl{}", false, "‡", "[doubledagger]", "[doubledagger]", "‡"), +("ddag", "\\ddag{}", false, "‡", "[doubledagger]", "[doubledagger]", "‡"), + +// Whitespace +("nbsp", "~", false, " ", ", ", "\\x00A0", "\\x00A0"), +("ensp", "\\hspace*{.5em}", false, " ", ", ", ", ", " "), +("emsp", "\\hspace*{1em}", false, " ", ", ", ", ", " "), +("thinsp", "\\hspace*{.2em}", false, " ", ", ", ", ", " "), + +// Currency +("curren", "\\textcurrency{}", false, "¤", "curr.", "¤", "¤"), +("cent", "\\textcent{}", false, "¢", "cent", "¢", "¢"), +("pound", "\\pounds{}", false, "£", "pound", "£", "£"), +("yen", "\\textyen{}", false, "¥", "yen", "¥", "¥"), +("euro", "\\texteuro{}", false, "€", "EUR", "EUR", "€"), +("EUR", "\\texteuro{}", false, "€", "EUR", "EUR", "€"), +("dollar", "\\$", false, "$", "$", "$", "$"), +("USD", "\\$", false, "$", "$", "$", "$"), + +// Property Marks +("copy", "\\textcopyright{}", false, "©", "(c)", "©", "©"), +("reg", "\\textregistered{}", false, "®", "(r)", "®", "®"), +("trade", "\\texttrademark{}", false, "™", "TM", "TM", "™"), + +// Science, etrueal. 
+("minus", "-", true, "−", "-", "-", "−"), +("pm", "\\textpm{}", false, "±", "+-", "±", "±"), +("plusmn", "\\textpm{}", false, "±", "+-", "±", "±"), +("times", "\\texttimes{}", false, "×", "*", "×", "×"), +("frasl", "/", false, "⁄", "/", "/", "⁄"), +("colon", "\\colon", true, ":", ":", ":", ":"), +("div", "\\textdiv{}", false, "÷", "/", "÷", "÷"), +("frac12", "\\textonehalf{}", false, "½", "1/2", "½", "½"), +("frac14", "\\textonequarter{}", false, "¼", "1/4", "¼", "¼"), +("frac34", "\\textthreequarters{}", false, "¾", "3/4", "¾", "¾"), +("permil", "\\textperthousand{}", false, "‰", "per thousand", "per thousand", "‰"), +("sup1", "\\textonesuperior{}", false, "¹", "^1", "¹", "¹"), +("sup2", "\\texttwosuperior{}", false, "²", "^2", "²", "²"), +("sup3", "\\textthreesuperior{}", false, "³", "^3", "³", "³"), +("radic", "\\sqrt{\\,}", true, "√", "[square root]", "[square root]", "√"), +("sum", "\\sum", true, "∑", "[sum]", "[sum]", "∑"), +("prod", "\\prod", true, "∏", "[product]", "[n-ary product]", "∏"), +("micro", "\\textmu{}", false, "µ", "micro", "µ", "µ"), +("macr", "\\textasciimacron{}", false, "¯", "[macron]", "¯", "¯"), +("deg", "\\textdegree{}", false, "°", "degree", "°", "°"), +("prime", "\\prime", true, "′", "'", "'", "′"), +("Prime", "\\prime{}\\prime", true, "″", "''", "''", "″"), +("infin", "\\infty", true, "∞", "[infinity]", "[infinity]", "∞"), +("infty", "\\infty", true, "∞", "[infinity]", "[infinity]", "∞"), +("prop", "\\propto", true, "∝", "[proportional to]", "[proportional to]", "∝"), +("propto", "\\propto", true, "∝", "[proportional to]", "[proportional to]", "∝"), +("not", "\\textlnot{}", false, "¬", "[angled dash]", "¬", "¬"), +("neg", "\\neg{}", true, "¬", "[angled dash]", "¬", "¬"), +("land", "\\land", true, "∧", "[logical and]", "[logical and]", "∧"), +("wedge", "\\wedge", true, "∧", "[logical and]", "[logical and]", "∧"), +("lor", "\\lor", true, "∨", "[logical or]", "[logical or]", "∨"), +("vee", "\\vee", true, "∨", "[logical or]", "[logical 
or]", "∨"), +("cap", "\\cap", true, "∩", "[intersection]", "[intersection]", "∩"), +("cup", "\\cup", true, "∪", "[union]", "[union]", "∪"), +("smile", "\\smile", true, "⌣", "[cup product]", "[cup product]", "⌣"), +("frown", "\\frown", true, "⌢", "[Cap product]", "[cap product]", "⌢"), +("int", "\\int", true, "∫", "[integral]", "[integral]", "∫"), +("therefore", "\\therefore", true, "∴", "[therefore]", "[therefore]", "∴"), +("there4", "\\therefore", true, "∴", "[therefore]", "[therefore]", "∴"), +("because", "\\because", true, "∵", "[because]", "[because]", "∵"), +("sim", "\\sim", true, "∼", "~", "~", "∼"), +("cong", "\\cong", true, "≅", "[approx. equal to]", "[approx. equal to]", "≅"), +("simeq", "\\simeq", true, "≅", "[approx. equal to]", "[approx. equal to]", "≅"), +("asymp", "\\asymp", true, "≈", "[, almostrueequal to]", "[, almostrueequal to]", "≈"), +("approx", "\\approx", true, "≈", "[, almostrueequal to]", "[, almostrueequal to]", "≈"), +("ne", "\\ne", true, "≠", "[, notrueequal to]", "[, notrueequal to]", "≠"), +("neq", "\\neq", true, "≠", "[, notrueequal to]", "[, notrueequal to]", "≠"), +("equiv", "\\equiv", true, "≡", "[identical to]", "[identical to]", "≡"), + +("triangleq", "\\triangleq", true, "≜", "[defined to]", "[defined to]", "≜"), +("le", "\\le", true, "≤", "<=", "<=", "≤"), +("leq", "\\le", true, "≤", "<=", "<=", "≤"), +("ge", "\\ge", true, "≥", ">=", ">=", "≥"), +("geq", "\\ge", true, "≥", ">=", ">=", "≥"), +("lessgtr", "\\lessgtr", true, "≶", "[less than or greater than]", "[less than or greater than]", "≶"), +("lesseqgtr", "\\lesseqgtr", true, "⋚", "[less than or equal or greater than or equal]", "[less than or equal or greater than or equal]", "⋚"), +("ll", "\\ll", true, "≪", "<<", "<<", "≪"), +("Ll", "\\lll", true, "⋘", "<<<", "<<<", "⋘"), +("lll", "\\lll", true, "⋘", "<<<", "<<<", "⋘"), +("gg", "\\gg", true, "≫", ">>", ">>", "≫"), +("Gg", "\\ggg", true, "⋙", ">>>", ">>>", "⋙"), +("ggg", "\\ggg", true, "⋙", ">>>", ">>>", "⋙"), +("prec", 
"\\prec", true, "≺", "[precedes]", "[precedes]", "≺"), +("preceq", "\\preceq", true, "≼", "[precedes or equal]", "[precedes or equal]", "≼"), +("preccurlyeq", "\\preccurlyeq", true, "≼", "[precedes or equal]", "[precedes or equal]", "≼"), +("succ", "\\succ", true, "≻", "[succeeds]", "[succeeds]", "≻"), +("succeq", "\\succeq", true, "≽", "[succeeds or equal]", "[succeeds or equal]", "≽"), +("succcurlyeq", "\\succcurlyeq", true, "≽", "[succeeds or equal]", "[succeeds or equal]", "≽"), +("sub", "\\subset", true, "⊂", "[, subsetrueof]", "[, subsetrueof]", "⊂"), +("subset", "\\subset", true, "⊂", "[, subsetrueof]", "[, subsetrueof]", "⊂"), +("sup", "\\supset", true, "⊃", "[, supersetrueof]", "[, supersetrueof]", "⊃"), +("supset", "\\supset", true, "⊃", "[, supersetrueof]", "[, supersetrueof]", "⊃"), +("nsub", "\\not\\subset", true, "⊄", "[, notruea, subsetrueof]", "[, notruea, subsetrueof", "⊄"), +("sube", "\\subseteq", true, "⊆", "[, subsetrueof or equal to]", "[, subsetrueof or equal to]", "⊆"), +("nsup", "\\not\\supset", true, "⊅", "[, notruea, supersetrueof]", "[, notruea, supersetrueof]", "⊅"), +("supe", "\\supseteq", true, "⊇", "[, supersetrueof or equal to]", "[, supersetrueof or equal to]", "⊇"), +("setminus", "\\setminus", true, "∖", "\\", "\\", "⧵"), +("forall", "\\forall", true, "∀", "[for all]", "[for all]", "∀"), +("exist", "\\exists", true, "∃", "[there exists]", "[there exists]", "∃"), +("exists", "\\exists", true, "∃", "[there exists]", "[there exists]", "∃"), +("nexist", "\\nexists", true, "∃", "[there does, notrueexists]", "[there does, notrue exists]", "∄"), +("nexists", "\\nexists", true, "∃", "[there does, notrueexists]", "[there does, notrue exists]", "∄"), +("empty", "\\emptyset", true, "∅", "[empty set]", "[empty set]", "∅"), +("emptyset", "\\emptyset", true, "∅", "[empty set]", "[empty set]", "∅"), +("isin", "\\in", true, "∈", "[, elementrueof]", "[, elementrueof]", "∈"), +("in", "\\in", true, "∈", "[, elementrueof]", "[, elementrueof]", "∈"), 
+("notin", "\\notin", true, "∉", "[, notruean, elementrueof]", "[, notruean, elementrueof]", "∉"), +("ni", "\\ni", true, "∋", "[contains as member]", "[contains as member]", "∋"), +("nabla", "\\nabla", true, "∇", "[nabla]", "[nabla]", "∇"), +("ang", "\\angle", true, "∠", "[angle]", "[angle]", "∠"), +("angle", "\\angle", true, "∠", "[angle]", "[angle]", "∠"), +("perp", "\\perp", true, "⊥", "[up tack]", "[up tack]", "⊥"), +("parallel", "\\parallel", true, "∥", "||", "||", "∥"), +("sdot", "\\cdot", true, "⋅", "[dot]", "[dot]", "⋅"), +("cdot", "\\cdot", true, "⋅", "[dot]", "[dot]", "⋅"), +("lceil", "\\lceil", true, "⌈", "[, leftrueceiling]", "[, leftrueceiling]", "⌈"), +("rceil", "\\rceil", true, "⌉", "[, rightrueceiling]", "[, rightrueceiling]", "⌉"), +("lfloor", "\\lfloor", true, "⌊", "[, leftruefloor]", "[, leftruefloor]", "⌊"), +("rfloor", "\\rfloor", true, "⌋", "[, rightruefloor]", "[, rightruefloor]", "⌋"), +("lang", "\\langle", true, "⟨", "<", "<", "⟨"), +("rang", "\\rangle", true, "⟩", ">", ">", "⟩"), +("langle", "\\langle", true, "⟨", "<", "<", "⟨"), +("rangle", "\\rangle", true, "⟩", ">", ">", "⟩"), +("hbar", "\\hbar", true, "ℏ", "hbar", "hbar", "ℏ"), +("mho", "\\mho", true, "℧", "mho", "mho", "℧"), + +// Arrows +("larr", "\\leftarrow", true, "←", "<-", "<-", "←"), +("leftarrow", "\\leftarrow", true, "←", "<-", "<-", "←"), +("gets", "\\gets", true, "←", "<-", "<-", "←"), +("lArr", "\\Leftarrow", true, "⇐", "<=", "<=", "⇐"), +("Leftarrow", "\\Leftarrow", true, "⇐", "<=", "<=", "⇐"), +("uarr", "\\uparrow", true, "↑", "[uparrow]", "[uparrow]", "↑"), +("uparrow", "\\uparrow", true, "↑", "[uparrow]", "[uparrow]", "↑"), +("uArr", "\\Uparrow", true, "⇑", "[dbluparrow]", "[dbluparrow]", "⇑"), +("Uparrow", "\\Uparrow", true, "⇑", "[dbluparrow]", "[dbluparrow]", "⇑"), +("rarr", "\\rightarrow", true, "→", "->", "->", "→"), +("to", "\\to", true, "→", "->", "->", "→"), +("rightarrow", "\\rightarrow", true, "→", "->", "->", "→"), +("rArr", "\\Rightarrow", true, "⇒", "=>", 
"=>", "⇒"), +("Rightarrow", "\\Rightarrow", true, "⇒", "=>", "=>", "⇒"), +("darr", "\\downarrow", true, "↓", "[downarrow]", "[downarrow]", "↓"), +("downarrow", "\\downarrow", true, "↓", "[downarrow]", "[downarrow]", "↓"), +("dArr", "\\Downarrow", true, "⇓", "[dbldownarrow]", "[dbldownarrow]", "⇓"), +("Downarrow", "\\Downarrow", true, "⇓", "[dbldownarrow]", "[dbldownarrow]", "⇓"), +("harr", "\\leftrightarrow", true, "↔", "<->", "<->", "↔"), +("leftrightarrow", "\\leftrightarrow", true, "↔", "<->", "<->", "↔"), +("hArr", "\\Leftrightarrow", true, "⇔", "<=>", "<=>", "⇔"), +("Leftrightarrow", "\\Leftrightarrow", true, "⇔", "<=>", "<=>", "⇔"), +("crarr", "\\hookleftarrow", true, "↵", "<-'", "<-'", "↵"), +("hookleftarrow", "\\hookleftarrow", true, "↵", "<-'", "<-'", "↵"), + +// Function names +("arccos", "\\arccos", true, "arccos", "arccos", "arccos", "arccos"), +("arcsin", "\\arcsin", true, "arcsin", "arcsin", "arcsin", "arcsin"), +("arctan", "\\arctan", true, "arctan", "arctan", "arctan", "arctan"), +("arg", "\\arg", true, "arg", "arg", "arg", "arg"), +("cos", "\\cos", true, "cos", "cos", "cos", "cos"), +("cosh", "\\cosh", true, "cosh", "cosh", "cosh", "cosh"), +("cot", "\\cot", true, "cot", "cot", "cot", "cot"), +("coth", "\\coth", true, "coth", "coth", "coth", "coth"), +("csc", "\\csc", true, "csc", "csc", "csc", "csc"), +("deg", "\\deg", true, "°", "deg", "deg", "deg"), +("det", "\\det", true, "det", "det", "det", "det"), +("dim", "\\dim", true, "dim", "dim", "dim", "dim"), +("exp", "\\exp", true, "exp", "exp", "exp", "exp"), +("gcd", "\\gcd", true, "gcd", "gcd", "gcd", "gcd"), +("hom", "\\hom", true, "hom", "hom", "hom", "hom"), +("inf", "\\inf", true, "inf", "inf", "inf", "inf"), +("ker", "\\ker", true, "ker", "ker", "ker", "ker"), +("lg", "\\lg", true, "lg", "lg", "lg", "lg"), +("lim", "\\lim", true, "lim", "lim", "lim", "lim"), +("liminf", "\\liminf", true, "liminf", "liminf", "liminf", "liminf"), +("limsup", "\\limsup", true, "limsup", "limsup", "limsup", 
"limsup"), +("ln", "\\ln", true, "ln", "ln", "ln", "ln"), +("log", "\\log", true, "log", "log", "log", "log"), +("max", "\\max", true, "max", "max", "max", "max"), +("min", "\\min", true, "min", "min", "min", "min"), +("Pr", "\\Pr", true, "Pr", "Pr", "Pr", "Pr"), +("sec", "\\sec", true, "sec", "sec", "sec", "sec"), +("sin", "\\sin", true, "sin", "sin", "sin", "sin"), +("sinh", "\\sinh", true, "sinh", "sinh", "sinh", "sinh"), +("sup", "\\sup", true, "⊃", "sup", "sup", "sup"), +("tan", "\\tan", true, "tan", "tan", "tan", "tan"), +("tanh", "\\tanh", true, "tanh", "tanh", "tanh", "tanh"), + +// Signs & Symbols +("bull", "\\textbullet{}", false, "•", "*", "*", "•"), +("bullet", "\\textbullet{}", false, "•", "*", "*", "•"), +("star", "\\star", true, "*", "*", "*", "⋆"), +("lowast", "\\ast", true, "∗", "*", "*", "∗"), +("ast", "\\ast", true, "∗", "*", "*", "*"), +("odot", "\\odot", true, "o", "[circled dot]", "[circled dot]", "ʘ"), +("oplus", "\\oplus", true, "⊕", "[circled plus]", "[circled plus]", "⊕"), +("otimes", "\\otimes", true, "⊗", "[circled times]", "[circled times]", "⊗"), +("check", "\\checkmark", true, "✓", "[checkmark]", "[checkmark]", "✓"), +("checkmark", "\\checkmark", true, "✓", "[checkmark]", "[checkmark]", "✓"), + +// Miscellaneous (seldom used) +("ordf", "\\textordfeminine{}", false, "ª", "_a_", "ª", "ª"), +("ordm", "\\textordmasculine{}", false, "º", "_o_", "º", "º"), +("cedil", "\\c{}", false, "¸", "[cedilla]", "¸", "¸"), +("oline", "\\overline{~}", true, "‾", "[overline]", "¯", "‾"), +("uml", "\\textasciidieresis{}", false, "¨", "[diaeresis]", "¨", "¨"), +("zwnj", "\\/{}", false, "‌", "", "", "‌"), +("zwj", "", false, "‍", "", "", "‍"), +("lrm", "", false, "‎", "", "", "\u{200E}"), +("rlm", "", false, "‏", "", "", "\u{200F}"), + +// Smilies +("smiley", "\\ddot\\smile", true, "☺", ":-)", ":-)", "☺"), +("blacksmile", "\\ddot\\smile", true, "☻", ":-)", ":-)", "☻"), +("sad", "\\ddot\\frown", true, "☹", ":-(", ":-(", "☹"), +("frowny", "\\ddot\\frown", 
true, "☹", ":-(", ":-(", "☹"), + +// Suits +("clubs", "\\clubsuit", true, "♣", "[clubs]", "[clubs]", "♣"), +("clubsuit", "\\clubsuit", true, "♣", "[clubs]", "[clubs]", "♣"), +("spades", "\\spadesuit", true, "♠", "[spades]", "[spades]", "♠"), +("spadesuit", "\\spadesuit", true, "♠", "[spades]", "[spades]", "♠"), +("hearts", "\\heartsuit", true, "♥", "[hearts]", "[hearts]", "♥"), +("heartsuit", "\\heartsuit", true, "♥", "[hearts]", "[hearts]", "♥"), +("diams", "\\diamondsuit", true, "♦", "[diamonds]", "[diamonds]", "◆"), +("diamondsuit", "\\diamondsuit", true, "♦", "[diamonds]", "[diamonds]", "◆"), +("diamond", "\\diamondsuit", true, "⋄", "[diamond]", "[diamond]", "◆"), +("Diamond", "\\diamondsuit", true, "⋄", "[diamond]", "[diamond]", "◆"), +("loz", "\\lozenge", true, "◊", "[lozenge]", "[lozenge]", "⧫"), + +// spaces +// fish shell: +// for i in (seq 1 20) +// echo '("'(string repeat -n $i ' ')'", "\\\\hspace*{'(math '0.5*'$i)'em}", true, "'(string repeat -n $i ' ')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i '\\\\x2002')'")' +// end +(" ", "\\hspace*{0.5em}", true, " ", " ", " ", "\\x2002"), +(" ", "\\hspace*{1em}", true, "  ", " ", " ", "\\x2002\\x2002"), +(" ", "\\hspace*{1.5em}", true, "   ", " ", " ", "\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{2em}", true, "    ", " ", " ", "\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{2.5em}", true, "     ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{3em}", true, "      ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{3.5em}", true, "       ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{4em}", true, "        ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{4.5em}", true, "         ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{5em}", true, "          ", " ", " ", 
"\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{5.5em}", true, "           ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{6em}", true, "            ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{6.5em}", true, "             ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{7em}", true, "              ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{7.5em}", true, "               ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{8em}", true, "                ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{8.5em}", true, "                 ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{9em}", true, "                  ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{9.5em}", true, "                   ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{10em}", true, "                    ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +]; diff --git a/src/export/forward.rs b/src/export/forward.rs index 41adc51..f27bfd7 100644 --- a/src/export/forward.rs +++ b/src/export/forward.rs @@ -49,7 +49,7 @@ /// 
special_block quote_block center_block verse_block comment_block example_block export_block
 ///         source_block babel_call clock cookie radio_target drawer dyn_block fn_def fn_ref macros
 ///         snippet timestamp target fixed_width org_table org_table_row org_table_cell latex_fragment
-///         latex_environment
+///         latex_environment entity
 ///     }
 /// }
 ///
@@ -197,6 +197,9 @@ macro_rules! forward_handler {
     (@method $handler:ty, latex_environment) => {
         forward_handler!(@method $handler, latex_environment, WalkEvent<&$crate::ast::LatexEnvironment>);
     };
+    (@method $handler:ty, entity) => {
+        forward_handler!(@method $handler, entity, WalkEvent<&$crate::ast::Entity>);
+    };
     (@method $handler:ty, $x:ident) => {
         std::compile_error!(std::concat!(std::stringify!($x), " is not a method"));
     };
diff --git a/src/export/html.rs b/src/export/html.rs
index 85f58f4..3a3b264 100644
--- a/src/export/html.rs
+++ b/src/export/html.rs
@@ -461,4 +461,14 @@ impl Traverser for HtmlExport {
             ctx.skip();
         }
     }
+
+    /// Appends the entity's `html()` text to `self.output` on `WalkEvent::Enter`,
+    /// then calls `ctx.skip()`; any other event is ignored.
+    #[tracing::instrument(skip(self, ctx))]
+    fn entity(&mut self, event: WalkEvent<&Entity>, ctx: &mut TraversalContext) {
+        if let WalkEvent::Enter(e) = event {
+            self.output += e.html();
+            ctx.skip();
+        }
+    }
 }
diff --git a/src/export/traverse.rs b/src/export/traverse.rs
index 6884bf3..a44398c 100644
--- a/src/export/traverse.rs
+++ b/src/export/traverse.rs
@@ -139,6 +139,7 @@ pub trait Traverser {
                 LINK => traverse!(Link, link),
                 LATEX_FRAGMENT => traverse!(LatexFragment, latex_fragment),
                 LATEX_ENVIRONMENT => traverse!(LatexEnvironment, latex_environment),
+                ENTITY => traverse!(Entity, entity),
 
                 BLOCK_CONTENT | LIST_ITEM_CONTENT => traverse_children!(node),
 
@@ -246,4 +247,6 @@ pub trait Traverser {
         _event: WalkEvent<&LatexEnvironment>,
         _ctx: &mut TraversalContext,
     );
+    /// Called when entering or leaving `Entity` node
+    fn entity(&mut self, _event: WalkEvent<&Entity>, _ctx: &mut TraversalContext);
 }
diff --git a/src/lib.rs b/src/lib.rs
index 378dea1..7c82702 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,7 @@ pub mod ast; mod config; +mod entities; pub mod export; mod org; mod syntax; diff --git a/src/syntax/entity.rs b/src/syntax/entity.rs new file mode 100644 index 0000000..056126e --- /dev/null +++ b/src/syntax/entity.rs @@ -0,0 +1,120 @@ +use nom::{ + branch::alt, + bytes::complete::{tag, take_while_m_n}, + character::complete::alphanumeric1, + combinator::opt, + IResult, +}; + +use crate::{ + entities::ENTITIES, + syntax::combinator::{backslash_token, node}, + SyntaxKind, +}; + +use super::{combinator::GreenElement, input::Input}; + +pub fn entity_node(input: Input) -> IResult { + debug_assert!(input.s.starts_with('\\')); + let mut parser = alt((template1, template2)); + crate::lossless_parser!(parser, input) +} + +// \NAME POST or // \NAME{} +fn template1(input: Input) -> IResult { + let (input, backslash) = backslash_token(input)?; + let (input, name) = alphanumeric1(input)?; + + if ENTITIES.iter().all(|i| i.0 != name.s) { + return Err(nom::Err::Error(())); + } + let (input, brackets) = opt(tag("{}"))(input)?; + + if let Some(brackets) = brackets { + return Ok(( + input, + node( + SyntaxKind::ENTITY, + [backslash, name.text_token(), brackets.text_token()], + ), + )); + } + + if let Some(post) = input.bytes().next() { + if post.is_ascii_alphabetic() { + return Err(nom::Err::Error(())); + } + } + + Ok(( + input, + node(SyntaxKind::ENTITY, [backslash, name.text_token()]), + )) +} + +// \_SPACES +fn template2(input: Input) -> IResult { + let (input, backslash) = backslash_token(input)?; + let (input, underscore) = tag("_")(input)?; + let (input, spaces) = take_while_m_n(1, 20, |c| c == ' ')(input)?; + Ok(( + input, + node( + SyntaxKind::ENTITY, + [ + backslash, + underscore.token(SyntaxKind::UNDERSCORE), + spaces.text_token(), + ], + ), + )) +} + +#[test] +fn parse() { + use crate::{ast::Entity, tests::to_ast, ParseConfig}; + + let to_entity = to_ast::(entity_node); + + insta::assert_debug_snapshot!( + to_entity("\\cent").syntax, + @r###" + 
ENTITY@0..5 + BACKSLASH@0..1 "\\" + TEXT@1..5 "cent" + "### + ); + + insta::assert_debug_snapshot!( + to_entity("\\S").syntax, + @r###" + ENTITY@0..2 + BACKSLASH@0..1 "\\" + TEXT@1..2 "S" + "### + ); + + insta::assert_debug_snapshot!( + to_entity("\\frac12{}test").syntax, + @r###" + ENTITY@0..9 + BACKSLASH@0..1 "\\" + TEXT@1..7 "frac12" + TEXT@7..9 "{}" + "### + ); + + insta::assert_debug_snapshot!( + to_entity("\\_ ").syntax, + @r###" + ENTITY@0..21 + BACKSLASH@0..1 "\\" + UNDERSCORE@1..2 "_" + TEXT@2..21 " " + "### + ); + + let c = ParseConfig::default(); + + assert!(entity_node(("\\poi", &c).into()).is_err()); +} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index df57fe5..f05da2f 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -10,6 +10,7 @@ pub mod drawer; pub mod dyn_block; pub mod element; pub mod emphasis; +pub mod entity; pub mod fixed_width; pub mod fn_def; pub mod fn_ref; @@ -196,6 +197,7 @@ pub enum SyntaxKind { UNDERLINE, VERBATIM, CODE, + ENTITY, /* timestamp */ TIMESTAMP_ACTIVE, diff --git a/src/syntax/object.rs b/src/syntax/object.rs index c21d1b3..9903103 100644 --- a/src/syntax/object.rs +++ b/src/syntax/object.rs @@ -4,6 +4,7 @@ use super::{ combinator::GreenElement, cookie::cookie_node, emphasis::{bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node}, + entity::entity_node, fn_ref::fn_ref_node, inline_call::inline_call_node, inline_src::inline_src_node, @@ -41,7 +42,7 @@ impl<'a> Iterator for ObjectPositions<'a> { type Item = (Input<'a>, Input<'a>); fn next(&mut self) -> Option { - if self.input.input_len() < 3 || self.pos >= self.input.input_len() { + if self.input.input_len() < 2 || self.pos >= self.input.input_len() { return None; } @@ -56,7 +57,7 @@ impl<'a> Iterator for ObjectPositions<'a> { let p = match bytes[i] { b'{' => { - if self.input.s.len() - self.pos > 3 { + if self.input.s.len() - self.pos > 2 { self.next = Some(self.pos); } self.pos - 1 @@ -73,8 +74,8 @@ impl<'a> Iterator for 
ObjectPositions<'a> { self.input.s.len() ); - // a valid object requires at least three characters - if self.input.s.len() - p < 3 { + // a valid object requires at least two characters + if self.input.s.len() - p < 2 { return None; } @@ -92,8 +93,8 @@ pub fn object_nodes(input: Input) -> Vec { 'l: while !i.is_empty() { for (input, head) in ObjectPositions::new(i) { debug_assert!( - input.s.len() >= 3, - "object must have at least three characters: {:?}", + input.s.len() >= 2, + "object must have at least two characters: {:?}", input.s ); if let Ok((input, node)) = object_node(input) { @@ -146,7 +147,7 @@ fn object_node(i: Input) -> IResult { b'c' => inline_call_node(i), b's' => inline_src_node(i), b'$' => latex_fragment_node(i), - b'\\' => latex_fragment_node(i), + b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), _ => Err(nom::Err::Error(())), } } @@ -155,9 +156,13 @@ fn object_node(i: Input) -> IResult { fn positions() { let config = crate::ParseConfig::default(); - let vec = ObjectPositions::new(("*{", &config).into()).collect::>(); + let vec = ObjectPositions::new(("*", &config).into()).collect::>(); assert!(vec.is_empty()); + let vec = ObjectPositions::new(("*{", &config).into()).collect::>(); + assert_eq!(vec.len(), 1); + assert_eq!(vec[0].0.s, "*{"); + // https://github.com/PoiScript/orgize/issues/69 let vec = ObjectPositions::new(("{3}", &config).into()).collect::>(); assert_eq!(vec.len(), 2); @@ -166,12 +171,13 @@ fn positions() { assert_eq!(vec[1].0.s, "{3}"); let vec = ObjectPositions::new(("*{()}//s\nc<<", &config).into()).collect::>(); - assert_eq!(vec.len(), 5); + assert_eq!(vec.len(), 6); assert_eq!(vec[0].0.s, "*{()}//s\nc<<"); assert_eq!(vec[1].0.s, "{()}//s\nc<<"); assert_eq!(vec[2].0.s, "()}//s\nc<<"); assert_eq!(vec[3].0.s, ")}//s\nc<<"); assert_eq!(vec[4].0.s, "c<<"); + assert_eq!(vec[5].0.s, "<<"); } #[test] diff --git a/wasm/index.html b/wasm/index.html index 7160da1..02a515c 100644 --- a/wasm/index.html +++ b/wasm/index.html @@ 
-308,6 +308,11 @@ $$ a&=b+c \\\\ d+e&=f \\end{align} + +----- +Entity + +\\alpha\\_ \\rightarrow{}\\_ \\beta `); editor.session.on("change", () => render());