chore: add orgize-{cli,common,lsp} package

This commit is contained in:
PoiScript 2023-12-20 21:56:10 +08:00
parent 6930640866
commit 4cc1130a17
No known key found for this signature in database
GPG key ID: 22C2B1249D99985E
131 changed files with 6577 additions and 56 deletions

View file

@ -1,55 +0,0 @@
use crate::syntax::SyntaxKind;
use super::{filter_token, AffiliatedKeyword, Token};
impl AffiliatedKeyword {
    /// Returns the key of this keyword: the first `TEXT` token among the
    /// node's direct children.
    ///
    /// A node without any `TEXT` token trips a `debug_assert!`; when debug
    /// assertions are disabled, `Token::default()` is returned instead.
    ///
    /// ```rust
    /// use orgize::{Org, ast::AffiliatedKeyword};
    ///
    /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.key(), "CAPTION");
    /// ```
    pub fn key(&self) -> Token {
        let first_text = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT));

        match first_text {
            Some(token) => token,
            None => {
                debug_assert!(false, "keyword must contains TEXT");
                Token::default()
            }
        }
    }

    /// Returns the optional part of this keyword: the element directly after
    /// the first `L_BRACKET`, provided that element is a `TEXT` token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::AffiliatedKeyword};
    ///
    /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert!(keyword.optional().is_none());
    /// let keyword = Org::parse("#+CAPTION[OPTIONAL]: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.optional().unwrap(), "OPTIONAL");
    /// ```
    pub fn optional(&self) -> Option<Token> {
        // Drop everything in front of the opening bracket, then the bracket itself.
        let mut after_bracket = self
            .syntax
            .children_with_tokens()
            .skip_while(|elem| elem.kind() != SyntaxKind::L_BRACKET)
            .skip(1);

        after_bracket
            .next()
            .and_then(filter_token(SyntaxKind::TEXT))
    }

    /// Returns the value of this keyword: the last `TEXT` token among the
    /// node's direct children.
    ///
    /// ```rust
    /// use orgize::{Org, ast::AffiliatedKeyword};
    ///
    /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.value().unwrap(), " VALUE");
    /// let keyword = Org::parse("#+CAPTION[OPTIONAL]:VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.value().unwrap(), "VALUE");
    /// ```
    pub fn value(&self) -> Option<Token> {
        let texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT));

        texts.last()
    }
}

View file

@ -1,109 +0,0 @@
use crate::SyntaxKind;
use super::{filter_token, ExportBlock, SourceBlock, Token};
impl SourceBlock {
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.language().unwrap(), "c");
/// let block = Org::parse("#+begin_src javascript \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.language().unwrap(), "javascript");
///
/// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.language().is_none());
/// ````
pub fn language(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::SRC_BLOCK_LANGUAGE))
}
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src emacs-lisp -n 20\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.switches().unwrap(), "-n 20");
/// let block = Org::parse("#+begin_src emacs-lisp -n 20 -r :tangle yes \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.switches().unwrap(), "-n 20 -r");
///
/// let block = Org::parse("#+begin_src emacs-lisp\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// let block = Org::parse("#+begin_src :tangle yes\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// ````
pub fn switches(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::SRC_BLOCK_SWITCHES))
}
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src c :tangle yes\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.parameters().unwrap(), ":tangle yes");
/// let block = Org::parse("#+begin_src c :tangle \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.parameters().unwrap(), ":tangle");
///
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.parameters().is_none());
/// ````
pub fn parameters(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::SRC_BLOCK_PARAMETERS))
}
/// Return unescaped source code string
///
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.value(), "");
/// let block = Org::parse("#+begin_src\n,* foo \n,#+ bar\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.value(), "* foo \n#+ bar\n");
/// ````
pub fn value(&self) -> String {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_CONTENT)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.filter_map(filter_token(SyntaxKind::TEXT))
.fold(String::new(), |acc, value| acc + &value)
}
}
impl ExportBlock {
    /// Returns the `EXPORT_BLOCK_TYPE` token found inside the block's
    /// `BLOCK_BEGIN` child, if there is one.
    ///
    /// ```rust
    /// use orgize::{Org, ast::ExportBlock};
    ///
    /// let block = Org::parse("#+begin_export html\n#+end_export").first_node::<ExportBlock>().unwrap();
    /// assert_eq!(block.ty().unwrap(), "html");
    ///
    /// let block = Org::parse("#+begin_export\n#+end_export").first_node::<ExportBlock>().unwrap();
    /// assert!(block.ty().is_none());
    /// ```
    pub fn ty(&self) -> Option<Token> {
        let begin = self
            .syntax
            .children()
            .find(|child| child.kind() == SyntaxKind::BLOCK_BEGIN)?;

        begin
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::EXPORT_BLOCK_TYPE))
    }
}

View file

@ -1,58 +0,0 @@
use rowan::ast::support;
use crate::{ast::Token, SyntaxKind};
use super::{Clock, Timestamp};
impl Clock {
    /// Returns the [`Timestamp`] child of this clock, looked up through
    /// rowan's `support::child`.
    pub fn value(&self) -> Option<Timestamp> {
        support::child::<Timestamp>(&self.syntax)
    }

    /// Returns the duration of this clock: the first non-`WHITESPACE` element
    /// that follows the `DOUBLE_ARROW` token, or `None` when there is no such
    /// element.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Clock};
    ///
    /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::<Clock>().unwrap();
    /// assert!(clock.duration().is_none());
    /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::<Clock>().unwrap();
    /// assert_eq!(clock.duration().unwrap(), "12:00");
    ///
    /// ```
    pub fn duration(&self) -> Option<Token> {
        // Everything strictly after the arrow token.
        let mut after_arrow = self
            .syntax
            .children_with_tokens()
            .skip_while(|elem| elem.kind() != SyntaxKind::DOUBLE_ARROW)
            .skip(1);

        let first_non_blank = after_arrow.find(|elem| elem.kind() != SyntaxKind::WHITESPACE)?;
        debug_assert!(first_non_blank.kind() == SyntaxKind::TEXT);
        Some(Token(first_non_blank.into_token()))
    }

    /// Returns `true` if this clock contains a `DOUBLE_ARROW` token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Clock};
    ///
    /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::<Clock>().unwrap();
    /// assert!(!clock.is_closed());
    /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::<Clock>().unwrap();
    /// assert!(clock.is_closed());
    /// ```
    pub fn is_closed(&self) -> bool {
        self.syntax
            .children_with_tokens()
            .map(|elem| elem.kind())
            .any(|kind| kind == SyntaxKind::DOUBLE_ARROW)
    }

    /// Returns `true` if this clock is not closed; the exact negation of
    /// [`Clock::is_closed`].
    ///
    /// ```rust
    /// use orgize::{Org, ast::Clock};
    ///
    /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::<Clock>().unwrap();
    /// assert!(clock.is_running());
    /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::<Clock>().unwrap();
    /// assert!(!clock.is_running());
    /// ```
    pub fn is_running(&self) -> bool {
        let closed = self.is_closed();
        !closed
    }
}

View file

@ -1,20 +0,0 @@
use crate::SyntaxKind;
use super::{filter_token, Comment};
impl Comment {
/// Contents without pound signs
///
/// ```rust
/// use orgize::{ast::Comment, Org};
///
/// let fixed = Org::parse("# A\n#\n# B\n# C").first_node::<Comment>().unwrap();
/// assert_eq!(fixed.value(), "A\n\nB\nC");
/// ```
pub fn value(&self) -> String {
self.syntax
.children_with_tokens()
.filter_map(filter_token(SyntaxKind::TEXT))
.fold(String::new(), |acc, text| acc + &text)
}
}

View file

@ -1,64 +0,0 @@
use std::collections::HashMap;
use super::{filter_token, SyntaxKind, Token};
use crate::ast::PropertyDrawer;
impl PropertyDrawer {
    /// Iterates over the `(key, value)` pairs of this drawer.
    ///
    /// For each node property, the first two `TEXT` tokens among its direct
    /// children are taken as key and value; properties with fewer than two
    /// `TEXT` tokens are skipped.
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// assert_eq!(drawer.iter().count(), 2);
    /// ```
    pub fn iter(&self) -> impl Iterator<Item = (Token, Token)> {
        self.node_properties().filter_map(|property| {
            let mut texts = property
                .syntax
                .children_with_tokens()
                .filter_map(filter_token(SyntaxKind::TEXT));

            let name = texts.next()?;
            let value = texts.next()?;
            Some((name, value))
        })
    }

    /// Returns the value of the first property whose key equals `key`.
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// assert_eq!(drawer.get("CUSTOM_ID").unwrap(), "someid");
    /// assert_eq!(drawer.get("ID").unwrap(), "id");
    /// ```
    pub fn get(&self, key: &str) -> Option<Token> {
        for (name, value) in self.iter() {
            if name == key {
                return Some(value);
            }
        }
        None
    }

    /// Collects all properties into a `HashMap`.
    ///
    /// When a key occurs more than once, the last occurrence wins (see the
    /// example below).
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:CUSTOM_ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// let map = drawer.to_hash_map();
    /// assert_eq!(map.len(), 1);
    /// assert_eq!(map.get("CUSTOM_ID").unwrap(), "id");
    /// ```
    pub fn to_hash_map(&self) -> HashMap<Token, Token> {
        let mut map = HashMap::new();
        map.extend(self.iter());
        map
    }

    #[cfg(feature = "indexmap")]
    /// Collects all properties into an `IndexMap`, keeping them in the order
    /// in which they appear in the drawer.
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// let map = drawer.to_index_map();
    /// let item1 = map.get_index(1).unwrap();
    /// assert_eq!(item1.0, "ID");
    /// assert_eq!(item1.1, "id");
    /// ```
    pub fn to_index_map(&self) -> indexmap::IndexMap<Token, Token> {
        let mut map = indexmap::IndexMap::default();
        map.extend(self.iter());
        map
    }
}

View file

@ -1,168 +0,0 @@
use crate::{entities::ENTITIES, SyntaxKind};
use super::{filter_token, Entity};
impl Entity {
    /// Looks up this entity's row in the `ENTITIES` table by comparing the
    /// first column with the first `TEXT` token of this node.
    ///
    /// The accessors below read the remaining columns of the returned row.
    fn entity(&self) -> Option<&(&str, &str, bool, &str, &str, &str, &str)> {
        let name = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT))?;

        ENTITIES.iter().find(|row| row.0 == name.as_ref())
    }

    /// Entity name
    ///
    /// An entity that is missing from the table trips a `debug_assert!`; with
    /// debug assertions disabled an empty string is returned.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\alpha{}").first_node::<Entity>().unwrap();
    /// assert_eq!(e.name(), "alpha");
    /// let e = Org::parse("\\_ ").first_node::<Entity>().unwrap();
    /// assert_eq!(e.name(), " ");
    /// ```
    pub fn name(&self) -> &str {
        match self.entity() {
            Some(row) => row.0,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Entity LaTeX representation
    ///
    /// An entity that is missing from the table trips a `debug_assert!`; with
    /// debug assertions disabled an empty string is returned.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\middot").first_node::<Entity>().unwrap();
    /// assert_eq!(e.latex(), "\\textperiodcentered{}");
    /// ```
    pub fn latex(&self) -> &str {
        match self.entity() {
            Some(row) => row.1,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Whether entity needs to be in math mode
    ///
    /// An entity that is missing from the table trips a `debug_assert!`; with
    /// debug assertions disabled `false` is returned.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\middot").first_node::<Entity>().unwrap();
    /// assert!(!e.is_latex_math());
    /// let e = Org::parse("\\alefsym").first_node::<Entity>().unwrap();
    /// assert!(e.is_latex_math());
    /// ```
    pub fn is_latex_math(&self) -> bool {
        match self.entity() {
            Some(row) => row.2,
            None => {
                debug_assert!(false);
                false
            }
        }
    }

    /// Entity HTML representation
    ///
    /// An entity that is missing from the table trips a `debug_assert!`; with
    /// debug assertions disabled an empty string is returned.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.html(), "&sect;");
    /// ```
    pub fn html(&self) -> &str {
        match self.entity() {
            Some(row) => row.3,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Entity ASCII representation
    ///
    /// An entity that is missing from the table trips a `debug_assert!`; with
    /// debug assertions disabled an empty string is returned.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.ascii(), "section");
    /// ```
    pub fn ascii(&self) -> &str {
        match self.entity() {
            Some(row) => row.4,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Entity Latin1 encoding representation
    ///
    /// An entity that is missing from the table trips a `debug_assert!`; with
    /// debug assertions disabled an empty string is returned.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.latin1(), "§");
    /// let e = Org::parse("\\rsaquo").first_node::<Entity>().unwrap();
    /// assert_eq!(e.latin1(), ">");
    /// ```
    pub fn latin1(&self) -> &str {
        match self.entity() {
            Some(row) => row.5,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Entity UTF-8 encoding representation
    ///
    /// An entity that is missing from the table trips a `debug_assert!`; with
    /// debug assertions disabled an empty string is returned.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.utf8(), "§");
    /// let e = Org::parse("\\rsaquo").first_node::<Entity>().unwrap();
    /// assert_eq!(e.utf8(), "");
    /// ```
    pub fn utf8(&self) -> &str {
        match self.entity() {
            Some(row) => row.6,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Entity contains optional brackets
    ///
    /// Returns `true` when this node has at least two `TEXT` children.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\beta").first_node::<Entity>().unwrap();
    /// assert!(!e.is_use_brackets());
    /// let e = Org::parse("\\S{}").first_node::<Entity>().unwrap();
    /// assert!(e.is_use_brackets());
    /// let e = Org::parse("\\_ ").first_node::<Entity>().unwrap();
    /// assert!(!e.is_use_brackets());
    /// ```
    pub fn is_use_brackets(&self) -> bool {
        let mut texts = self
            .syntax
            .children_with_tokens()
            .filter(|elem| elem.kind() == SyntaxKind::TEXT);

        // Skip the first TEXT child; a second one is what this method reports.
        texts.next();
        texts.next().is_some()
    }
}

View file

@ -1,20 +0,0 @@
use crate::SyntaxKind;
use super::{filter_token, FixedWidth};
impl FixedWidth {
/// Contents without colons prefix
///
/// ```rust
/// use orgize::{ast::FixedWidth, Org};
///
/// let fixed = Org::parse(": A\n:\n: B\n: C").first_node::<FixedWidth>().unwrap();
/// assert_eq!(fixed.value(), "A\n\nB\nC");
/// ```
pub fn value(&self) -> String {
self.syntax
.children_with_tokens()
.filter_map(filter_token(SyntaxKind::TEXT))
.fold(String::new(), |acc, text| acc + &text)
}
}

View file

@ -1,352 +0,0 @@
// Declarative description of every AST wrapper struct written to
// `generated.rs`. Per entry:
//   struct               name of the generated Rust struct
//   kind                 SyntaxKind names accepted by `can_cast`
//   token / last_token   [method, SyntaxKind] pairs -> token accessors
//   parent               [method, Struct] pairs -> parent accessors
//   first_child / last_child / children
//                        [method, Struct] pairs -> child accessors
//   post_blank / pre_blank
//                        emit the corresponding blank-line accessor
//   affiliated_keywords  emit caption/header/name/plot/results/attr accessors
const nodes = [
  {
    struct: "Document",
    kind: ["DOCUMENT"],
    pre_blank: true,
    first_child: [["section", "Section"], ["first_headline", "Headline"]],
    last_child: [["last_headline", "Headline"]],
    children: [["headlines", "Headline"]],
  },
  { struct: "Section", kind: ["SECTION"], post_blank: true },
  { struct: "Paragraph", kind: ["PARAGRAPH"], post_blank: true, affiliated_keywords: true },
  {
    struct: "Headline",
    kind: ["HEADLINE"],
    first_child: [["section", "Section"], ["planning", "Planning"], ["properties", "PropertyDrawer"]],
    children: [["headlines", "Headline"]],
    post_blank: true,
  },
  { struct: "HeadlineTitle", kind: ["HEADLINE_TITLE"], parent: [["headline", "Headline"]] },
  { struct: "PropertyDrawer", kind: ["PROPERTY_DRAWER"], children: [["node_properties", "NodeProperty"]] },
  { struct: "NodeProperty", kind: ["NODE_PROPERTY"] },
  { struct: "Planning", kind: ["PLANNING"] },
  { struct: "OrgTable", kind: ["ORG_TABLE"], post_blank: true, affiliated_keywords: true },
  { struct: "OrgTableRow", kind: ["ORG_TABLE_RULE_ROW", "ORG_TABLE_STANDARD_ROW"] },
  { struct: "OrgTableCell", kind: ["ORG_TABLE_CELL"] },
  { struct: "List", kind: ["LIST"], children: [["items", "ListItem"]], affiliated_keywords: true },
  { struct: "ListItem", kind: ["LIST_ITEM"] },
  { struct: "Drawer", kind: ["DRAWER"] },
  { struct: "DynBlock", kind: ["DYN_BLOCK"], affiliated_keywords: true },
  { struct: "Keyword", kind: ["KEYWORD"] },
  { struct: "BabelCall", kind: ["BABEL_CALL"] },
  { struct: "AffiliatedKeyword", kind: ["AFFILIATED_KEYWORD"] },
  { struct: "TableEl", kind: ["TABLE_EL"], post_blank: true },
  { struct: "Clock", kind: ["CLOCK"], post_blank: true },
  { struct: "FnDef", kind: ["FN_DEF"], post_blank: true, affiliated_keywords: true },
  { struct: "Comment", kind: ["COMMENT"], post_blank: true, token: [["text", "TEXT"]], affiliated_keywords: true },
  { struct: "Rule", kind: ["RULE"], post_blank: true },
  { struct: "FixedWidth", kind: ["FIXED_WIDTH"], post_blank: true, token: [["text", "TEXT"]], affiliated_keywords: true },
  { struct: "SpecialBlock", kind: ["SPECIAL_BLOCK"], affiliated_keywords: true },
  { struct: "QuoteBlock", kind: ["QUOTE_BLOCK"], affiliated_keywords: true },
  { struct: "CenterBlock", kind: ["CENTER_BLOCK"], affiliated_keywords: true },
  { struct: "VerseBlock", kind: ["VERSE_BLOCK"], affiliated_keywords: true },
  { struct: "CommentBlock", kind: ["COMMENT_BLOCK"], affiliated_keywords: true },
  { struct: "ExampleBlock", kind: ["EXAMPLE_BLOCK"], affiliated_keywords: true },
  { struct: "ExportBlock", kind: ["EXPORT_BLOCK"], affiliated_keywords: true },
  { struct: "SourceBlock", kind: ["SOURCE_BLOCK"], affiliated_keywords: true },
  { struct: "InlineCall", kind: ["INLINE_CALL"] },
  { struct: "InlineSrc", kind: ["INLINE_SRC"] },
  { struct: "Link", kind: ["LINK"] },
  { struct: "Cookie", kind: ["COOKIE"] },
  { struct: "RadioTarget", kind: ["RADIO_TARGET"] },
  { struct: "FnRef", kind: ["FN_REF"] },
  { struct: "Macros", kind: ["MACROS"] },
  { struct: "Snippet", kind: ["SNIPPET"] },
  { struct: "Target", kind: ["TARGET"] },
  { struct: "Bold", kind: ["BOLD"] },
  { struct: "Strike", kind: ["STRIKE"] },
  { struct: "Italic", kind: ["ITALIC"] },
  { struct: "Underline", kind: ["UNDERLINE"] },
  { struct: "Verbatim", kind: ["VERBATIM"] },
  { struct: "Code", kind: ["CODE"], token: [["text", "TEXT"]] },
  {
    struct: "Timestamp",
    kind: ["TIMESTAMP_ACTIVE", "TIMESTAMP_INACTIVE", "TIMESTAMP_DIARY"],
    token: [
      ["year_start", "TIMESTAMP_YEAR"],
      ["month_start", "TIMESTAMP_MONTH"],
      ["day_start", "TIMESTAMP_DAY"],
      ["hour_start", "TIMESTAMP_HOUR"],
      ["minute_start", "TIMESTAMP_MINUTE"],
    ],
    last_token: [
      ["year_end", "TIMESTAMP_YEAR"],
      ["month_end", "TIMESTAMP_MONTH"],
      ["day_end", "TIMESTAMP_DAY"],
      ["hour_end", "TIMESTAMP_HOUR"],
      ["minute_end", "TIMESTAMP_MINUTE"],
    ],
  },
  { struct: "LatexEnvironment", kind: ["LATEX_ENVIRONMENT"] },
  { struct: "LatexFragment", kind: ["LATEX_FRAGMENT"] },
  { struct: "Entity", kind: ["ENTITY"] },
  { struct: "LineBreak", kind: ["LINE_BREAK"] },
  { struct: "Superscript", kind: ["SUPERSCRIPT"] },
  { struct: "Subscript", kind: ["SUBSCRIPT"] },
];
// Fixed preamble of the generated Rust module: the "generated file" banner,
// lint allowances, imports, and the shared `affiliated_keyword` helper that
// the per-node accessors call. The per-node code is appended to this string.
let content = [
  "//! generated file, do not modify it directly",
  "#![allow(clippy::all)]",
  "#![allow(unused)]",
  "use rowan::ast::{support, AstChildren, AstNode};",
  "use crate::syntax::{OrgLanguage, SyntaxKind, SyntaxKind::*, SyntaxNode, SyntaxToken};",
  "fn affiliated_keyword(node: &SyntaxNode, filter: impl Fn(&str) -> bool) -> Option<AffiliatedKeyword> {",
  "node.children()",
  ".take_while(|n| n.kind() == SyntaxKind::AFFILIATED_KEYWORD)",
  ".filter_map(AffiliatedKeyword::cast)",
  ".find(|k| filter(&k.key()))",
  "}",
  "", // keeps the trailing newline of the preamble
].join("\n");
// Accessor families driven by a node's `[method, kind]` pair lists. The
// array order is the order in which the accessors are emitted for each node.
const pairAccessors = [
  ["token", (method, kind) => ` pub fn ${method}(&self) -> Option<super::Token> { super::token(&self.syntax, ${kind}) }\n`],
  ["last_token", (method, kind) => ` pub fn ${method}(&self) -> Option<super::Token> { super::last_token(&self.syntax, ${kind}) }\n`],
  ["parent", (method, kind) => ` pub fn ${method}(&self) -> Option<${kind}> { self.syntax.parent().and_then(${kind}::cast) }\n`],
  ["first_child", (method, kind) => ` pub fn ${method}(&self) -> Option<${kind}> { support::child(&self.syntax) }\n`],
  ["last_child", (method, kind) => ` pub fn ${method}(&self) -> Option<${kind}> { super::last_child(&self.syntax) }\n`],
  ["children", (method, kind) => ` pub fn ${method}(&self) -> AstChildren<${kind}> { support::children(&self.syntax) }\n`],
];

// Affiliated keywords that are matched by exact key: [method, key].
const exactKeywordAccessors = [
  ["caption", "CAPTION"],
  ["header", "HEADER"],
  ["name", "NAME"],
  ["plot", "PLOT"],
  ["results", "RESULTS"],
];

// Append, for every described node: the struct, its `AstNode` impl, and an
// inherent impl with `begin`/`end` plus the accessors the entry asks for.
nodes.forEach((node) => {
  const name = node.struct;
  const canCast = node.kind.map((k) => `kind == ${k}`).join(" || ");

  content += `
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ${name} {
pub(crate) syntax: SyntaxNode,
}
impl AstNode for ${name} {
type Language = OrgLanguage;
fn can_cast(kind: SyntaxKind) -> bool { ${canCast} }
fn cast(node: SyntaxNode) -> Option<${name}> { Self::can_cast(node.kind()).then(|| ${name} { syntax: node }) }
fn syntax(&self) -> &SyntaxNode { &self.syntax }
}
impl ${name} {
pub fn begin(&self) -> u32 {
self.syntax.text_range().start().into()
}
pub fn end(&self) -> u32 {
self.syntax.text_range().end().into()
}
`;

  pairAccessors.forEach(([field, render]) => {
    (node[field] || []).forEach(([method, kind]) => {
      content += render(method, kind);
    });
  });

  if (node.post_blank) {
    content += ` pub fn post_blank(&self) -> usize { super::blank_lines(&self.syntax) }\n`;
  }
  if (node.pre_blank) {
    content += ` pub fn pre_blank(&self) -> usize { super::blank_lines(&self.syntax) }\n`;
  }

  if (node.affiliated_keywords) {
    exactKeywordAccessors.forEach(([method, key]) => {
      content += ` pub fn ${method}(&self) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k == "${key}") }\n`;
    });
    // `attr` matches on the backend suffix that follows the `ATTR_` prefix.
    content += ` pub fn attr(&self, backend: &str) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k.starts_with("ATTR_") && &k[5..] == backend) }\n`;
  }

  content += `}\n`;
});

// Write the result next to this script.
const fs = require("fs");
fs.writeFileSync(__dirname + "/generated.rs", content);

File diff suppressed because it is too large Load diff

View file

@ -1,429 +0,0 @@
use rowan::NodeOrToken;
use crate::{syntax::SyntaxKind, SyntaxElement};
use super::{filter_token, Headline, Timestamp, Token};
/// Kind of a headline's TODO keyword, as reported by `Headline::todo_type`.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so the type can be
/// used as a map key or set element.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TodoType {
    /// The keyword token has kind `HEADLINE_KEYWORD_TODO`.
    Todo,
    /// The keyword token has kind `HEADLINE_KEYWORD_DONE`.
    Done,
}
impl Headline {
    /// Return level of this headline
    ///
    /// The level is the length of the `HEADLINE_STARS` token. A headline
    /// without that token trips a `debug_assert!`; with debug assertions
    /// disabled, `0` is returned.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Headline};
    ///
    /// let hdl = Org::parse("* ").first_node::<Headline>().unwrap();
    /// assert_eq!(hdl.level(), 1);
    /// let hdl = Org::parse("****** hello").first_node::<Headline>().unwrap();
    /// assert_eq!(hdl.level(), 6);
    /// ```
    pub fn level(&self) -> usize {
        let stars = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::HEADLINE_STARS));

        match stars {
            Some(stars) => stars.len(),
            None => {
                debug_assert!(false, "headline must contains starts token");
                0
            }
        }
    }

    /// Returns the TODO keyword of this headline: the first direct token of
    /// kind `HEADLINE_KEYWORD_TODO` or `HEADLINE_KEYWORD_DONE`.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Headline};
    ///
    /// let hdl = Org::parse("* TODO a").first_node::<Headline>().unwrap();
    /// assert_eq!(hdl.todo_keyword().unwrap(), "TODO");
    /// ```
    pub fn todo_keyword(&self) -> Option<Token> {
        self.syntax
            .children_with_tokens()
            .filter_map(|elem| elem.into_token())
            .find(|tk| {
                let kind = tk.kind();
                kind == SyntaxKind::HEADLINE_KEYWORD_TODO
                    || kind == SyntaxKind::HEADLINE_KEYWORD_DONE
            })
            .map(|tk| Token(Some(tk)))
    }

    /// Returns whether the headline's keyword is a TODO or a DONE keyword,
    /// or `None` if the headline carries no keyword token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::{Headline, TodoType}};
    ///
    /// let hdl = Org::parse("* TODO a").first_node::<Headline>().unwrap();
    /// assert_eq!(hdl.todo_type().unwrap(), TodoType::Todo);
    /// let hdl = Org::parse("*** DONE a").first_node::<Headline>().unwrap();
    /// assert_eq!(hdl.todo_type().unwrap(), TodoType::Done);
    /// ```
    pub fn todo_type(&self) -> Option<TodoType> {
        for elem in self.syntax.children_with_tokens() {
            if let NodeOrToken::Token(tk) = elem {
                let kind = tk.kind();
                if kind == SyntaxKind::HEADLINE_KEYWORD_TODO {
                    return Some(TodoType::Todo);
                } else if kind == SyntaxKind::HEADLINE_KEYWORD_DONE {
                    return Some(TodoType::Done);
                }
            }
        }
        None
    }

    /// Returns an iterator over the elements of this headline's title, i.e.
    /// the children (nodes and tokens) of its `HEADLINE_TITLE` node. The
    /// iterator is empty when there is no title node.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Headline};
    ///
    /// let hdl = Org::parse("*** abc *abc* /abc/ :tag:").first_node::<Headline>().unwrap();
    /// let title = hdl.title().map(|n| n.to_string()).collect::<String>();
    /// assert_eq!(title, "abc *abc* /abc/ ");
    /// ```
    pub fn title(&self) -> impl Iterator<Item = SyntaxElement> {
        let title_node = self
            .syntax
            .children()
            .find(|child| child.kind() == SyntaxKind::HEADLINE_TITLE);

        title_node
            .into_iter()
            .flat_map(|node| node.children_with_tokens())
    }

    /// Return `true` if this headline contains a COMMENT keyword
    ///
    /// That is the case when the first title element is a `TEXT` token that
    /// starts with `COMMENT`, followed by either nothing or whitespace.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Headline};
    ///
    /// let hdl = Org::parse("* COMMENT").first_node::<Headline>().unwrap();
    /// assert!(hdl.is_commented());
    /// let hdl = Org::parse("* COMMENT hello").first_node::<Headline>().unwrap();
    /// assert!(hdl.is_commented());
    /// let hdl = Org::parse("* hello").first_node::<Headline>().unwrap();
    /// assert!(!hdl.is_commented());
    /// ```
    pub fn is_commented(&self) -> bool {
        match self.title().next() {
            Some(NodeOrToken::Token(first)) if first.kind() == SyntaxKind::TEXT => {
                match first.text().strip_prefix("COMMENT") {
                    Some(rest) => rest.is_empty() || rest.starts_with(char::is_whitespace),
                    None => false,
                }
            }
            _ => false,
        }
    }

    /// Return `true` if this headline contains an archive tag
    ///
    /// Only a tag that is exactly `ARCHIVE` counts.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Headline};
    ///
    /// let hdl = Org::parse("* hello :ARCHIVE:").first_node::<Headline>().unwrap();
    /// assert!(hdl.is_archived());
    /// let hdl = Org::parse("* hello :ARCHIVED:").first_node::<Headline>().unwrap();
    /// assert!(!hdl.is_archived());
    /// ```
    pub fn is_archived(&self) -> bool {
        for tag in self.tags() {
            if tag == "ARCHIVE" {
                return true;
            }
        }
        false
    }

    /// Returns this headline's closed timestamp, or `None` if the headline
    /// has no planning node or the planning has no closed timestamp.
    pub fn closed(&self) -> Option<Timestamp> {
        self.planning()?.closed()
    }

    /// Returns this headline's scheduled timestamp, or `None` if the headline
    /// has no planning node or the planning has no scheduled timestamp.
    pub fn scheduled(&self) -> Option<Timestamp> {
        self.planning()?.scheduled()
    }

    /// Returns this headline's deadline timestamp, or `None` if the headline
    /// has no planning node or the planning has no deadline timestamp.
    pub fn deadline(&self) -> Option<Timestamp> {
        self.planning()?.deadline()
    }

    /// Returns an iterator over the tags of this headline: the `TEXT` tokens
    /// inside its `HEADLINE_TAGS` node. The iterator is empty when the
    /// headline has no tags node.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Headline};
    ///
    /// let tags_vec = |input: &str| {
    ///     let hdl = Org::parse(input).first_node::<Headline>().unwrap();
    ///     let tags: Vec<_> = hdl.tags().map(|t| t.to_string()).collect();
    ///     tags
    /// };
    ///
    /// assert_eq!(tags_vec("* :tag:"), vec!["tag".to_string()]);
    /// assert_eq!(tags_vec("* [#A] :::::a2%:"), vec!["a2%".to_string()]);
    /// assert_eq!(tags_vec("* TODO :tag: :a2%:"), vec!["tag".to_string(), "a2%".to_string()]);
    /// assert_eq!(tags_vec("* title :tag:a2%:"), vec!["tag".to_string(), "a2%".to_string()]);
    /// ```
    pub fn tags(&self) -> impl Iterator<Item = Token> {
        let tags_node = self
            .syntax
            .children()
            .find(|child| child.kind() == SyntaxKind::HEADLINE_TAGS);

        tags_node
            .into_iter()
            .flat_map(|node| node.children_with_tokens())
            .filter_map(filter_token(SyntaxKind::TEXT))
    }

    /// Returns priority text
    ///
    /// This is the first `TEXT` token inside the `HEADLINE_PRIORITY` node,
    /// or `None` when the headline has no priority.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Headline};
    ///
    /// let hdl = Org::parse("* [#A]").first_node::<Headline>().unwrap();
    /// assert_eq!(hdl.priority().unwrap(), "A");
    /// let hdl = Org::parse("** DONE [#B]::").first_node::<Headline>().unwrap();
    /// assert_eq!(hdl.priority().unwrap(), "B");
    /// let hdl = Org::parse("* [#破]").first_node::<Headline>().unwrap();
    /// assert_eq!(hdl.priority().unwrap(), "破");
    /// ```
    pub fn priority(&self) -> Option<Token> {
        let priority_node = self
            .syntax
            .children()
            .find(|child| child.kind() == SyntaxKind::HEADLINE_PRIORITY)?;

        priority_node
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT))
    }
}
// pub enum DocumentOrHeadline {
// Document(Document),
// Headline(Headline),
// }
// impl From<Document> for DocumentOrHeadline {
// fn from(value: Document) -> Self {
// DocumentOrHeadline::Document(value)
// }
// }
// impl From<Headline> for DocumentOrHeadline {
// fn from(value: Headline) -> Self {
// DocumentOrHeadline::Headline(value)
// }
// }
// impl DocumentOrHeadline {
// pub fn section(&self) -> Option<Section> {
// match self {
// DocumentOrHeadline::Document(v) => v.section(),
// DocumentOrHeadline::Headline(v) => v.section(),
// }
// }
// }
// impl Org {
// /// set the title of this headline
// ///
// /// ```rust
// /// use orgize::Org;
// ///
// /// let mut org = Org::parse("* [#A]");
// /// let hdl = org.document().first_headline().unwrap();
// /// org.set_title(hdl, "world");
// /// assert_eq!(org.to_org(), "* [#A] world");
// /// let hdl = org.document().first_headline().unwrap();
// /// org.set_title(hdl, "world!");
// /// assert_eq!(org.to_org(), "* [#A] world!");
// /// ```
// pub fn set_title(&mut self, headline: Headline, title: &str) -> Option<HeadlineTitle> {
// let bytes = title.as_bytes();
// let title = match memchr(b'\n', bytes) {
// Some(i) if i > 0 && bytes[i] == b'\r' => &title[0..i - 1],
// Some(i) => &title[0..i],
// _ => title,
// };
// let new_title = node(HEADLINE_TITLE, object_nodes(self.create_input(title)));
// if let Some(title) = headline.title() {
// self.green = title.syntax.replace_with(new_title.into_node().unwrap());
// return Some(title);
// }
// let mut child: Vec<_> = headline
// .syntax
// .green()
// .children()
// .map(|ch| ch.to_owned())
// .collect();
// let index = support::child
// .iter()
// .enumerate()
// .filter_map(|(idx, it)| {
// if it.kind() == HEADLINE_STARS.into()
// || it.kind() == HEADLINE_KEYWORD.into()
// || it.kind() == HEADLINE_PRIORITY.into()
// {
// Some(idx + 1)
// } else {
// None
// }
// })
// .last()
// .unwrap_or_default();
// if index == child.len() {
// child.push(token(WHITESPACE, " "));
// child.push(new_title);
// } else if child[index].kind() != WHITESPACE.into() {
// child.insert(index, token(WHITESPACE, " "));
// child.insert(index + 1, new_title);
// } else {
// child.insert(index, new_title);
// }
// self.green = headline
// .syntax
// .replace_with(node(HEADLINE, child).into_node().unwrap());
// None
// }
// /// set the section of this document or headline
// ///
// /// ```rust
// /// use orgize::Org;
// ///
// /// let mut org = Org::parse("* hello");
// ///
// /// let hdl = org.document().first_headline().unwrap();
// /// org.set_section(hdl, "world");
// /// assert_eq!(org.to_org(), "* hello\nworld\n");
// ///
// /// let hdl = org.document().first_headline().unwrap();
// /// org.set_section(hdl, "world!");
// /// assert_eq!(org.to_org(), "* hello\nworld!\n");
// ///
// /// let doc = org.document();
// /// org.set_section(doc, "doc");
// /// assert_eq!(org.to_org(), "doc\n* hello\nworld!\n");
// /// ```
// pub fn set_section(
// &mut self,
// document_or_headline: impl Into<DocumentOrHeadline>,
// section: &str,
// ) -> Option<Section> {
// let document_or_headline = document_or_headline.into();
// let section = section_text(self.create_input(section)).ok()?.1.as_str();
// let section = if section.ends_with('\n') {
// section_node(self.create_input(section)).map(|(_, s)| s)
// } else {
// section_node(self.create_input(&format!("{section}\n"))).map(|(_, s)| s)
// }
// .ok()?;
// if let Some(old) = document_or_headline.section() {
// self.green = old.syntax.replace_with(section.into_node().unwrap());
// return Some(old);
// }
// match document_or_headline {
// DocumentOrHeadline::Document(document) => {
// let mut child: Vec<_> = document
// .syntax
// .green()
// .children()
// .map(|ch| ch.to_owned())
// .collect();
// let headline_idx = child.iter().position(|it| it.kind() == HEADLINE.into());
// if let Some(idx) = headline_idx {
// child.insert(idx, section);
// } else {
// child.push(section);
// }
// self.green = document
// .syntax
// .replace_with(GreenNode::new(DOCUMENT.into(), child));
// None
// }
// DocumentOrHeadline::Headline(headline) => {
// let mut child: Vec<_> = headline
// .syntax
// .green()
// .children()
// .map(|ch| ch.to_owned())
// .collect();
// let new_line_idx = support::child
// .iter()
// .position(|it| it.kind() == NEW_LINE.into());
// if let Some(idx) = new_line_idx {
// // add section *after* newline
// if idx < support::child.len() {
// support::child.insert(idx, section);
// } else {
// support::child.push(section);
// }
// } else {
// support::child.push(token(NEW_LINE, "\n"));
// support::child.push(section);
// }
// self.green = headline
// .syntax
// .replace_with(GreenNode::new(HEADLINE.into(), support::child));
// None
// }
// }
// }
// /// set the level of this headline
// ///
// /// ```rust
// /// use orgize::Org;
// ///
// /// let mut org = Org::parse("** 1\n** 2");
// ///
// /// let hdl = org.document().last_headline().unwrap();
// /// org.set_level(hdl, 1);
// /// assert_eq!(org.to_org(), "** 1\n* 2");
// ///
// /// let hdl = org.document().last_headline().unwrap();
// /// org.set_level(hdl, 3);
// /// assert_eq!(org.to_org(), "** 1\n* 2");
// /// ```
// pub fn set_level(&mut self, headline: Headline, level: usize) {
// if level == 0 {
// return;
// }
// let min_level_in_siblings = headline
// .syntax
// .siblings(rowan::Direction::Next)
// .chain(headline.syntax.siblings(rowan::Direction::Prev))
// .filter_map(Headline::cast)
// .filter_map(|headline| headline.level())
// .min()
// .unwrap_or(1);
// if level <= min_level_in_siblings {
// if let Some(stars) = headline.stars() {
// self.green = stars.replace_with(GreenToken::new(
// SyntaxKind::HEADLINE_STARS.into(),
// "*".repeat(level).as_str(),
// ));
// }
// }
// }
// }

View file

@ -1,85 +0,0 @@
use crate::syntax::SyntaxKind;
use super::{filter_token, InlineCall, Token};
impl InlineCall {
    /// Name of the code block being called
    ///
    /// This is the second `TEXT` token of the node.
    ///
    /// ```rust
    /// use orgize::{Org, ast::InlineCall};
    ///
    /// let call = Org::parse("call_square(4)").first_node::<InlineCall>().unwrap();
    /// assert_eq!(call.call(), "square");
    /// ```
    pub fn call(&self) -> Token {
        self.syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT))
            .nth(1)
            .unwrap_or_else(|| {
                debug_assert!(false, "inline call must contains two TEXT");
                Token::default()
            })
    }

    /// Header arguments written between the name and the argument list
    ///
    /// Returns `None` when there is no bracketed header in front of the
    /// opening parenthesis.
    ///
    /// ```rust
    /// use orgize::{Org, ast::InlineCall};
    ///
    /// let call = Org::parse("call_square[:results output](4)").first_node::<InlineCall>().unwrap();
    /// assert_eq!(call.inside_header().unwrap(), ":results output");
    /// let call = Org::parse("call_square(4)[:results html]").first_node::<InlineCall>().unwrap();
    /// assert!(call.inside_header().is_none());
    /// ```
    pub fn inside_header(&self) -> Option<Token> {
        self.syntax
            .children_with_tokens()
            // Stop at the argument list: a bracket found after it belongs to
            // the end header and must not be reported as the inside header.
            .take_while(|e| e.kind() != SyntaxKind::L_PARENS)
            .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET)
            // element 0 is the bracket itself, element 1 is the header text
            .nth(1)
            .map(|e| {
                debug_assert!(e.kind() == SyntaxKind::TEXT);
                Token(e.into_token())
            })
    }

    /// Arguments passed to the code block
    ///
    /// This is the element that directly follows the opening parenthesis.
    ///
    /// ```rust
    /// use orgize::{Org, ast::InlineCall};
    ///
    /// let call = Org::parse("call_square(4)").first_node::<InlineCall>().unwrap();
    /// assert_eq!(call.arguments(), "4");
    /// ```
    pub fn arguments(&self) -> Token {
        self.syntax
            .children_with_tokens()
            .skip_while(|e| e.kind() != SyntaxKind::L_PARENS)
            .nth(1)
            .map_or_else(
                || {
                    debug_assert!(false);
                    Token::default()
                },
                |e| {
                    debug_assert!(e.kind() == SyntaxKind::TEXT);
                    Token(e.into_token())
                },
            )
    }

    /// Header arguments written after the argument list
    ///
    /// Returns `None` when no bracketed header follows the argument list.
    ///
    /// ```rust
    /// use orgize::{Org, ast::InlineCall};
    ///
    /// let call = Org::parse("call_square[:results output](4)[:results html]").first_node::<InlineCall>().unwrap();
    /// assert_eq!(call.end_header().unwrap(), ":results html");
    /// let call = Org::parse("call_square(4)[:results html]").first_node::<InlineCall>().unwrap();
    /// assert_eq!(call.end_header().unwrap(), ":results html");
    /// ```
    pub fn end_header(&self) -> Option<Token> {
        self.syntax
            .children_with_tokens()
            // Anchor on the argument list instead of counting brackets, so the
            // end header is still found when the inside header is absent.
            .skip_while(|e| e.kind() != SyntaxKind::L_PARENS)
            .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET)
            // element 0 is the bracket itself, element 1 is the header text
            .nth(1)
            .map(|e| {
                debug_assert!(e.kind() == SyntaxKind::TEXT);
                Token(e.into_token())
            })
    }
}

View file

@ -1,68 +0,0 @@
use crate::SyntaxKind;
use super::{filter_token, InlineSrc, Token};
impl InlineSrc {
    /// Returns the language name of the inline source block
    ///
    /// The language is read from the second child element of the node.
    ///
    /// ```rust
    /// use orgize::{Org, ast::InlineSrc};
    ///
    /// let s = Org::parse("src_C{int a = 0;}").first_node::<InlineSrc>().unwrap();
    /// assert_eq!(s.language(), "C");
    /// let s = Org::parse("src_xml[:exports code]{<tag>text</tag>}").first_node::<InlineSrc>().unwrap();
    /// assert_eq!(s.language(), "xml");
    /// ```
    pub fn language(&self) -> Token {
        let second = self.syntax.children_with_tokens().nth(1);
        match second.and_then(filter_token(SyntaxKind::TEXT)) {
            Some(language) => language,
            None => {
                debug_assert!(false, "inline src must contains TEXT");
                Token::default()
            }
        }
    }

    /// Returns the header arguments written in square brackets, if any
    ///
    /// ```rust
    /// use orgize::{Org, ast::InlineSrc};
    ///
    /// let s = Org::parse("src_C{int a = 0;}").first_node::<InlineSrc>().unwrap();
    /// assert!(s.parameters().is_none());
    /// let s = Org::parse("src_xml[:exports code]{<tag>text</tag>}").first_node::<InlineSrc>().unwrap();
    /// assert_eq!(s.parameters().unwrap(), ":exports code");
    /// ```
    pub fn parameters(&self) -> Option<Token> {
        // Element 0 after skipping is the opening bracket, element 1 its content.
        let header = self
            .syntax
            .children_with_tokens()
            .skip_while(|element| element.kind() != SyntaxKind::L_BRACKET)
            .nth(1)?;
        debug_assert!(header.kind() == SyntaxKind::TEXT);
        Some(Token(header.into_token()))
    }

    /// Returns the source code, i.e. the last `TEXT` token of the node
    ///
    /// ```rust
    /// use orgize::{Org, ast::InlineSrc};
    ///
    /// let s = Org::parse("src_C{int a = 0;}").first_node::<InlineSrc>().unwrap();
    /// assert_eq!(s.value(), "int a = 0;");
    /// let s = Org::parse("src_xml[:exports code]{<tag>text</tag>}").first_node::<InlineSrc>().unwrap();
    /// assert_eq!(s.value(), "<tag>text</tag>");
    /// ```
    pub fn value(&self) -> Token {
        let texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT));
        match texts.last() {
            Some(code) => code,
            None => {
                debug_assert!(false, "inline src must contains TEXT");
                Token::default()
            }
        }
    }
}

View file

@ -1,39 +0,0 @@
use crate::SyntaxKind;
use super::{filter_token, Keyword, Token};
impl Keyword {
    /// Returns the keyword's key, i.e. its first `TEXT` token
    ///
    /// ```rust
    /// use orgize::{Org, ast::Keyword};
    ///
    /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::<Keyword>().unwrap();
    /// assert_eq!(keyword.key(), "KEY");
    /// ```
    pub fn key(&self) -> Token {
        let first_text = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT));
        match first_text {
            Some(key) => key,
            None => {
                debug_assert!(false, "keyword must contains TEXT");
                Token::default()
            }
        }
    }

    /// Returns the keyword's value, i.e. its second `TEXT` token
    ///
    /// An empty token is returned when there is no second `TEXT` token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Keyword};
    ///
    /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::<Keyword>().unwrap();
    /// assert_eq!(keyword.value(), " VALUE");
    /// let keyword = Org::parse("#+KEY:").first_node::<Keyword>().unwrap();
    /// assert_eq!(keyword.value(), "");
    /// ```
    pub fn value(&self) -> Token {
        let mut texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT));
        match texts.nth(1) {
            Some(value) => value,
            None => Token::default(),
        }
    }
}

View file

@ -1,77 +0,0 @@
use rowan::ast::{support, AstNode};
use super::{AffiliatedKeyword, Link, Paragraph, Token};
use crate::syntax::SyntaxKind;
impl Link {
    /// Returns the destination of this link (its `LINK_PATH` token)
    ///
    /// ```rust
    /// use orgize::{Org, ast::Link};
    ///
    /// let link = Org::parse("[[#id]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.path(), "#id");
    /// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.path(), "https://google.com");
    /// let link = Org::parse("[[https://google.com][Google]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.path(), "https://google.com");
    /// ```
    pub fn path(&self) -> Token {
        match support::token(&self.syntax, SyntaxKind::LINK_PATH) {
            Some(path) => Token(Some(path)),
            None => {
                debug_assert!(false, "link must contains LINK_PATH");
                Token::default()
            }
        }
    }

    /// Returns `true` if this link has a description
    ///
    /// A description is detected by the presence of a direct `TEXT` token child.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Link};
    ///
    /// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
    /// assert!(!link.has_description());
    /// let link = Org::parse("[[https://google.com][Google]]").first_node::<Link>().unwrap();
    /// assert!(link.has_description());
    /// ```
    pub fn has_description(&self) -> bool {
        let description = support::token(self.syntax(), SyntaxKind::TEXT);
        description.is_some()
    }

    /// Returns `true` if this link points to an image
    ///
    /// A link counts as an image when its path ends with one of the known image
    /// suffixes and it carries no description.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Link};
    ///
    /// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
    /// assert!(!link.is_image());
    /// let link = Org::parse("[[file:/home/dominik/images/jupiter.jpg]]").first_node::<Link>().unwrap();
    /// assert!(link.is_image());
    /// ```
    pub fn is_image(&self) -> bool {
        const IMAGE_SUFFIX: &[&str] = &[
            // https://github.com/bzg/org-mode/blob/7de1e818d5fbe6a05c6b1a007eed07dc27e7246b/lisp/ox.el#L253
            ".png", ".jpeg", ".jpg", ".gif", ".tiff", ".tif", ".xbm", ".xpm", ".pbm", ".pgm",
            ".ppm", ".webp", ".avif", ".svg",
        ];

        let path = self.path();
        if !IMAGE_SUFFIX.iter().any(|suffix| path.ends_with(suffix)) {
            return false;
        }
        !self.has_description()
    }

    /// Returns the caption keyword attached to the paragraph containing this link
    ///
    /// `None` is returned when the link has no parent, when the parent is not a
    /// paragraph, or when that paragraph has no caption.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Link};
    ///
    /// let link = Org::parse("#+CAPTION: image link\n[[file:/home/dominik/images/jupiter.jpg]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.caption().unwrap().value().unwrap(), " image link");
    /// ```
    pub fn caption(&self) -> Option<AffiliatedKeyword> {
        // TODO: support other element type
        let parent = self.syntax.parent()?;
        let paragraph = Paragraph::cast(parent)?;
        paragraph.caption()
    }
}

View file

@ -1,142 +0,0 @@
use super::{filter_token, List, ListItem, Token};
use crate::{syntax::SyntaxKind, SyntaxElement};
impl List {
    /// Returns `true` if this list is an ordered list
    ///
    /// Only the first item is inspected: the list is ordered when that item's
    /// bullet starts with an ASCII digit.
    ///
    /// ```rust
    /// use orgize::{Org, ast::List};
    ///
    /// let list = Org::parse("+ 1").first_node::<List>().unwrap();
    /// assert!(!list.is_ordered());
    ///
    /// let list = Org::parse("1. 1").first_node::<List>().unwrap();
    /// assert!(list.is_ordered());
    ///
    /// let list = Org::parse("1) 1\n- 2\n3. 3").first_node::<List>().unwrap();
    /// assert!(list.is_ordered());
    /// ```
    pub fn is_ordered(&self) -> bool {
        self.items().next().map_or_else(
            || {
                debug_assert!(false, "list must contains LIST_ITEM");
                false
            },
            |item| item.bullet().starts_with(|c: char| c.is_ascii_digit()),
        )
    }

    /// Returns `true` if this list contains a TAG
    ///
    /// Only the first item is inspected: the list is descriptive when that item
    /// has a `LIST_ITEM_TAG` child node.
    ///
    /// ```rust
    /// use orgize::{Org, ast::List};
    ///
    /// let list = Org::parse("- some tag :: item 2.1").first_node::<List>().unwrap();
    /// assert!(list.is_descriptive());
    /// let list = Org::parse("2. [X] item 2").first_node::<List>().unwrap();
    /// assert!(!list.is_descriptive());
    /// ```
    pub fn is_descriptive(&self) -> bool {
        self.items().next().map_or_else(
            || {
                debug_assert!(false, "list must contains LIST_ITEM");
                false
            },
            |item| {
                item.syntax
                    .children()
                    .any(|it| it.kind() == SyntaxKind::LIST_ITEM_TAG)
            },
        )
    }
}
impl ListItem {
    /// Returns the length of the item's `LIST_ITEM_INDENT` token
    ///
    /// ```rust
    /// use orgize::{Org, ast::ListItem};
    ///
    /// let item = Org::parse("- 1").first_node::<ListItem>().unwrap();
    /// assert_eq!(item.indent(), 0);
    /// let item = Org::parse(" \t * 2").first_node::<ListItem>().unwrap();
    /// assert_eq!(item.indent(), 3);
    /// ```
    pub fn indent(&self) -> usize {
        let indent_token = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::LIST_ITEM_INDENT));
        match indent_token {
            Some(indent) => indent.len(),
            None => {
                debug_assert!(false, "list item must contains LIST_ITEM_INDENT");
                0
            }
        }
    }

    /// Returns the item's bullet (its `LIST_ITEM_BULLET` token)
    ///
    /// ```rust
    /// use orgize::{Org, ast::ListItem};
    ///
    /// let item = Org::parse("- some tag").first_node::<ListItem>().unwrap();
    /// assert_eq!(item.bullet(), "- ");
    /// let item = Org::parse("2. [X] item 2").first_node::<ListItem>().unwrap();
    /// assert_eq!(item.bullet(), "2. ");
    /// ```
    pub fn bullet(&self) -> Token {
        let bullet_token = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::LIST_ITEM_BULLET));
        match bullet_token {
            Some(bullet) => bullet,
            None => {
                debug_assert!(false, "list item must contains LIST_ITEM_BULLET");
                Token::default()
            }
        }
    }

    /// Returns the content of the item's check box, if it has one
    ///
    /// ```rust
    /// use orgize::{Org, ast::ListItem};
    ///
    /// let item = Org::parse("- [-] item 1").first_node::<ListItem>().unwrap();
    /// assert_eq!(item.checkbox().unwrap(), "-");
    /// let item = Org::parse("2. [X] item 2").first_node::<ListItem>().unwrap();
    /// assert_eq!(item.checkbox().unwrap(), "X");
    /// let item = Org::parse("3) [ ] item 3").first_node::<ListItem>().unwrap();
    /// assert_eq!(item.checkbox().unwrap(), " ");
    /// ```
    pub fn checkbox(&self) -> Option<Token> {
        let check_box = self
            .syntax
            .children()
            .find(|node| node.kind() == SyntaxKind::LIST_ITEM_CHECK_BOX)?;
        check_box
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT))
    }

    /// Returns the first `TEXT` token inside the item's `LIST_ITEM_COUNTER`
    /// child, if the item has one
    ///
    // NOTE(review): presumably this is the value of a `[@N]` counter cookie —
    // confirm against the parser.
    pub fn counter(&self) -> Option<Token> {
        let counter = self
            .syntax
            .children()
            .find(|node| node.kind() == SyntaxKind::LIST_ITEM_COUNTER)?;
        counter
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT))
    }

    /// Returns the elements making up the item's tag
    ///
    /// Whitespace and `::` tokens directly inside the tag node are left out.
    /// The iterator is empty when the item has no tag.
    ///
    /// ```rust
    /// use orgize::{Org, ast::ListItem};
    ///
    /// let item = Org::parse("+ this is *TAG* :: item1").first_node::<ListItem>().unwrap();
    /// let tag = item.tag().map(|n| n.to_string()).collect::<String>();
    /// assert_eq!(tag, "this is *TAG* ");
    /// ```
    pub fn tag(&self) -> impl Iterator<Item = SyntaxElement> {
        let tag_node = self
            .syntax
            .children()
            .find(|node| node.kind() == SyntaxKind::LIST_ITEM_TAG);

        tag_node.into_iter().flat_map(|node| {
            node.children_with_tokens().filter(|element| {
                let kind = element.kind();
                !(kind == SyntaxKind::WHITESPACE || kind == SyntaxKind::COLON2)
            })
        })
    }
}

View file

@ -1,38 +0,0 @@
use crate::SyntaxKind;
use super::{filter_token, Macros, Token};
impl Macros {
    /// Returns the macro name, i.e. the first `TEXT` token of the node
    ///
    /// ```rust
    /// use orgize::{Org, ast::Macros};
    ///
    /// let m = Org::parse("{{{title}}}").first_node::<Macros>().unwrap();
    /// assert_eq!(m.key(), "title");
    /// let m = Org::parse("{{{two_arg_macro(1, 2)}}}").first_node::<Macros>().unwrap();
    /// assert_eq!(m.key(), "two_arg_macro");
    /// ```
    pub fn key(&self) -> Token {
        let first_text = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT));
        match first_text {
            Some(name) => name,
            None => {
                debug_assert!(false, "macros must contains TEXT");
                Token::default()
            }
        }
    }

    /// Returns the macro arguments, i.e. the second `TEXT` token, if present
    ///
    /// ```rust
    /// use orgize::{Org, ast::Macros};
    ///
    /// let m = Org::parse("{{{title}}}").first_node::<Macros>().unwrap();
    /// assert!(m.args().is_none());
    /// let m = Org::parse("{{{two_arg_macro(1, 2)}}}").first_node::<Macros>().unwrap();
    /// assert_eq!(m.args().unwrap(), "1, 2");
    /// ```
    pub fn args(&self) -> Option<Token> {
        let mut texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT));
        texts.nth(1)
    }
}

View file

@ -1,143 +0,0 @@
#[rustfmt::skip]
mod generated;
mod affiliated_keyword;
mod block;
mod clock;
mod comment;
mod drawer;
mod entity;
mod fixed_width;
mod headline;
mod inline_call;
mod inline_src;
mod keyword;
mod link;
mod list;
mod macros;
mod planning;
mod snippet;
mod table;
mod timestamp;
use std::{
borrow::{Borrow, Cow},
fmt::Debug,
hash::Hash,
ops::Deref,
};
pub use generated::*;
pub use headline::*;
pub use rowan::ast::support::*;
pub use timestamp::*;
use crate::{
syntax::{SyntaxKind, SyntaxNode},
SyntaxToken,
};
use rowan::{ast::AstNode, NodeOrToken};
/// Counts the direct children of `parent` whose kind is `BLANK_LINE`
pub fn blank_lines(parent: &SyntaxNode) -> usize {
    parent.children_with_tokens().fold(0, |count, element| {
        if element.kind() == SyntaxKind::BLANK_LINE {
            count + 1
        } else {
            count
        }
    })
}
/// Returns the last direct child node of `parent` that can be cast to `N`
pub fn last_child<N: AstNode>(parent: &rowan::SyntaxNode<N::Language>) -> Option<N> {
    let mut latest = None;
    for child in parent.children() {
        if let Some(node) = N::cast(child) {
            latest = Some(node);
        }
    }
    latest
}
/// Returns the last direct child token of `parent` with the given `kind`
pub fn last_token(parent: &SyntaxNode, kind: SyntaxKind) -> Option<Token> {
    let matching = parent
        .children_with_tokens()
        .filter_map(filter_token(kind));
    matching.fold(None, |_, token| Some(token))
}
/// Looks up a child token of `parent` with the given `kind` through
/// `rowan::ast::support::token` and wraps the result in a [`Token`]
pub fn token(parent: &SyntaxNode, kind: SyntaxKind) -> Option<Token> {
    let found = rowan::ast::support::token(parent, kind)?;
    Some(Token(Some(found)))
}
/// Builds a closure that keeps only tokens of the given `kind`
///
/// The returned closure maps a syntax element to `Some(Token)` when the element
/// is a token whose kind equals `kind`, and to `None` for nodes and for tokens
/// of any other kind.
pub fn filter_token(
    kind: SyntaxKind,
) -> impl Fn(NodeOrToken<SyntaxNode, SyntaxToken>) -> Option<Token> {
    move |element| {
        element
            .into_token()
            .filter(|candidate| candidate.kind() == kind)
            .map(|matched| Token(Some(matched)))
    }
}
/// A simple wrapper of `Option<SyntaxToken>`
///
/// It acts like a `token.text()` when inner is `Some(token)`, and an empty string when `None`.
///
/// The derived `Default` value wraps `None`, so `Token::default()` reads as an
/// empty string.
#[derive(Default, Eq)]
pub struct Token(pub(crate) Option<SyntaxToken>);
impl AsRef<str> for Token {
    /// Returns the text of the wrapped token, or `""` when there is none.
    fn as_ref(&self) -> &str {
        self.0.as_ref().map_or("", |token| token.text())
    }
}
impl Borrow<str> for Token {
    /// Borrows the token as its text, same as the `AsRef<str>` view.
    fn borrow(&self) -> &str {
        AsRef::<str>::as_ref(self)
    }
}
impl Debug for Token {
    /// Formats the token exactly like the `Debug` output of its text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text: &str = self.as_ref();
        Debug::fmt(text, f)
    }
}
impl<'a> PartialEq<&'a str> for Token {
    /// Compares the token's text with a string slice.
    fn eq(&self, other: &&'a str) -> bool {
        let text: &str = self.as_ref();
        text == *other
    }
}
impl PartialEq<String> for Token {
fn eq(&self, other: &String) -> bool {
self.as_ref() == other
}
}
impl PartialEq<Token> for Token {
    /// Two tokens are equal when their texts are equal; the underlying syntax
    /// tokens themselves are not compared.
    fn eq(&self, other: &Token) -> bool {
        let lhs: &str = self.as_ref();
        let rhs: &str = other.as_ref();
        lhs == rhs
    }
}
impl Hash for Token {
    /// Hashes the token's text, so a token hashes like the `str` it reads as.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let text: &str = self.as_ref();
        text.hash(state)
    }
}
impl<'a> PartialEq<Cow<'a, str>> for Token {
fn eq(&self, other: &Cow<'a, str>) -> bool {
self.as_ref() == other
}
}
impl PartialEq<str> for Token {
    /// Compares the token's text with a `str`.
    fn eq(&self, other: &str) -> bool {
        let text: &str = self.as_ref();
        text == other
    }
}
impl Deref for Token {
    type Target = str;

    /// Dereferences to the token's text, which makes every `str` method
    /// available directly on a `Token`.
    #[inline]
    fn deref(&self) -> &str {
        AsRef::<str>::as_ref(self)
    }
}

View file

@ -1,67 +0,0 @@
use rowan::ast::AstNode;
use super::{Planning, Timestamp};
use crate::syntax::SyntaxKind;
impl Planning {
/// Returns deadline timestamp
///
///
/// ```rust
/// use orgize::{ast::Planning, Org};
///
/// let s = Org::parse("* a\nDEADLINE: <2019-04-08 Mon>")
/// .first_node::<Planning>()
/// .unwrap()
/// .deadline()
/// .unwrap();
/// assert_eq!(s.day_start().unwrap(), "08");
/// ```
pub fn deadline(&self) -> Option<Timestamp> {
self.syntax
.children()
.filter(|n| n.kind() == SyntaxKind::PLANNING_DEADLINE)
.last()
.and_then(|n| n.children().find_map(Timestamp::cast))
}
/// Returns scheduled timestamp
///
/// ```rust
/// use orgize::{ast::Planning, Org};
///
/// let s = Org::parse("* a\nSCHEDULED: <2019-04-08 Mon>")
/// .first_node::<Planning>()
/// .unwrap()
/// .scheduled()
/// .unwrap();
/// assert_eq!(s.year_start().unwrap(), "2019");
/// ```
pub fn scheduled(&self) -> Option<Timestamp> {
self.syntax
.children()
.filter(|n| n.kind() == SyntaxKind::PLANNING_SCHEDULED)
.last()
.and_then(|n| n.children().find_map(Timestamp::cast))
}
/// Returns closed timestamp
///
/// ```rust
/// use orgize::{ast::Planning, Org};
///
/// let s = Org::parse("* a\nCLOSED: <2019-04-08 Mon>")
/// .first_node::<Planning>()
/// .unwrap()
/// .closed()
/// .unwrap();
/// assert_eq!(s.month_start().unwrap(), "04");
/// ```
pub fn closed(&self) -> Option<Timestamp> {
self.syntax
.children()
.filter(|n| n.kind() == SyntaxKind::PLANNING_CLOSED)
.last()
.and_then(|n| n.children().find_map(Timestamp::cast))
}
}

View file

@ -1,40 +0,0 @@
use crate::syntax::SyntaxKind;
use super::{filter_token, Snippet, Token};
impl Snippet {
    /// Returns the export backend name, i.e. the first `TEXT` token
    ///
    /// ```rust
    /// use orgize::{Org, ast::Snippet};
    ///
    /// let snippet = Org::parse("@@BACKEND:VALUE@@").first_node::<Snippet>().unwrap();
    /// assert_eq!(snippet.backend(), "BACKEND");
    /// ```
    pub fn backend(&self) -> Token {
        let first_text = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT));
        match first_text {
            Some(backend) => backend,
            None => {
                debug_assert!(false, "snippet must contains TEXT");
                Token::default()
            }
        }
    }

    /// Returns the snippet content, i.e. the second `TEXT` token
    ///
    /// ```rust
    /// use orgize::{Org, ast::Snippet};
    ///
    /// let snippet = Org::parse("@@BACKEND:@@").first_node::<Snippet>().unwrap();
    /// assert_eq!(snippet.value(), "");
    /// let snippet = Org::parse("@@BACKEND:VALUE@@").first_node::<Snippet>().unwrap();
    /// assert_eq!(snippet.value(), "VALUE");
    /// ```
    pub fn value(&self) -> Token {
        let mut texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT));
        match texts.nth(1) {
            Some(content) => content,
            None => {
                debug_assert!(false, "snippet must contains two TEXT");
                Token::default()
            }
        }
    }
}

View file

@ -1,110 +0,0 @@
use rowan::ast::AstNode;
use super::{filter_token, OrgTable, OrgTableRow, Token};
use crate::syntax::SyntaxKind;
impl OrgTable {
    /// Returns `true` if this table has a header
    ///
    /// A table has a header when it contains at least two row groups.
    ///
    /// ```rust
    /// use orgize::{Org, ast::OrgTable};
    ///
    /// let org = Org::parse(r#"
    /// | a | b |
    /// |---+---|
    /// | c | d |"#);
    /// let table = org.first_node::<OrgTable>().unwrap();
    /// assert!(table.has_header());
    ///
    /// let org = Org::parse(r#"
    /// | a | b |
    /// | 0 | 1 |
    /// |---+---|
    /// | a | w |"#);
    /// let table = org.first_node::<OrgTable>().unwrap();
    /// assert!(table.has_header());
    ///
    /// let org = Org::parse(r#"
    /// | a | b |
    /// | c | d |"#);
    /// let table = org.first_node::<OrgTable>().unwrap();
    /// assert!(!table.has_header());
    ///
    /// let org = Org::parse(r#"
    /// |---+---|
    /// | a | b |
    /// | c | d |
    /// |---+---|"#);
    /// let table = org.first_node::<OrgTable>().unwrap();
    /// assert!(!table.has_header());
    /// ```
    pub fn has_header(&self) -> bool {
        let rows = self.syntax.children().filter_map(OrgTableRow::cast);
        // Drop any leading rules, then the whole first group of standard rows.
        let after_leading_rules = rows.skip_while(OrgTableRow::is_rule);
        let mut after_first_group = after_leading_rules.skip_while(OrgTableRow::is_standard);
        // Any non-rule row that is left starts a second row group.
        after_first_group.any(|row| !row.is_rule())
    }

    /// Formulas associated to the table
    ///
    /// Yields the last `TEXT` token of every `KEYWORD` child node of the table.
    ///
    /// ```rust
    /// use orgize::{Org, ast::OrgTable};
    ///
    /// let table = Org::parse("| a |").first_node::<OrgTable>().unwrap();
    /// assert_eq!(table.tblfm().count(), 0);
    ///
    /// let table = Org::parse("| a |\n#+tblfm: test").first_node::<OrgTable>().unwrap();
    /// let tblfm = table.tblfm().collect::<Vec<_>>();
    /// assert_eq!(tblfm.len(), 1);
    /// assert_eq!(tblfm[0], " test");
    ///
    /// let table = Org::parse("| a |\n#+TBLFM: test1\n#+TBLFM: test2").first_node::<OrgTable>().unwrap();
    /// let tblfm = table.tblfm().collect::<Vec<_>>();
    /// assert_eq!(tblfm.len(), 2);
    /// assert_eq!(tblfm[0], " test1");
    /// assert_eq!(tblfm[1], " test2");
    /// ```
    pub fn tblfm(&self) -> impl Iterator<Item = Token> {
        self.syntax
            .children()
            .filter(|node| node.kind() == SyntaxKind::KEYWORD)
            .filter_map(|keyword| {
                keyword
                    .children_with_tokens()
                    .filter_map(filter_token(SyntaxKind::TEXT))
                    .last()
            })
    }
}
impl OrgTableRow {
    /// Returns `true` if this row is a rule, i.e. its kind is `ORG_TABLE_RULE_ROW`
    ///
    /// ```rust
    /// use orgize::{Org, ast::OrgTableRow};
    ///
    /// let org = Org::parse("|----|----|\n|Foo |Bar |");
    /// let row = org.first_node::<OrgTableRow>().unwrap();
    /// assert!(row.is_rule());
    /// ```
    pub fn is_rule(&self) -> bool {
        let kind = self.syntax.kind();
        kind == SyntaxKind::ORG_TABLE_RULE_ROW
    }

    /// Returns `true` if this row is a standard row, i.e. its kind is
    /// `ORG_TABLE_STANDARD_ROW`
    ///
    /// ```rust
    /// use orgize::{Org, ast::OrgTableRow};
    ///
    /// let org = Org::parse("|Foo |Bar |\n|----|----|");
    /// let row = org.first_node::<OrgTableRow>().unwrap();
    /// assert!(row.is_standard());
    /// ```
    pub fn is_standard(&self) -> bool {
        let kind = self.syntax.kind();
        kind == SyntaxKind::ORG_TABLE_STANDARD_ROW
    }
}

View file

@ -1,288 +0,0 @@
use super::{filter_token, Timestamp};
use crate::syntax::SyntaxKind;
/// Unit of a timestamp repeater or warning delay
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TimeUnit {
    /// Written as `h`
    Hour,
    /// Written as `d`
    Day,
    /// Written as `w`
    Week,
    /// Written as `m`
    Month,
    /// Written as `y`
    Year,
}
/// Kind of a timestamp repeater, determined by its mark
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum RepeaterType {
    /// Repeater written with the `+` mark
    Cumulate,
    /// Repeater written with the `++` mark
    CatchUp,
    /// Repeater written with the `.+` mark
    Restart,
}
/// Kind of a timestamp warning delay, determined by its mark
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum DelayType {
    /// Delay written with the `-` mark
    All,
    /// Delay written with the `--` mark
    First,
}
impl Timestamp {
    /// Returns `true` if this node's kind is `TIMESTAMP_ACTIVE`
    ///
    /// ```rust
    /// use orgize::{Org, ast::Timestamp};
    ///
    /// let ts = Org::parse("<2003-09-16 Tue 09:39-10:39>").first_node::<Timestamp>().unwrap();
    /// assert!(ts.is_active());
    /// let ts = Org::parse("<2003-09-16 Tue 09:39>--<2003-09-16 Tue 10:39>").first_node::<Timestamp>().unwrap();
    /// assert!(ts.is_active());
    /// let ts = Org::parse("<2003-09-16 Tue 09:39>").first_node::<Timestamp>().unwrap();
    /// assert!(ts.is_active());
    /// ```
    pub fn is_active(&self) -> bool {
        let kind = self.syntax.kind();
        kind == SyntaxKind::TIMESTAMP_ACTIVE
    }

    /// Returns `true` if this node's kind is `TIMESTAMP_INACTIVE`
    ///
    /// ```rust
    /// use orgize::{Org, ast::Timestamp};
    ///
    /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
    /// assert!(ts.is_inactive());
    /// let ts = Org::parse("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]").first_node::<Timestamp>().unwrap();
    /// assert!(ts.is_inactive());
    /// let ts = Org::parse("[2003-09-16 Tue 09:39]").first_node::<Timestamp>().unwrap();
    /// assert!(ts.is_inactive());
    /// ```
    pub fn is_inactive(&self) -> bool {
        let kind = self.syntax.kind();
        kind == SyntaxKind::TIMESTAMP_INACTIVE
    }

    /// Returns `true` if this node's kind is `TIMESTAMP_DIARY`
    ///
    /// ```rust
    /// use orgize::{Org, ast::Timestamp};
    ///
    /// let ts = Org::parse("<%%(org-calendar-holiday)>").first_node::<Timestamp>().unwrap();
    /// assert!(ts.is_diary());
    /// ```
    pub fn is_diary(&self) -> bool {
        let kind = self.syntax.kind();
        kind == SyntaxKind::TIMESTAMP_DIARY
    }

    /// Returns `true` if this timestamp has a range
    ///
    /// ```rust
    /// use orgize::{Org, ast::Timestamp};
    ///
    /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
    /// assert!(ts.is_range());
    /// let ts = Org::parse("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]").first_node::<Timestamp>().unwrap();
    /// assert!(ts.is_range());
    /// let ts = Org::parse("[2003-09-16]--[2003-09-16]").first_node::<Timestamp>().unwrap();
    /// assert!(ts.is_range());
    /// let ts = Org::parse("[2003-09-16 Tue 09:39]").first_node::<Timestamp>().unwrap();
    /// assert!(!ts.is_range());
    /// ```
    pub fn is_range(&self) -> bool {
        let minus_tokens = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::MINUS))
            .count();
        // NOTE(review): a single `YYYY-MM-DD` date appears to contribute two MINUS
        // tokens, so more than two is taken to mean a range — confirm against the parser.
        minus_tokens > 2
    }

    /// Returns the type of the first repeater, if any
    ///
    /// ```rust
    /// use orgize::{Org, ast::{Timestamp, RepeaterType}};
    ///
    /// let t = Org::parse("[2000-01-01 +1w]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.repeater_type(), Some(RepeaterType::Cumulate));
    /// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.repeater_type(), Some(RepeaterType::Restart));
    /// let t = Org::parse("[2000-01-01 --1y]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.repeater_type(), None);
    /// ```
    pub fn repeater_type(&self) -> Option<RepeaterType> {
        let (kind, _, _) = self.nth_repeater(0)?;
        Some(kind)
    }

    /// Returns the numeric value of the first repeater, if any
    ///
    /// ```rust
    /// use orgize::{Org, ast::Timestamp};
    ///
    /// let t = Org::parse("[2000-01-01 +1w]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.repeater_value(), Some(1));
    /// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.repeater_value(), Some(10));
    /// let t = Org::parse("[2000-01-01 --1y]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.repeater_value(), None);
    /// ```
    pub fn repeater_value(&self) -> Option<u32> {
        let (_, value, _) = self.nth_repeater(0)?;
        Some(value)
    }

    /// Returns the time unit of the first repeater, if any
    ///
    /// ```rust
    /// use orgize::{Org, ast::{Timestamp, TimeUnit}};
    ///
    /// let t = Org::parse("[2000-01-01 +1w]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.repeater_unit(), Some(TimeUnit::Week));
    /// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.repeater_unit(), Some(TimeUnit::Day));
    /// let t = Org::parse("[2000-01-01 --1y]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.repeater_unit(), None);
    /// ```
    pub fn repeater_unit(&self) -> Option<TimeUnit> {
        let (_, _, unit) = self.nth_repeater(0)?;
        Some(unit)
    }

    /// Returns the type of the first warning delay, if any
    ///
    /// ```rust
    /// use orgize::{Org, ast::{Timestamp, DelayType}};
    ///
    /// let t = Org::parse("[2000-01-01 -3y]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.warning_type(), Some(DelayType::All));
    /// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.warning_type(), Some(DelayType::All));
    /// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.warning_type(), Some(DelayType::First));
    /// ```
    pub fn warning_type(&self) -> Option<DelayType> {
        let (kind, _, _) = self.nth_delay(0)?;
        Some(kind)
    }

    /// Returns the numeric value of the first warning delay, if any
    ///
    /// ```rust
    /// use orgize::{Org, ast::Timestamp};
    ///
    /// let t = Org::parse("[2000-01-01 -3y]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.warning_value(), Some(3));
    /// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.warning_value(), Some(5));
    /// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.warning_value(), Some(10));
    /// ```
    pub fn warning_value(&self) -> Option<u32> {
        let (_, value, _) = self.nth_delay(0)?;
        Some(value)
    }

    /// Returns the time unit of the first warning delay, if any
    ///
    /// ```rust
    /// use orgize::{Org, ast::{Timestamp, TimeUnit}};
    ///
    /// let t = Org::parse("[2000-01-01 -3y]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.warning_unit(), Some(TimeUnit::Year));
    /// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.warning_unit(), Some(TimeUnit::Week));
    /// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(t.warning_unit(), Some(TimeUnit::Month));
    /// ```
    pub fn warning_unit(&self) -> Option<TimeUnit> {
        let (_, _, unit) = self.nth_delay(0)?;
        Some(unit)
    }

    /// Maps a unit letter (`h`, `d`, `w`, `m`, `y`) to its `TimeUnit`
    fn parse_time_unit(text: &str) -> Option<TimeUnit> {
        match text {
            "h" => Some(TimeUnit::Hour),
            "d" => Some(TimeUnit::Day),
            "w" => Some(TimeUnit::Week),
            "m" => Some(TimeUnit::Month),
            "y" => Some(TimeUnit::Year),
            _ => None,
        }
    }

    /// Reads the `nth` (zero-based) repeater as `(type, value, unit)`
    ///
    /// The three parts are taken from the `TIMESTAMP_REPEATER_MARK` token and
    /// the two elements that follow it. `None` is returned when there is no
    /// such mark or when any of the three parts cannot be interpreted.
    fn nth_repeater(&self, nth: usize) -> Option<(RepeaterType, u32, TimeUnit)> {
        // Number of marks still to be seen before the wanted one is reached.
        let mut remaining = nth + 1;
        let mut elements = self.syntax.children_with_tokens().skip_while(|element| {
            if element.kind() != SyntaxKind::TIMESTAMP_REPEATER_MARK {
                return true;
            }
            remaining -= 1;
            remaining != 0
        });

        let mark_element = elements.next()?;
        let mark = match mark_element.as_token()?.text() {
            "++" => RepeaterType::CatchUp,
            "+" => RepeaterType::Cumulate,
            ".+" => RepeaterType::Restart,
            _ => return None,
        };

        let value = elements.next()?.as_token()?.text().parse::<u32>().ok()?;

        let unit_element = elements.next()?;
        let unit = Self::parse_time_unit(unit_element.as_token()?.text())?;

        Some((mark, value, unit))
    }

    /// Reads the `nth` (zero-based) warning delay as `(type, value, unit)`
    ///
    /// The three parts are taken from the `TIMESTAMP_DELAY_MARK` token and the
    /// two elements that follow it. `None` is returned when there is no such
    /// mark or when any of the three parts cannot be interpreted.
    fn nth_delay(&self, nth: usize) -> Option<(DelayType, u32, TimeUnit)> {
        // Number of marks still to be seen before the wanted one is reached.
        let mut remaining = nth + 1;
        let mut elements = self.syntax.children_with_tokens().skip_while(|element| {
            if element.kind() != SyntaxKind::TIMESTAMP_DELAY_MARK {
                return true;
            }
            remaining -= 1;
            remaining != 0
        });

        let mark_element = elements.next()?;
        let mark = match mark_element.as_token()?.text() {
            "-" => DelayType::All,
            "--" => DelayType::First,
            _ => return None,
        };

        let value = elements.next()?.as_token()?.text().parse::<u32>().ok()?;

        let unit_element = elements.next()?;
        let unit = Self::parse_time_unit(unit_element.as_token()?.text())?;

        Some((mark, value, unit))
    }

    /// Converts timestamp start to chrono NaiveDateTime
    ///
    /// Returns `None` when any of the year, month, day, hour or minute parts
    /// is missing, is not a number, or does not form a valid date or time.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Timestamp};
    /// use chrono::NaiveDateTime;
    ///
    /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(ts.start_to_chrono().unwrap(), "2003-09-16T09:39:00".parse::<NaiveDateTime>().unwrap());
    ///
    /// let ts = Org::parse("[2003-13-00 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
    /// assert!(ts.start_to_chrono().is_none());
    /// ```
    #[cfg(feature = "chrono")]
    pub fn start_to_chrono(&self) -> Option<chrono::NaiveDateTime> {
        let year = self.year_start()?.parse::<i32>().ok()?;
        let month = self.month_start()?.parse::<u32>().ok()?;
        let day = self.day_start()?.parse::<u32>().ok()?;
        let date = chrono::NaiveDate::from_ymd_opt(year, month, day)?;

        let hour = self.hour_start()?.parse::<u32>().ok()?;
        let minute = self.minute_start()?.parse::<u32>().ok()?;
        let time = chrono::NaiveTime::from_hms_opt(hour, minute, 0)?;

        Some(chrono::NaiveDateTime::new(date, time))
    }

    /// Converts timestamp end to chrono NaiveDateTime
    ///
    /// Returns `None` when any of the year, month, day, hour or minute parts
    /// is missing, is not a number, or does not form a valid date or time.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Timestamp};
    /// use chrono::NaiveDateTime;
    ///
    /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
    /// assert_eq!(ts.end_to_chrono().unwrap(), "2003-09-16T10:39:00".parse::<NaiveDateTime>().unwrap());
    /// ```
    #[cfg(feature = "chrono")]
    pub fn end_to_chrono(&self) -> Option<chrono::NaiveDateTime> {
        let year = self.year_end()?.parse::<i32>().ok()?;
        let month = self.month_end()?.parse::<u32>().ok()?;
        let day = self.day_end()?.parse::<u32>().ok()?;
        let date = chrono::NaiveDate::from_ymd_opt(year, month, day)?;

        let hour = self.hour_end()?.parse::<u32>().ok()?;
        let minute = self.minute_end()?.parse::<u32>().ok()?;
        let time = chrono::NaiveTime::from_hms_opt(hour, minute, 0)?;

        Some(chrono::NaiveDateTime::new(date, time))
    }
}

View file

@ -1,56 +0,0 @@
use crate::syntax::document::document_node;
use crate::Org;
/// Configuration used when parsing org text
#[derive(Debug, Clone)]
pub struct ParseConfig {
    /// Todo keywords recognised in headlines, given as a pair of lists
    ///
    /// The default value is `(["TODO"], ["DONE"])`.
    pub todo_keywords: (Vec<String>, Vec<String>),

    /// Dual keywords
    ///
    // NOTE(review): presumably the counterpart of `org-element-dual-keywords`
    // — confirm against the parser.
    pub dual_keywords: Vec<String>,

    /// Parsed keywords
    ///
    // NOTE(review): presumably the counterpart of `org-element-parsed-keywords`
    // — confirm against the parser.
    pub parsed_keywords: Vec<String>,

    /// Keywords treated as affiliated keywords
    ///
    /// Equivalent to [`org-element-affiliated-keywords`](https://git.sr.ht/~bzg/org-mode/tree/6f960f3c6a4dfe137fbd33fef9f7dadfd229600c/item/lisp/org-element.el#L331)
    pub affiliated_keywords: Vec<String>,
}
impl ParseConfig {
    /// Parses `input` with this configuration and returns the resulting `Org`
    ///
    /// The configuration is consumed and stored in the returned value.
    pub fn parse(self, input: impl AsRef<str>) -> Org {
        // Build the green tree first: the parser input borrows `self`, which is
        // moved into the result afterwards.
        let green = {
            let parser_input = (input.as_ref(), &self).into();
            let document = document_node(parser_input).unwrap().1;
            document.into_node().unwrap()
        };

        Org {
            config: self,
            green,
        }
    }
}
impl Default for ParseConfig {
    /// Builds the default configuration: `TODO`/`DONE` as todo keywords,
    /// `CAPTION` and `RESULTS` as dual keywords, `CAPTION` as parsed keyword and
    /// a fixed list of thirteen affiliated keywords.
    fn default() -> Self {
        // Turns a list of string literals into owned strings.
        let owned = |words: &[&str]| -> Vec<String> {
            words.iter().map(|word| word.to_string()).collect()
        };

        ParseConfig {
            todo_keywords: (owned(&["TODO"]), owned(&["DONE"])),
            dual_keywords: owned(&["CAPTION", "RESULTS"]),
            parsed_keywords: owned(&["CAPTION"]),
            affiliated_keywords: owned(&[
                "CAPTION", "DATA", "HEADER", "HEADERS", "LABEL", "NAME", "PLOT", "RESNAME",
                "RESULT", "RESULTS", "SOURCE", "SRCNAME", "TBLNAME",
            ]),
        }
    }
}

View file

@ -1,468 +0,0 @@
// https://git.sr.ht/~bzg/org-mode/tree/bfa4f9d5aa3e5c94974cae7a459cb5e5b4b15f52/item/lisp/org-entities.el#L85
// nil -> false
// t -> true
// \x00A0 -> \\x00A0
#[rustfmt::skip]
pub const ENTITIES: &[(&str, &str, bool, &str, &str, &str, &str)] = &[
// ("* Letters"
// Latin
("Agrave", "\\`{A}", false, "&Agrave;", "A", "À", "À"),
("agrave", "\\`{a}", false, "&agrave;", "a", "à", "à"),
("Aacute", "\\'{A}", false, "&Aacute;", "A", "Á", "Á"),
("aacute", "\\'{a}", false, "&aacute;", "a", "á", "á"),
("Acirc", "\\^{A}", false, "&Acirc;", "A", "Â", "Â"),
("acirc", "\\^{a}", false, "&acirc;", "a", "â", "â"),
("Amacr", "\\={A}", false, "&Amacr;", "A", "Ã", "Ã"),
("amacr", "\\={a}", false, "&amacr;", "a", "ã", "ã"),
("Atilde", "\\~{A}", false, "&Atilde;", "A", "Ã", "Ã"),
("atilde", "\\~{a}", false, "&atilde;", "a", "ã", "ã"),
("Auml", "\\\"{A}", false, "&Auml;", "Ae", "Ä", "Ä"),
("auml", "\\\"{a}", false, "&auml;", "ae", "ä", "ä"),
("Aring", "\\AA{}", false, "&Aring;", "A", "Å", "Å"),
("AA", "\\AA{}", false, "&Aring;", "A", "Å", "Å"),
("aring", "\\aa{}", false, "&aring;", "a", "å", "å"),
("AElig", "\\AE{}", false, "&AElig;", "AE", "Æ", "Æ"),
("aelig", "\\ae{}", false, "&aelig;", "ae", "æ", "æ"),
("Ccedil", "\\c{C}", false, "&Ccedil;", "C", "Ç", "Ç"),
("ccedil", "\\c{c}", false, "&ccedil;", "c", "ç", "ç"),
("Egrave", "\\`{E}", false, "&Egrave;", "E", "È", "È"),
("egrave", "\\`{e}", false, "&egrave;", "e", "è", "è"),
("Eacute", "\\'{E}", false, "&Eacute;", "E", "É", "É"),
("eacute", "\\'{e}", false, "&eacute;", "e", "é", "é"),
("Ecirc", "\\^{E}", false, "&Ecirc;", "E", "Ê", "Ê"),
("ecirc", "\\^{e}", false, "&ecirc;", "e", "ê", "ê"),
("Euml", "\\\"{E}", false, "&Euml;", "E", "Ë", "Ë"),
("euml", "\\\"{e}", false, "&euml;", "e", "ë", "ë"),
("Igrave", "\\`{I}", false, "&Igrave;", "I", "Ì", "Ì"),
("igrave", "\\`{i}", false, "&igrave;", "i", "ì", "ì"),
("Iacute", "\\'{I}", false, "&Iacute;", "I", "Í", "Í"),
("iacute", "\\'{i}", false, "&iacute;", "i", "í", "í"),
("Idot", "\\.{I}", false, "&idot;", "I", "İ", "İ"),
("inodot", "\\i", false, "&inodot;", "i", "ı", "ı"),
("Icirc", "\\^{I}", false, "&Icirc;", "I", "Î", "Î"),
("icirc", "\\^{i}", false, "&icirc;", "i", "î", "î"),
("Iuml", "\\\"{I}", false, "&Iuml;", "I", "Ï", "Ï"),
("iuml", "\\\"{i}", false, "&iuml;", "i", "ï", "ï"),
("Ntilde", "\\~{N}", false, "&Ntilde;", "N", "Ñ", "Ñ"),
("ntilde", "\\~{n}", false, "&ntilde;", "n", "ñ", "ñ"),
("Ograve", "\\`{O}", false, "&Ograve;", "O", "Ò", "Ò"),
("ograve", "\\`{o}", false, "&ograve;", "o", "ò", "ò"),
("Oacute", "\\'{O}", false, "&Oacute;", "O", "Ó", "Ó"),
("oacute", "\\'{o}", false, "&oacute;", "o", "ó", "ó"),
("Ocirc", "\\^{O}", false, "&Ocirc;", "O", "Ô", "Ô"),
("ocirc", "\\^{o}", false, "&ocirc;", "o", "ô", "ô"),
("Otilde", "\\~{O}", false, "&Otilde;", "O", "Õ", "Õ"),
("otilde", "\\~{o}", false, "&otilde;", "o", "õ", "õ"),
("Ouml", "\\\"{O}", false, "&Ouml;", "Oe", "Ö", "Ö"),
("ouml", "\\\"{o}", false, "&ouml;", "oe", "ö", "ö"),
("Oslash", "\\O", false, "&Oslash;", "O", "Ø", "Ø"),
("oslash", "\\o{}", false, "&oslash;", "o", "ø", "ø"),
("OElig", "\\OE{}", false, "&OElig;", "OE", "OE", "Œ"),
("oelig", "\\oe{}", false, "&oelig;", "oe", "oe", "œ"),
("Scaron", "\\v{S}", false, "&Scaron;", "S", "S", "Š"),
("scaron", "\\v{s}", false, "&scaron;", "s", "s", "š"),
("szlig", "\\ss{}", false, "&szlig;", "ss", "ß", "ß"),
("Ugrave", "\\`{U}", false, "&Ugrave;", "U", "Ù", "Ù"),
("ugrave", "\\`{u}", false, "&ugrave;", "u", "ù", "ù"),
("Uacute", "\\'{U}", false, "&Uacute;", "U", "Ú", "Ú"),
("uacute", "\\'{u}", false, "&uacute;", "u", "ú", "ú"),
("Ucirc", "\\^{U}", false, "&Ucirc;", "U", "Û", "Û"),
("ucirc", "\\^{u}", false, "&ucirc;", "u", "û", "û"),
("Uuml", "\\\"{U}", false, "&Uuml;", "Ue", "Ü", "Ü"),
("uuml", "\\\"{u}", false, "&uuml;", "ue", "ü", "ü"),
("Yacute", "\\'{Y}", false, "&Yacute;", "Y", "Ý", "Ý"),
("yacute", "\\'{y}", false, "&yacute;", "y", "ý", "ý"),
("Yuml", "\\\"{Y}", false, "&Yuml;", "Y", "Y", "Ÿ"),
("yuml", "\\\"{y}", false, "&yuml;", "y", "ÿ", "ÿ"),
// Latin (special face)
("fnof", "\\textit{f}", false, "&fnof;", "f", "f", "ƒ"),
("real", "\\Re", true, "&real;", "R", "R", ""),
("image", "\\Im", true, "&image;", "I", "I", ""),
("weierp", "\\wp", true, "&weierp;", "P", "P", ""),
("ell", "\\ell", true, "&ell;", "ell", "ell", ""),
("imath", "\\imath", true, "&imath;", "[dotless i]", "dotless i", "ı"),
("jmath", "\\jmath", true, "&jmath;", "[dotless j]", "dotless j", "ȷ"),
// Greek
("Alpha", "A", false, "&Alpha;", "Alpha", "Alpha", "Α"),
("alpha", "\\alpha", true, "&alpha;", "alpha", "alpha", "α"),
("Beta", "B", false, "&Beta;", "Beta", "Beta", "Β"),
("beta", "\\beta", true, "&beta;", "beta", "beta", "β"),
("Gamma", "\\Gamma", true, "&Gamma;", "Gamma", "Gamma", "Γ"),
("gamma", "\\gamma", true, "&gamma;", "gamma", "gamma", "γ"),
("Delta", "\\Delta", true, "&Delta;", "Delta", "Delta", "Δ"),
("delta", "\\delta", true, "&delta;", "delta", "delta", "δ"),
("Epsilon", "E", false, "&Epsilon;", "Epsilon", "Epsilon", "Ε"),
("epsilon", "\\epsilon", true, "&epsilon;", "epsilon", "epsilon", "ε"),
("varepsilon", "\\varepsilon", true, "&epsilon;", "varepsilon", "varepsilon", "ε"),
("Zeta", "Z", false, "&Zeta;", "Zeta", "Zeta", "Ζ"),
("zeta", "\\zeta", true, "&zeta;", "zeta", "zeta", "ζ"),
("Eta", "H", false, "&Eta;", "Eta", "Eta", "Η"),
("eta", "\\eta", true, "&eta;", "eta", "eta", "η"),
("Theta", "\\Theta", true, "&Theta;", "Theta", "Theta", "Θ"),
("theta", "\\theta", true, "&theta;", "theta", "theta", "θ"),
("thetasym", "\\vartheta", true, "&thetasym;", "theta", "theta", "ϑ"),
("vartheta", "\\vartheta", true, "&thetasym;", "theta", "theta", "ϑ"),
("Iota", "I", false, "&Iota;", "Iota", "Iota", "Ι"),
("iota", "\\iota", true, "&iota;", "iota", "iota", "ι"),
("Kappa", "K", false, "&Kappa;", "Kappa", "Kappa", "Κ"),
("kappa", "\\kappa", true, "&kappa;", "kappa", "kappa", "κ"),
("Lambda", "\\Lambda", true, "&Lambda;", "Lambda", "Lambda", "Λ"),
("lambda", "\\lambda", true, "&lambda;", "lambda", "lambda", "λ"),
("Mu", "M", false, "&Mu;", "Mu", "Mu", "Μ"),
("mu", "\\mu", true, "&mu;", "mu", "mu", "μ"),
("nu", "\\nu", true, "&nu;", "nu", "nu", "ν"),
("Nu", "N", false, "&Nu;", "Nu", "Nu", "Ν"),
("Xi", "\\Xi", true, "&Xi;", "Xi", "Xi", "Ξ"),
("xi", "\\xi", true, "&xi;", "xi", "xi", "ξ"),
("Omicron", "O", false, "&Omicron;", "Omicron", "Omicron", "Ο"),
("omicron", "\\textit{o}", false, "&omicron;", "omicron", "omicron", "ο"),
("Pi", "\\Pi", true, "&Pi;", "Pi", "Pi", "Π"),
("pi", "\\pi", true, "&pi;", "pi", "pi", "π"),
("Rho", "P", false, "&Rho;", "Rho", "Rho", "Ρ"),
("rho", "\\rho", true, "&rho;", "rho", "rho", "ρ"),
("Sigma", "\\Sigma", true, "&Sigma;", "Sigma", "Sigma", "Σ"),
("sigma", "\\sigma", true, "&sigma;", "sigma", "sigma", "σ"),
("sigmaf", "\\varsigma", true, "&sigmaf;", "sigmaf", "sigmaf", "ς"),
("varsigma", "\\varsigma", true, "&sigmaf;", "varsigma", "varsigma", "ς"),
("Tau", "T", false, "&Tau;", "Tau", "Tau", "Τ"),
("Upsilon", "\\Upsilon", true, "&Upsilon;", "Upsilon", "Upsilon", "Υ"),
("upsih", "\\Upsilon", true, "&upsih;", "upsilon", "upsilon", "ϒ"),
("upsilon", "\\upsilon", true, "&upsilon;", "upsilon", "upsilon", "υ"),
("Phi", "\\Phi", true, "&Phi;", "Phi", "Phi", "Φ"),
("phi", "\\phi", true, "&phi;", "phi", "phi", "ɸ"),
("varphi", "\\varphi", true, "&varphi;", "varphi", "varphi", "φ"),
("Chi", "X", false, "&Chi;", "Chi", "Chi", "Χ"),
("chi", "\\chi", true, "&chi;", "chi", "chi", "χ"),
("acutex", "\\acute x", true, "&acute;x", "'x", "'x", "𝑥́"),
("Psi", "\\Psi", true, "&Psi;", "Psi", "Psi", "Ψ"),
("psi", "\\psi", true, "&psi;", "psi", "psi", "ψ"),
("tau", "\\tau", true, "&tau;", "tau", "tau", "τ"),
("Omega", "\\Omega", true, "&Omega;", "Omega", "Omega", "Ω"),
("omega", "\\omega", true, "&omega;", "omega", "omega", "ω"),
("piv", "\\varpi", true, "&piv;", "omega-pi", "omega-pi", "ϖ"),
("varpi", "\\varpi", true, "&piv;", "omega-pi", "omega-pi", "ϖ"),
("partial", "\\partial", true, "&part;", "[partial differential]", "[partial differential]", ""),
// Hebrew
("alefsym", "\\aleph", true, "&alefsym;", "aleph", "aleph", ""),
("aleph", "\\aleph", true, "&aleph;", "aleph", "aleph", ""),
("gimel", "\\gimel", true, "&gimel;", "gimel", "gimel", ""),
("beth", "\\beth", true, "&beth;", "beth", "beth", "ב"),
("dalet", "\\daleth", true, "&daleth;", "dalet", "dalet", "ד"),
// Icelandic
("ETH", "\\DH{}", false, "&ETH;", "D", "Ð", "Ð"),
("eth", "\\dh{}", false, "&eth;", "dh", "ð", "ð"),
("THORN", "\\TH{}", false, "&THORN;", "TH", "Þ", "Þ"),
("thorn", "\\th{}", false, "&thorn;", "th", "þ", "þ"),
// * Punctuation
// Dots and Marks
("dots", "\\dots{}", false, "&hellip;", "...", "...", ""),
("cdots", "\\cdots{}", true, "&ctdot;", "...", "...", ""),
("hellip", "\\dots{}", false, "&hellip;", "...", "...", ""),
("middot", "\\textperiodcentered{}", false, "&middot;", ".", "·", "·"),
("iexcl", "!`", false, "&iexcl;", "!", "¡", "¡"),
("iquest", "?`", false, "&iquest;", "?", "¿", "¿"),
// Dash-like
("shy", "\\-", false, "&shy;", "", "", ""),
("ndash", "--", false, "&ndash;", "-", "-", ""),
("mdash", "---", false, "&mdash;", "--", "--", ""),
// Quotations
("quot", "\\textquotedbl{}", false, "&quot;", "\"", "\"", "\""),
("acute", "\\textasciiacute{}", false, "&acute;", "'", "´", "´"),
("ldquo", "\\textquotedblleft{}", false, "&ldquo;", "\"", "\"", ""),
("rdquo", "\\textquotedblright{}", false, "&rdquo;", "\"", "\"", ""),
("bdquo", "\\quotedblbase{}", false, "&bdquo;", "\"", "\"", ""),
("lsquo", "\\textquoteleft{}", false, "&lsquo;", "`", "`", ""),
("rsquo", "\\textquoteright{}", false, "&rsquo;", "'", "'", ""),
("sbquo", "\\quotesinglbase{}", false, "&sbquo;", ", ", ", ", ""),
("laquo", "\\guillemotleft{}", false, "&laquo;", "<<", "«", "«"),
("raquo", "\\guillemotright{}", false, "&raquo;", ">>", "»", "»"),
("lsaquo", "\\guilsinglleft{}", false, "&lsaquo;", "<", "<", ""),
("rsaquo", "\\guilsinglright{}", false, "&rsaquo;", ">", ">", ""),
// * Other
// Misc. (often used)
("circ", "\\^{}", false, "&circ;", "^", "^", ""),
("vert", "\\vert{}", true, "&vert;", "|", "|", "|"),
("vbar", "|", false, "|", "|", "|", "|"),
("brvbar", "\\textbrokenbar{}", false, "&brvbar;", "|", "¦", "¦"),
("S", "\\S", false, "&sect;", "section", "§", "§"),
("sect", "\\S", false, "&sect;", "section", "§", "§"),
("P", "\\P{}", false, "&para;", "paragraph", "", ""),
("para", "\\P{}", false, "&para;", "paragraph", "", ""),
("amp", "\\&", false, "&amp;", "&", "&", "&"),
("lt", "\\textless{}", false, "&lt;", "<", "<", "<"),
("gt", "\\textgreater{}", false, "&gt;", ">", ">", ">"),
("tilde", "\\textasciitilde{}", false, "~", "~", "~", "~"),
("slash", "/", false, "/", "/", "/", "/"),
("plus", "+", false, "+", "+", "+", "+"),
("under", "\\_", false, "_", "_", "_", "_"),
("equal", "=", false, "=", "=", "=", "="),
("asciicirc", "\\textasciicircum{}", false, "^", "^", "^", "^"),
("dagger", "\\textdagger{}", false, "&dagger;", "[dagger]", "[dagger]", ""),
("dag", "\\dag{}", false, "&dagger;", "[dagger]", "[dagger]", ""),
("Dagger", "\\textdaggerdbl{}", false, "&Dagger;", "[doubledagger]", "[doubledagger]", ""),
("ddag", "\\ddag{}", false, "&Dagger;", "[doubledagger]", "[doubledagger]", ""),
// Whitespace
("nbsp", "~", false, "&nbsp;", ", ", "\\x00A0", "\\x00A0"),
("ensp", "\\hspace*{.5em}", false, "&ensp;", ", ", ", ", ""),
("emsp", "\\hspace*{1em}", false, "&emsp;", ", ", ", ", ""),
("thinsp", "\\hspace*{.2em}", false, "&thinsp;", ", ", ", ", ""),
// Currency
("curren", "\\textcurrency{}", false, "&curren;", "curr.", "¤", "¤"),
("cent", "\\textcent{}", false, "&cent;", "cent", "¢", "¢"),
("pound", "\\pounds{}", false, "&pound;", "pound", "£", "£"),
("yen", "\\textyen{}", false, "&yen;", "yen", "¥", "¥"),
("euro", "\\texteuro{}", false, "&euro;", "EUR", "EUR", ""),
("EUR", "\\texteuro{}", false, "&euro;", "EUR", "EUR", ""),
("dollar", "\\$", false, "$", "$", "$", "$"),
("USD", "\\$", false, "$", "$", "$", "$"),
// Property Marks
("copy", "\\textcopyright{}", false, "&copy;", "(c)", "©", "©"),
("reg", "\\textregistered{}", false, "&reg;", "(r)", "®", "®"),
("trade", "\\texttrademark{}", false, "&trade;", "TM", "TM", ""),
// Science et al.
("minus", "-", true, "&minus;", "-", "-", ""),
("pm", "\\textpm{}", false, "&plusmn;", "+-", "±", "±"),
("plusmn", "\\textpm{}", false, "&plusmn;", "+-", "±", "±"),
("times", "\\texttimes{}", false, "&times;", "*", "×", "×"),
("frasl", "/", false, "&frasl;", "/", "/", ""),
("colon", "\\colon", true, ":", ":", ":", ":"),
("div", "\\textdiv{}", false, "&divide;", "/", "÷", "÷"),
("frac12", "\\textonehalf{}", false, "&frac12;", "1/2", "½", "½"),
("frac14", "\\textonequarter{}", false, "&frac14;", "1/4", "¼", "¼"),
("frac34", "\\textthreequarters{}", false, "&frac34;", "3/4", "¾", "¾"),
("permil", "\\textperthousand{}", false, "&permil;", "per thousand", "per thousand", ""),
("sup1", "\\textonesuperior{}", false, "&sup1;", "^1", "¹", "¹"),
("sup2", "\\texttwosuperior{}", false, "&sup2;", "^2", "²", "²"),
("sup3", "\\textthreesuperior{}", false, "&sup3;", "^3", "³", "³"),
("radic", "\\sqrt{\\,}", true, "&radic;", "[square root]", "[square root]", ""),
("sum", "\\sum", true, "&sum;", "[sum]", "[sum]", ""),
("prod", "\\prod", true, "&prod;", "[product]", "[n-ary product]", ""),
("micro", "\\textmu{}", false, "&micro;", "micro", "µ", "µ"),
("macr", "\\textasciimacron{}", false, "&macr;", "[macron]", "¯", "¯"),
("deg", "\\textdegree{}", false, "&deg;", "degree", "°", "°"),
("prime", "\\prime", true, "&prime;", "'", "'", ""),
("Prime", "\\prime{}\\prime", true, "&Prime;", "''", "''", ""),
("infin", "\\infty", true, "&infin;", "[infinity]", "[infinity]", ""),
("infty", "\\infty", true, "&infin;", "[infinity]", "[infinity]", ""),
("prop", "\\propto", true, "&prop;", "[proportional to]", "[proportional to]", ""),
("propto", "\\propto", true, "&prop;", "[proportional to]", "[proportional to]", ""),
("not", "\\textlnot{}", false, "&not;", "[angled dash]", "¬", "¬"),
("neg", "\\neg{}", true, "&not;", "[angled dash]", "¬", "¬"),
("land", "\\land", true, "&and;", "[logical and]", "[logical and]", ""),
("wedge", "\\wedge", true, "&and;", "[logical and]", "[logical and]", ""),
("lor", "\\lor", true, "&or;", "[logical or]", "[logical or]", ""),
("vee", "\\vee", true, "&or;", "[logical or]", "[logical or]", ""),
("cap", "\\cap", true, "&cap;", "[intersection]", "[intersection]", ""),
("cup", "\\cup", true, "&cup;", "[union]", "[union]", ""),
("smile", "\\smile", true, "&smile;", "[cup product]", "[cup product]", ""),
("frown", "\\frown", true, "&frown;", "[Cap product]", "[cap product]", ""),
("int", "\\int", true, "&int;", "[integral]", "[integral]", ""),
("therefore", "\\therefore", true, "&there4;", "[therefore]", "[therefore]", ""),
("there4", "\\therefore", true, "&there4;", "[therefore]", "[therefore]", ""),
("because", "\\because", true, "&because;", "[because]", "[because]", ""),
("sim", "\\sim", true, "&sim;", "~", "~", ""),
("cong", "\\cong", true, "&cong;", "[approx. equal to]", "[approx. equal to]", ""),
("simeq", "\\simeq", true, "&cong;", "[approx. equal to]", "[approx. equal to]", ""),
("asymp", "\\asymp", true, "&asymp;", "[, almostrueequal to]", "[, almostrueequal to]", ""),
("approx", "\\approx", true, "&asymp;", "[, almostrueequal to]", "[, almostrueequal to]", ""),
("ne", "\\ne", true, "&ne;", "[, notrueequal to]", "[, notrueequal to]", ""),
("neq", "\\neq", true, "&ne;", "[, notrueequal to]", "[, notrueequal to]", ""),
("equiv", "\\equiv", true, "&equiv;", "[identical to]", "[identical to]", ""),
("triangleq", "\\triangleq", true, "&triangleq;", "[defined to]", "[defined to]", ""),
("le", "\\le", true, "&le;", "<=", "<=", ""),
("leq", "\\le", true, "&le;", "<=", "<=", ""),
("ge", "\\ge", true, "&ge;", ">=", ">=", ""),
("geq", "\\ge", true, "&ge;", ">=", ">=", ""),
("lessgtr", "\\lessgtr", true, "&lessgtr;", "[less than or greater than]", "[less than or greater than]", ""),
("lesseqgtr", "\\lesseqgtr", true, "&lesseqgtr;", "[less than or equal or greater than or equal]", "[less than or equal or greater than or equal]", ""),
("ll", "\\ll", true, "&Lt;", "<<", "<<", ""),
("Ll", "\\lll", true, "&Ll;", "<<<", "<<<", ""),
("lll", "\\lll", true, "&Ll;", "<<<", "<<<", ""),
("gg", "\\gg", true, "&Gt;", ">>", ">>", ""),
("Gg", "\\ggg", true, "&Gg;", ">>>", ">>>", ""),
("ggg", "\\ggg", true, "&Gg;", ">>>", ">>>", ""),
("prec", "\\prec", true, "&pr;", "[precedes]", "[precedes]", ""),
("preceq", "\\preceq", true, "&prcue;", "[precedes or equal]", "[precedes or equal]", ""),
("preccurlyeq", "\\preccurlyeq", true, "&prcue;", "[precedes or equal]", "[precedes or equal]", ""),
("succ", "\\succ", true, "&sc;", "[succeeds]", "[succeeds]", ""),
("succeq", "\\succeq", true, "&sccue;", "[succeeds or equal]", "[succeeds or equal]", ""),
("succcurlyeq", "\\succcurlyeq", true, "&sccue;", "[succeeds or equal]", "[succeeds or equal]", ""),
("sub", "\\subset", true, "&sub;", "[, subsetrueof]", "[, subsetrueof]", ""),
("subset", "\\subset", true, "&sub;", "[, subsetrueof]", "[, subsetrueof]", ""),
("sup", "\\supset", true, "&sup;", "[, supersetrueof]", "[, supersetrueof]", ""),
("supset", "\\supset", true, "&sup;", "[, supersetrueof]", "[, supersetrueof]", ""),
("nsub", "\\not\\subset", true, "&nsub;", "[, notruea, subsetrueof]", "[, notruea, subsetrueof", ""),
("sube", "\\subseteq", true, "&sube;", "[, subsetrueof or equal to]", "[, subsetrueof or equal to]", ""),
("nsup", "\\not\\supset", true, "&nsup;", "[, notruea, supersetrueof]", "[, notruea, supersetrueof]", ""),
("supe", "\\supseteq", true, "&supe;", "[, supersetrueof or equal to]", "[, supersetrueof or equal to]", ""),
("setminus", "\\setminus", true, "&setminus;", "\\", "\\", ""),
("forall", "\\forall", true, "&forall;", "[for all]", "[for all]", ""),
("exist", "\\exists", true, "&exist;", "[there exists]", "[there exists]", ""),
("exists", "\\exists", true, "&exist;", "[there exists]", "[there exists]", ""),
("nexist", "\\nexists", true, "&exist;", "[there does, notrueexists]", "[there does, notrue exists]", ""),
("nexists", "\\nexists", true, "&exist;", "[there does, notrueexists]", "[there does, notrue exists]", ""),
("empty", "\\emptyset", true, "&empty;", "[empty set]", "[empty set]", ""),
("emptyset", "\\emptyset", true, "&empty;", "[empty set]", "[empty set]", ""),
("isin", "\\in", true, "&isin;", "[, elementrueof]", "[, elementrueof]", ""),
("in", "\\in", true, "&isin;", "[, elementrueof]", "[, elementrueof]", ""),
("notin", "\\notin", true, "&notin;", "[, notruean, elementrueof]", "[, notruean, elementrueof]", ""),
("ni", "\\ni", true, "&ni;", "[contains as member]", "[contains as member]", ""),
("nabla", "\\nabla", true, "&nabla;", "[nabla]", "[nabla]", ""),
("ang", "\\angle", true, "&ang;", "[angle]", "[angle]", ""),
("angle", "\\angle", true, "&ang;", "[angle]", "[angle]", ""),
("perp", "\\perp", true, "&perp;", "[up tack]", "[up tack]", ""),
("parallel", "\\parallel", true, "&parallel;", "||", "||", ""),
("sdot", "\\cdot", true, "&sdot;", "[dot]", "[dot]", ""),
("cdot", "\\cdot", true, "&sdot;", "[dot]", "[dot]", ""),
("lceil", "\\lceil", true, "&lceil;", "[, leftrueceiling]", "[, leftrueceiling]", ""),
("rceil", "\\rceil", true, "&rceil;", "[, rightrueceiling]", "[, rightrueceiling]", ""),
("lfloor", "\\lfloor", true, "&lfloor;", "[, leftruefloor]", "[, leftruefloor]", ""),
("rfloor", "\\rfloor", true, "&rfloor;", "[, rightruefloor]", "[, rightruefloor]", ""),
("lang", "\\langle", true, "&lang;", "<", "<", ""),
("rang", "\\rangle", true, "&rang;", ">", ">", ""),
("langle", "\\langle", true, "&lang;", "<", "<", ""),
("rangle", "\\rangle", true, "&rang;", ">", ">", ""),
("hbar", "\\hbar", true, "&hbar;", "hbar", "hbar", ""),
("mho", "\\mho", true, "&mho;", "mho", "mho", ""),
// Arrows
("larr", "\\leftarrow", true, "&larr;", "<-", "<-", ""),
("leftarrow", "\\leftarrow", true, "&larr;", "<-", "<-", ""),
("gets", "\\gets", true, "&larr;", "<-", "<-", ""),
("lArr", "\\Leftarrow", true, "&lArr;", "<=", "<=", ""),
("Leftarrow", "\\Leftarrow", true, "&lArr;", "<=", "<=", ""),
("uarr", "\\uparrow", true, "&uarr;", "[uparrow]", "[uparrow]", ""),
("uparrow", "\\uparrow", true, "&uarr;", "[uparrow]", "[uparrow]", ""),
("uArr", "\\Uparrow", true, "&uArr;", "[dbluparrow]", "[dbluparrow]", ""),
("Uparrow", "\\Uparrow", true, "&uArr;", "[dbluparrow]", "[dbluparrow]", ""),
("rarr", "\\rightarrow", true, "&rarr;", "->", "->", ""),
("to", "\\to", true, "&rarr;", "->", "->", ""),
("rightarrow", "\\rightarrow", true, "&rarr;", "->", "->", ""),
("rArr", "\\Rightarrow", true, "&rArr;", "=>", "=>", ""),
("Rightarrow", "\\Rightarrow", true, "&rArr;", "=>", "=>", ""),
("darr", "\\downarrow", true, "&darr;", "[downarrow]", "[downarrow]", ""),
("downarrow", "\\downarrow", true, "&darr;", "[downarrow]", "[downarrow]", ""),
("dArr", "\\Downarrow", true, "&dArr;", "[dbldownarrow]", "[dbldownarrow]", ""),
("Downarrow", "\\Downarrow", true, "&dArr;", "[dbldownarrow]", "[dbldownarrow]", ""),
("harr", "\\leftrightarrow", true, "&harr;", "<->", "<->", ""),
("leftrightarrow", "\\leftrightarrow", true, "&harr;", "<->", "<->", ""),
("hArr", "\\Leftrightarrow", true, "&hArr;", "<=>", "<=>", ""),
("Leftrightarrow", "\\Leftrightarrow", true, "&hArr;", "<=>", "<=>", ""),
("crarr", "\\hookleftarrow", true, "&crarr;", "<-'", "<-'", ""),
("hookleftarrow", "\\hookleftarrow", true, "&crarr;", "<-'", "<-'", ""),
// Function names
("arccos", "\\arccos", true, "arccos", "arccos", "arccos", "arccos"),
("arcsin", "\\arcsin", true, "arcsin", "arcsin", "arcsin", "arcsin"),
("arctan", "\\arctan", true, "arctan", "arctan", "arctan", "arctan"),
("arg", "\\arg", true, "arg", "arg", "arg", "arg"),
("cos", "\\cos", true, "cos", "cos", "cos", "cos"),
("cosh", "\\cosh", true, "cosh", "cosh", "cosh", "cosh"),
("cot", "\\cot", true, "cot", "cot", "cot", "cot"),
("coth", "\\coth", true, "coth", "coth", "coth", "coth"),
("csc", "\\csc", true, "csc", "csc", "csc", "csc"),
("deg", "\\deg", true, "&deg;", "deg", "deg", "deg"),
("det", "\\det", true, "det", "det", "det", "det"),
("dim", "\\dim", true, "dim", "dim", "dim", "dim"),
("exp", "\\exp", true, "exp", "exp", "exp", "exp"),
("gcd", "\\gcd", true, "gcd", "gcd", "gcd", "gcd"),
("hom", "\\hom", true, "hom", "hom", "hom", "hom"),
("inf", "\\inf", true, "inf", "inf", "inf", "inf"),
("ker", "\\ker", true, "ker", "ker", "ker", "ker"),
("lg", "\\lg", true, "lg", "lg", "lg", "lg"),
("lim", "\\lim", true, "lim", "lim", "lim", "lim"),
("liminf", "\\liminf", true, "liminf", "liminf", "liminf", "liminf"),
("limsup", "\\limsup", true, "limsup", "limsup", "limsup", "limsup"),
("ln", "\\ln", true, "ln", "ln", "ln", "ln"),
("log", "\\log", true, "log", "log", "log", "log"),
("max", "\\max", true, "max", "max", "max", "max"),
("min", "\\min", true, "min", "min", "min", "min"),
("Pr", "\\Pr", true, "Pr", "Pr", "Pr", "Pr"),
("sec", "\\sec", true, "sec", "sec", "sec", "sec"),
("sin", "\\sin", true, "sin", "sin", "sin", "sin"),
("sinh", "\\sinh", true, "sinh", "sinh", "sinh", "sinh"),
("sup", "\\sup", true, "&sup;", "sup", "sup", "sup"),
("tan", "\\tan", true, "tan", "tan", "tan", "tan"),
("tanh", "\\tanh", true, "tanh", "tanh", "tanh", "tanh"),
// Signs & Symbols
("bull", "\\textbullet{}", false, "&bull;", "*", "*", ""),
("bullet", "\\textbullet{}", false, "&bull;", "*", "*", ""),
("star", "\\star", true, "*", "*", "*", ""),
("lowast", "\\ast", true, "&lowast;", "*", "*", ""),
("ast", "\\ast", true, "&lowast;", "*", "*", "*"),
("odot", "\\odot", true, "o", "[circled dot]", "[circled dot]", "ʘ"),
("oplus", "\\oplus", true, "&oplus;", "[circled plus]", "[circled plus]", ""),
("otimes", "\\otimes", true, "&otimes;", "[circled times]", "[circled times]", ""),
("check", "\\checkmark", true, "&checkmark;", "[checkmark]", "[checkmark]", ""),
("checkmark", "\\checkmark", true, "&check;", "[checkmark]", "[checkmark]", ""),
// Miscellaneous (seldom used)
("ordf", "\\textordfeminine{}", false, "&ordf;", "_a_", "ª", "ª"),
("ordm", "\\textordmasculine{}", false, "&ordm;", "_o_", "º", "º"),
("cedil", "\\c{}", false, "&cedil;", "[cedilla]", "¸", "¸"),
("oline", "\\overline{~}", true, "&oline;", "[overline]", "¯", ""),
("uml", "\\textasciidieresis{}", false, "&uml;", "[diaeresis]", "¨", "¨"),
("zwnj", "\\/{}", false, "&zwnj;", "", "", ""),
("zwj", "", false, "&zwj;", "", "", ""),
("lrm", "", false, "&lrm;", "", "", "\u{200E}"),
("rlm", "", false, "&rlm;", "", "", "\u{200F}"),
// Smilies
("smiley", "\\ddot\\smile", true, "&#9786;", ":-)", ":-)", ""),
("blacksmile", "\\ddot\\smile", true, "&#9787;", ":-)", ":-)", ""),
("sad", "\\ddot\\frown", true, "&#9785;", ":-(", ":-(", ""),
("frowny", "\\ddot\\frown", true, "&#9785;", ":-(", ":-(", ""),
// Suits
("clubs", "\\clubsuit", true, "&clubs;", "[clubs]", "[clubs]", ""),
("clubsuit", "\\clubsuit", true, "&clubs;", "[clubs]", "[clubs]", ""),
("spades", "\\spadesuit", true, "&spades;", "[spades]", "[spades]", ""),
("spadesuit", "\\spadesuit", true, "&spades;", "[spades]", "[spades]", ""),
("hearts", "\\heartsuit", true, "&hearts;", "[hearts]", "[hearts]", ""),
("heartsuit", "\\heartsuit", true, "&heartsuit;", "[hearts]", "[hearts]", ""),
("diams", "\\diamondsuit", true, "&diams;", "[diamonds]", "[diamonds]", ""),
("diamondsuit", "\\diamondsuit", true, "&diams;", "[diamonds]", "[diamonds]", ""),
("diamond", "\\diamondsuit", true, "&diamond;", "[diamond]", "[diamond]", ""),
("Diamond", "\\diamondsuit", true, "&diamond;", "[diamond]", "[diamond]", ""),
("loz", "\\lozenge", true, "&loz;", "[lozenge]", "[lozenge]", ""),
// spaces
// fish shell:
// for i in (seq 1 20)
// echo '("'(string repeat -n $i ' ')'", "\\\\hspace*{'(math '0.5*'$i)'em}", true, "'(string repeat -n $i '&ensp;')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i '\\\\x2002')'")'
// end
(" ", "\\hspace*{0.5em}", true, "&ensp;", " ", " ", "\\x2002"),
(" ", "\\hspace*{1em}", true, "&ensp;&ensp;", " ", " ", "\\x2002\\x2002"),
(" ", "\\hspace*{1.5em}", true, "&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{2em}", true, "&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{2.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{3em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{3.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{4em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{4.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{5.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{6em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{6.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{7em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{7.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{8em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{8.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{9em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{9.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{10em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
];

View file

@ -1,70 +0,0 @@
use crate::{ast::*, SyntaxToken};
/// Nodes that are reported as a pair of [`Event::Enter`] / [`Event::Leave`]
/// events.
///
/// Every variant wraps the AST node type of the same name.
// `#[non_exhaustive]`: code outside this crate must keep a wildcard arm when
// matching, so new variants can be added later.
#[non_exhaustive]
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Container {
Document(Document),
Section(Section),
Paragraph(Paragraph),
Headline(Headline),
OrgTable(OrgTable),
OrgTableRow(OrgTableRow),
OrgTableCell(OrgTableCell),
TableEl(TableEl),
List(List),
ListItem(ListItem),
Drawer(Drawer),
DynBlock(DynBlock),
FnDef(FnDef),
Comment(Comment),
FixedWidth(FixedWidth),
SpecialBlock(SpecialBlock),
QuoteBlock(QuoteBlock),
CenterBlock(CenterBlock),
VerseBlock(VerseBlock),
CommentBlock(CommentBlock),
ExampleBlock(ExampleBlock),
ExportBlock(ExportBlock),
SourceBlock(SourceBlock),
Link(Link),
RadioTarget(RadioTarget),
FnRef(FnRef),
Target(Target),
Bold(Bold),
Strike(Strike),
Italic(Italic),
Underline(Underline),
Verbatim(Verbatim),
Code(Code),
Superscript(Superscript),
Subscript(Subscript),
BabelCall(BabelCall),
PropertyDrawer(PropertyDrawer),
AffiliatedKeyword(AffiliatedKeyword),
Keyword(Keyword),
}
/// A single step reported while walking a parsed document.
///
/// [`Container`] nodes are announced with an `Enter` and a `Leave` event;
/// every other variant carries one token or AST node and is a single event.
// `#[non_exhaustive]`: code outside this crate must keep a wildcard arm when
// matching, so new variants can be added later.
#[non_exhaustive]
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Event {
/// Start of a container node.
Enter(Container),
/// End of a container node.
Leave(Container),
/// A text token.
Text(SyntaxToken),
Macros(Macros),
Cookie(Cookie),
InlineCall(InlineCall),
InlineSrc(InlineSrc),
Clock(Clock),
LineBreak(LineBreak),
Snippet(Snippet),
Rule(Rule),
Timestamp(Timestamp),
LatexFragment(LatexFragment),
LatexEnvironment(LatexEnvironment),
Entity(Entity),
}

View file

@ -1,308 +0,0 @@
use rowan::NodeOrToken;
use std::cmp::min;
use std::fmt;
use std::fmt::Write as _;
use super::event::{Container, Event};
use super::TraversalContext;
use super::Traverser;
use crate::SyntaxKind;
/// A wrapper for escaping sensitive characters in html.
///
/// When formatted with `Display`, the wrapped text is written with `<`, `>`,
/// `&`, `'` and `"` replaced by `&lt;`, `&gt;`, `&amp;`, `&apos;` and
/// `&quot;` respectively; all other text is written unchanged.
///
/// ```rust
/// use orgize::export::HtmlEscape as Escape;
///
/// assert_eq!(format!("{}", Escape("< < <")), "&lt; &lt; &lt;");
/// assert_eq!(
/// format!("{}", Escape("<script>alert('Hello XSS')</script>")),
/// "&lt;script&gt;alert(&apos;Hello XSS&apos;)&lt;/script&gt;"
/// );
/// ```
pub struct HtmlEscape<S: AsRef<str>>(pub S);
impl<S: AsRef<str>> fmt::Display for HtmlEscape<S> {
    /// Writes the wrapped text, substituting an HTML entity for each of
    /// `<`, `>`, `&`, `'` and `"`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text = self.0.as_ref();
        let raw = text.as_bytes();
        let special = jetscii::bytes!(b'<', b'>', b'&', b'\'', b'"');

        // Index of the first byte of `text` that has not been written yet.
        let mut start = 0;

        while let Some(skip) = special.find(&raw[start..]) {
            let hit = start + skip;
            // All searched bytes are ASCII, so `hit` always falls on a char
            // boundary and the slice below cannot split a code point.
            f.write_str(&text[start..hit])?;
            match raw[hit] {
                b'<' => f.write_str("&lt;")?,
                b'>' => f.write_str("&gt;")?,
                b'&' => f.write_str("&amp;")?,
                b'\'' => f.write_str("&apos;")?,
                b'"' => f.write_str("&quot;")?,
                _ => {}
            }
            start = hit + 1;
        }

        // Whatever follows the last special byte (possibly nothing).
        f.write_str(&text[start..])
    }
}
/// Collects HTML text into an internal buffer.
///
/// Create one with `HtmlExport::default()`, feed it text, then call
/// [`HtmlExport::finish`] to take the accumulated string.
#[derive(Default)]
pub struct HtmlExport {
    /// HTML written so far.
    output: String,
    // NOTE(review): not used by the inherent methods below; presumably one
    // flag per open list, tracking whether it is a descriptive list — confirm.
    in_descriptive_list: Vec<bool>,
    // NOTE(review): not used by the inherent methods below; presumably the
    // position within the table currently being written — confirm.
    table_row: TableRow,
}

/// State kept in `HtmlExport::table_row`; `HeaderRule` is the initial value.
// NOTE(review): the variant names suggest a header/body progression driven
// by table rules — confirm against the table handling code.
#[derive(Default, PartialEq, Eq)]
enum TableRow {
    #[default]
    HeaderRule,
    Header,
    BodyRule,
    Body,
}

impl HtmlExport {
    /// Appends `s` verbatim (no escaping) to the output buffer.
    pub fn push_str(&mut self, s: impl AsRef<str>) {
        self.output.push_str(s.as_ref());
    }

    /// Consumes the exporter and returns everything written to it.
    pub fn finish(self) -> String {
        let HtmlExport { output, .. } = self;
        output
    }
}
impl Traverser for HtmlExport {
    /// Renders one traversal event as HTML and appends it to the output buffer.
    ///
    /// Container `Enter`/`Leave` pairs become opening/closing tags, text is
    /// escaped through `HtmlEscape`, and events without an explicit arm are
    /// ignored. `ctx.skip()` is used where the arm already rendered (or wants
    /// to suppress) the node's children.
    fn event(&mut self, event: Event, ctx: &mut TraversalContext) {
        match event {
            Event::Enter(Container::Document(_)) => self.output += "<main>",
            Event::Leave(Container::Document(_)) => self.output += "</main>",

            Event::Enter(Container::Headline(headline)) => {
                // HTML only defines <h1>..<h6>, so deeper levels are clamped.
                let level = min(headline.level(), 6);
                let _ = write!(&mut self.output, "<h{level}>");
                for elem in headline.title() {
                    self.element(elem, ctx);
                }
                let _ = write!(&mut self.output, "</h{level}>");
            }
            Event::Leave(Container::Headline(_)) => {}

            Event::Enter(Container::Paragraph(_)) => self.output += "<p>",
            Event::Leave(Container::Paragraph(_)) => self.output += "</p>",

            Event::Enter(Container::Section(_)) => self.output += "<section>",
            Event::Leave(Container::Section(_)) => self.output += "</section>",

            Event::Enter(Container::Italic(_)) => self.output += "<i>",
            Event::Leave(Container::Italic(_)) => self.output += "</i>",

            Event::Enter(Container::Bold(_)) => self.output += "<b>",
            Event::Leave(Container::Bold(_)) => self.output += "</b>",

            Event::Enter(Container::Strike(_)) => self.output += "<s>",
            Event::Leave(Container::Strike(_)) => self.output += "</s>",

            Event::Enter(Container::Underline(_)) => self.output += "<u>",
            Event::Leave(Container::Underline(_)) => self.output += "</u>",

            Event::Enter(Container::Verbatim(_)) => self.output += "<code>",
            Event::Leave(Container::Verbatim(_)) => self.output += "</code>",

            Event::Enter(Container::Code(_)) => self.output += "<code>",
            Event::Leave(Container::Code(_)) => self.output += "</code>",

            Event::Enter(Container::QuoteBlock(_)) => self.output += "<blockquote>",
            Event::Leave(Container::QuoteBlock(_)) => self.output += "</blockquote>",

            Event::Enter(Container::VerseBlock(_)) => self.output += "<p class=\"verse\">",
            Event::Leave(Container::VerseBlock(_)) => self.output += "</p>",

            Event::Enter(Container::ExampleBlock(_)) => self.output += "<pre class=\"example\">",
            Event::Leave(Container::ExampleBlock(_)) => self.output += "</pre>",

            Event::Enter(Container::CenterBlock(_)) => self.output += "<div class=\"center\">",
            Event::Leave(Container::CenterBlock(_)) => self.output += "</div>",

            Event::Enter(Container::CommentBlock(_)) => self.output += "<!--",
            Event::Leave(Container::CommentBlock(_)) => self.output += "-->",

            Event::Enter(Container::Comment(_)) => self.output += "<!--",
            Event::Leave(Container::Comment(_)) => self.output += "-->",

            Event::Enter(Container::Subscript(_)) => self.output += "<sub>",
            Event::Leave(Container::Subscript(_)) => self.output += "</sub>",

            Event::Enter(Container::Superscript(_)) => self.output += "<sup>",
            Event::Leave(Container::Superscript(_)) => self.output += "</sup>",

            Event::Enter(Container::List(list)) => {
                // Remember per open list whether it is descriptive, so that
                // list items know whether to emit <dt>/<dd> or <li>.
                self.output += if list.is_ordered() {
                    self.in_descriptive_list.push(false);
                    "<ol>"
                } else if list.is_descriptive() {
                    self.in_descriptive_list.push(true);
                    "<dl>"
                } else {
                    self.in_descriptive_list.push(false);
                    "<ul>"
                };
            }
            Event::Leave(Container::List(list)) => {
                self.output += if list.is_ordered() {
                    "</ol>"
                } else if let Some(true) = self.in_descriptive_list.last() {
                    "</dl>"
                } else {
                    "</ul>"
                };
                self.in_descriptive_list.pop();
            }

            Event::Enter(Container::ListItem(list_item)) => {
                if let Some(&true) = self.in_descriptive_list.last() {
                    self.output += "<dt>";
                    for elem in list_item.tag() {
                        self.element(elem, ctx);
                    }
                    self.output += "</dt><dd>";
                } else {
                    self.output += "<li>";
                }
            }
            Event::Leave(Container::ListItem(_)) => {
                if let Some(&true) = self.in_descriptive_list.last() {
                    self.output += "</dd>";
                } else {
                    self.output += "</li>";
                }
            }

            Event::Enter(Container::OrgTable(table)) => {
                self.output += "<table>";
                self.table_row = if table.has_header() {
                    TableRow::HeaderRule
                } else {
                    TableRow::BodyRule
                }
            }
            Event::Leave(Container::OrgTable(_)) => {
                // Close whichever section is still open.
                match self.table_row {
                    TableRow::Body => self.output += "</tbody>",
                    TableRow::Header => self.output += "</thead>",
                    _ => {}
                }
                self.output += "</table>";
            }

            Event::Enter(Container::OrgTableRow(row)) => {
                if row.is_rule() {
                    // A rule row ends the current section and produces no <tr>.
                    match self.table_row {
                        TableRow::Body => {
                            self.output += "</tbody>";
                            self.table_row = TableRow::BodyRule;
                        }
                        TableRow::Header => {
                            self.output += "</thead>";
                            self.table_row = TableRow::BodyRule;
                        }
                        _ => {}
                    }
                    ctx.skip();
                } else {
                    // The first standard row after a rule opens the next section.
                    match self.table_row {
                        TableRow::HeaderRule => {
                            self.table_row = TableRow::Header;
                            self.output += "<thead>";
                        }
                        TableRow::BodyRule => {
                            self.table_row = TableRow::Body;
                            self.output += "<tbody>";
                        }
                        _ => {}
                    }
                    self.output += "<tr>";
                }
            }
            Event::Leave(Container::OrgTableRow(row)) => {
                if row.is_rule() {
                    match self.table_row {
                        TableRow::Body => {
                            self.output += "</tbody>";
                            self.table_row = TableRow::BodyRule;
                        }
                        TableRow::Header => {
                            self.output += "</thead>";
                            self.table_row = TableRow::BodyRule;
                        }
                        _ => {}
                    }
                    ctx.skip();
                } else {
                    self.output += "</tr>";
                }
            }

            Event::Enter(Container::OrgTableCell(_)) => self.output += "<td>",
            Event::Leave(Container::OrgTableCell(_)) => self.output += "</td>",

            Event::Enter(Container::Link(link)) => {
                let path = link.path();

                if link.is_image() {
                    let _ = write!(&mut self.output, r#"<img src="{}">"#, HtmlEscape(&path));
                    return ctx.skip();
                }

                let _ = write!(&mut self.output, r#"<a href="{}">"#, HtmlEscape(&path));

                // Without a description the path itself is the link text and
                // the element is closed right here.
                if !link.has_description() {
                    let _ = write!(&mut self.output, "{}</a>", HtmlEscape(&path));
                    ctx.skip();
                }
            }
            Event::Leave(Container::Link(_)) => self.output += "</a>",

            Event::Text(text) => {
                let _ = write!(&mut self.output, "{}", HtmlEscape(text.text()));
            }

            Event::LineBreak(_) => self.output += "<br/>",

            Event::Snippet(snippet) => {
                // Only snippets targeting the html backend are emitted, verbatim.
                if snippet.backend().eq_ignore_ascii_case("html") {
                    self.output += &snippet.value();
                }
            }

            Event::Rule(_) => self.output += "<hr/>",

            Event::Timestamp(timestamp) => {
                self.output += r#"<span class="timestamp-wrapper"><span class="timestamp">"#;
                for e in timestamp.syntax.children_with_tokens() {
                    match e {
                        NodeOrToken::Token(t) if t.kind() == SyntaxKind::MINUS2 => {
                            self.output += "&#x2013;";
                        }
                        NodeOrToken::Token(t) => {
                            // Token text is source text: active timestamps are
                            // delimited by `<`/`>`, so it must be escaped
                            // before being placed in the HTML output.
                            let _ = write!(&mut self.output, "{}", HtmlEscape(t.text()));
                        }
                        _ => {}
                    }
                }
                self.output += r#"</span></span>"#;
            }

            Event::LatexFragment(latex) => {
                let _ = write!(&mut self.output, "{}", &latex.syntax);
            }
            Event::LatexEnvironment(latex) => {
                let _ = write!(&mut self.output, "{}", &latex.syntax);
            }

            Event::Entity(entity) => self.output += entity.html(),

            _ => {}
        }
    }
}

View file

@ -1,9 +0,0 @@
//! Export `Org` struct to various formats.
mod event;
mod html;
mod traverse;
pub use event::{Container, Event};
pub use html::{HtmlEscape, HtmlExport};
pub use traverse::{TraversalContext, Traverser};

View file

@ -1,225 +0,0 @@
use crate::ast::*;
use crate::syntax::{SyntaxElement, SyntaxKind};
use rowan::ast::AstNode;
use SyntaxKind::*;
use super::event::{Container, Event};
/// Instruction left by an event handler for the traversal loop to act on.
#[derive(Default, Debug, PartialEq, Eq, Clone, Copy)]
enum TraversalControl {
    Up,
    Stop,
    Skip,
    #[default]
    Continue,
}

/// Handle passed to every event handler, used to steer the traversal.
#[derive(Default)]
pub struct TraversalContext {
    control: TraversalControl,
}

impl TraversalContext {
    /// Records the instruction the traversal loop should act on next.
    fn request(&mut self, control: TraversalControl) {
        self.control = control;
    }

    /// Stops traversal completely
    pub fn stop(&mut self) {
        self.request(TraversalControl::Stop)
    }

    /// Skips traversal of the current node's siblings
    pub fn up(&mut self) {
        self.request(TraversalControl::Up)
    }

    /// Skips traversal of the current node's descendants
    pub fn skip(&mut self) {
        self.request(TraversalControl::Skip)
    }

    /// Continues traversal
    pub fn r#continue(&mut self) {
        self.request(TraversalControl::Continue)
    }
}
/// A trait for walking an org syntax tree and receiving an `Event` for each
/// visited item.
///
/// ### `TraversalContext`
///
/// `TraversalContext` can be used to control the traversal.
///
/// For example, calling `ctx.skip()` while entering an element skips that
/// element's descendants, which can improve traversal performance.
///
/// ```rust
/// use orgize::{
/// export::{Container, Event, HtmlExport, TraversalContext, Traverser},
/// Org,
/// };
/// use slugify::slugify;
///
/// #[derive(Default)]
/// struct Toc(HtmlExport);
///
/// impl Traverser for Toc {
/// fn event(&mut self, event: Event, ctx: &mut TraversalContext) {
/// match event {
/// Event::Enter(Container::Headline(headline)) => {
/// let title = headline.title().map(|e| e.to_string()).collect::<String>();
/// self.0.push_str(&format!("<a href='#{}'>", slugify!(&title)));
/// for elem in headline.title() {
/// self.element(elem, ctx);
/// }
/// self.0.push_str("</a>");
/// if headline.headlines().count() > 0 {
/// self.0.push_str("<ul>");
/// }
/// }
/// Event::Leave(Container::Headline(headline)) => {
/// if headline.headlines().count() > 0 {
/// self.0.push_str("</ul>");
/// }
/// }
/// Event::Enter(Container::Section(_)) | Event::Leave(Container::Section(_)) => ctx.skip(),
/// Event::Enter(Container::Document(_)) | Event::Leave(Container::Document(_)) => {}
/// _ => self.0.event(event, ctx),
/// }
/// }
/// }
///
/// let org = Org::parse(r#"
/// * heading 1
/// section 1
/// ** heading 1.1
/// ** heading 1.2
/// * heading 2
/// section 2
/// * heading 3
/// **** heading 3.1"#);
/// let mut toc = Toc::default();
/// org.traverse(&mut toc);
/// assert_eq!(toc.0.finish(), "\
/// <a href='#heading-1'>heading 1</a>\
/// <ul><a href='#heading-1-1'>heading 1.1</a><a href='#heading-1-2'>heading 1.2</a></ul>\
/// <a href='#heading-2'>heading 2</a>\
/// <a href='#heading-3'>heading 3</a>\
/// <ul><a href='#heading-3-1'>heading 3.1</a></ul>");
/// ```
pub trait Traverser {
/// Handles traversal event
fn event(&mut self, event: Event, ctx: &mut TraversalContext);
/// Walks `element`, calling `event` for it and (for containers) for each of
/// its children, honouring whatever control the handler requested via `ctx`.
///
/// Container nodes produce an `Enter` event, then their children, then a
/// `Leave` event. Leaf nodes produce a single event, and only `TEXT`
/// tokens are reported (as `Event::Text`).
fn element(&mut self, element: SyntaxElement, ctx: &mut TraversalContext) {
// Checks the control requested by the last handler call. Anything other
// than `Continue` returns from `element` immediately; the value left in
// `ctx.control` is what the caller (the parent's loop) will see next.
macro_rules! take_control {
() => {
match ctx.control {
// `Stop` is left in place, so every enclosing call returns as well.
TraversalControl::Stop => {
ctx.control = TraversalControl::Stop;
return;
}
// `Up` is downgraded to `Skip`, which makes the parent return too.
TraversalControl::Up => {
ctx.control = TraversalControl::Skip;
return;
}
// `Skip` is consumed here: this call returns and traversal resumes.
TraversalControl::Skip => {
ctx.control = TraversalControl::Continue;
return;
}
TraversalControl::Continue => {}
}
};
}
match element {
SyntaxElement::Node(node) => {
// `walk!(Ast)` emits Enter, visits the children, then emits Leave.
// `walk!(@Ast)` emits one single event and does not descend.
macro_rules! walk {
($ast:ident) => {{
debug_assert!($ast::can_cast(node.kind()));
let node = $ast { syntax: node };
self.event(Event::Enter(Container::$ast(node.clone())), ctx);
take_control!();
for child in node.syntax.children_with_tokens() {
self.element(child, ctx);
take_control!();
}
self.event(Event::Leave(Container::$ast(node.clone())), ctx);
take_control!();
}};
(@$ast:ident) => {{
debug_assert!($ast::can_cast(node.kind()));
let node = $ast { syntax: node };
self.event(Event::$ast(node), ctx);
take_control!();
}};
}
match node.kind() {
DOCUMENT => walk!(Document),
HEADLINE => walk!(Headline),
SECTION => walk!(Section),
PARAGRAPH => walk!(Paragraph),
BOLD => walk!(Bold),
ITALIC => walk!(Italic),
STRIKE => walk!(Strike),
UNDERLINE => walk!(Underline),
LIST => walk!(List),
LIST_ITEM => walk!(ListItem),
CODE => walk!(Code),
INLINE_CALL => walk!(@InlineCall),
INLINE_SRC => walk!(@InlineSrc),
RULE => walk!(@Rule),
VERBATIM => walk!(Verbatim),
SPECIAL_BLOCK => walk!(SpecialBlock),
QUOTE_BLOCK => walk!(QuoteBlock),
CENTER_BLOCK => walk!(CenterBlock),
VERSE_BLOCK => walk!(VerseBlock),
COMMENT_BLOCK => walk!(CommentBlock),
EXAMPLE_BLOCK => walk!(ExampleBlock),
EXPORT_BLOCK => walk!(ExportBlock),
SOURCE_BLOCK => walk!(SourceBlock),
BABEL_CALL => walk!(BabelCall),
CLOCK => walk!(@Clock),
COOKIE => walk!(@Cookie),
RADIO_TARGET => walk!(RadioTarget),
DRAWER => walk!(Drawer),
DYN_BLOCK => walk!(DynBlock),
FN_DEF => walk!(FnDef),
FN_REF => walk!(FnRef),
MACROS => walk!(@Macros),
SNIPPET => walk!(@Snippet),
TIMESTAMP_ACTIVE | TIMESTAMP_INACTIVE | TIMESTAMP_DIARY => walk!(@Timestamp),
TARGET => walk!(Target),
COMMENT => walk!(Comment),
FIXED_WIDTH => walk!(FixedWidth),
ORG_TABLE => walk!(OrgTable),
ORG_TABLE_RULE_ROW | ORG_TABLE_STANDARD_ROW => walk!(OrgTableRow),
ORG_TABLE_CELL => walk!(OrgTableCell),
LINK => walk!(Link),
LATEX_FRAGMENT => walk!(@LatexFragment),
LATEX_ENVIRONMENT => walk!(@LatexEnvironment),
ENTITY => walk!(@Entity),
LINE_BREAK => walk!(@LineBreak),
SUPERSCRIPT => walk!(Superscript),
SUBSCRIPT => walk!(Subscript),
KEYWORD => walk!(Keyword),
PROPERTY_DRAWER => walk!(PropertyDrawer),
// Wrapper nodes without an event of their own: only their children are visited.
BLOCK_CONTENT | LIST_ITEM_CONTENT => {
for child in node.children_with_tokens() {
self.element(child, ctx);
take_control!();
}
}
// These nodes are neither reported nor descended into.
NODE_PROPERTY | AFFILIATED_KEYWORD => {}
// Any other kind is ignored; debug builds assert that it is not an
// element or object that should have had an arm above.
kind => debug_assert!(
!kind.is_element() && !kind.is_object(),
"{:?} is not handled",
kind
),
}
}
SyntaxElement::Token(token) => {
// Only TEXT tokens are surfaced to the handler.
if token.kind() == TEXT {
self.event(Event::Text(token), ctx);
take_control!();
}
}
};
}
}

View file

@ -1,21 +0,0 @@
#![doc = include_str!("../README.md")]
pub mod ast;
mod config;
mod entities;
pub mod export;
mod org;
mod syntax;
#[cfg(test)]
mod tests;
// Re-export of the rowan crate.
pub use rowan;
pub use config::ParseConfig;
pub use org::Org;
pub use syntax::{
SyntaxElement, SyntaxElementChildren, SyntaxKind, SyntaxNode, SyntaxNodeChildren, SyntaxToken,
};
pub(crate) use syntax::combinator::lossless_parser;

View file

@ -1,69 +0,0 @@
use rowan::ast::AstNode;
use rowan::GreenNode;
use crate::ast::Document;
use crate::config::ParseConfig;
use crate::export::{HtmlExport, TraversalContext, Traverser};
use crate::syntax::{OrgLanguage, SyntaxNode};
use crate::SyntaxElement;
/// A parsed org document: the green syntax tree plus the config used to parse it.
#[derive(Debug)]
pub struct Org {
// Root green node of the parsed document.
pub(crate) green: GreenNode,
// Parse configuration this document was created with.
pub(crate) config: ParseConfig,
}
impl Org {
    /// Parse input string to Org element tree using default parse config
    pub fn parse(input: impl AsRef<str>) -> Org {
        let config = ParseConfig::default();
        config.parse(input)
    }

    /// Returns the underlying green tree root
    pub fn green(&self) -> &GreenNode {
        &self.green
    }

    /// Returns the parse config this document was created with
    pub fn config(&self) -> &ParseConfig {
        &self.config
    }

    /// Returns the document
    pub fn document(&self) -> Document {
        let syntax = SyntaxNode::new_root(self.green.clone());
        Document { syntax }
    }

    /// Returns org-mode string
    pub fn to_org(&self) -> String {
        self.green.to_string()
    }

    /// Convert org element tree to html-format using default html handler
    pub fn to_html(&self) -> String {
        let mut exporter = HtmlExport::default();
        self.traverse(&mut exporter);
        exporter.finish()
    }

    /// Walk through org element tree using given traverser
    pub fn traverse<T: Traverser>(&self, t: &mut T) {
        let root = SyntaxNode::new_root(self.green.clone());
        let mut ctx = TraversalContext::default();
        t.element(SyntaxElement::Node(root), &mut ctx);
    }

    /// Returns the first node in org element tree in depth first order
    pub fn first_node<N: AstNode<Language = OrgLanguage>>(&self) -> Option<N> {
        // Pre-order search: test the node itself, then its children in order.
        fn find<N: AstNode<Language = OrgLanguage>>(node: SyntaxNode) -> Option<N> {
            if !N::can_cast(node.kind()) {
                return node.children().find_map(find);
            }
            N::cast(node)
        }

        let root = SyntaxNode::new_root(self.green.clone());
        find(root)
    }
}

View file

@ -1,302 +0,0 @@
use nom::{
branch::alt,
bytes::complete::{tag, tag_no_case, take_while, take_while1},
character::complete::{alpha1, space0, space1},
combinator::{cond, opt},
sequence::{separated_pair, tuple},
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, eol_or_eof, line_starts_iter, node, token, trim_line_end, GreenElement,
NodeBuilder,
},
element::element_nodes,
input::Input,
keyword::affiliated_keyword_nodes,
SyntaxKind::*,
};
/// Parses a whole block: optional affiliated keywords, the `#+BEGIN_NAME`
/// line, the contents, the matching `#+END_NAME` line and trailing blank lines.
///
/// The node kind is picked from the block name, compared case-insensitively;
/// unrecognised names yield `SPECIAL_BLOCK`. Returns an error when no
/// matching end line is found.
fn block_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, affiliated_keywords) = affiliated_keyword_nodes(input)?;
let (input, (block_begin, name)) = block_begin_node(input)?;
let (input, pre_blank) = blank_lines(input)?;
let kind = match name {
s if s.eq_ignore_ascii_case("COMMENT") => COMMENT_BLOCK,
s if s.eq_ignore_ascii_case("EXAMPLE") => EXAMPLE_BLOCK,
s if s.eq_ignore_ascii_case("EXPORT") => EXPORT_BLOCK,
s if s.eq_ignore_ascii_case("SRC") => SOURCE_BLOCK,
s if s.eq_ignore_ascii_case("CENTER") => CENTER_BLOCK,
s if s.eq_ignore_ascii_case("QUOTE") => QUOTE_BLOCK,
s if s.eq_ignore_ascii_case("VERSE") => VERSE_BLOCK,
_ => SPECIAL_BLOCK,
};
// Try every line start as the position of the end line; the first one that
// parses as `#+END_NAME` closes the block, and everything before it is the
// block's contents.
for (input, contents) in line_starts_iter(&input).map(|i| input.take_split(i)) {
if let Ok((input, block_end)) = block_end_node(input, name) {
let (input, post_blank) = blank_lines(input)?;
let mut children = vec![];
children.extend(affiliated_keywords);
children.push(block_begin);
children.extend(pre_blank);
// Greater blocks have their contents parsed as elements; the others keep
// the raw text, with comma-quoting split out into separate tokens.
if kind.is_greater_element() {
children.push(node(BLOCK_CONTENT, element_nodes(contents)?));
} else {
children.push(node(BLOCK_CONTENT, comma_quoted_text_nodes(contents)));
}
children.push(block_end);
children.extend(post_blank);
return Ok((input, node(kind, children)));
}
}
// No end line anywhere in the remaining input.
Err(nom::Err::Error(()))
}
/// Parses a `#+BEGIN_NAME ...` line into a `BLOCK_BEGIN` node.
///
/// Returns the node together with the block name exactly as written in the
/// input. `SRC` blocks additionally get language, switches and parameters
/// tokens; `EXPORT` blocks get an export type token; for every other name the
/// rest of the line is kept as plain text.
fn block_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
let (input, (ws1, begin, name)) = tuple((space0, tag_no_case("#+BEGIN_"), alpha1))(input)?;
let mut b = NodeBuilder::new();
b.ws(ws1);
b.text(begin);
b.text(name);
if name.eq_ignore_ascii_case("SRC") {
// Language: the first whitespace-delimited word after the name, if any.
let (input, language) = opt(tuple((
space1,
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
)))(input)?;
let (input, switches) = opt(tuple((space1, source_block_switches)))(input)?;
let (input, ws1) = space0(input)?;
// Whatever remains on the line (minus trailing whitespace) is the parameters.
let (input, (parameters, ws2, nl)) = trim_line_end(input)?;
if let Some((ws, language)) = language {
b.ws(ws);
b.token(SRC_BLOCK_LANGUAGE, language);
}
if let Some((ws, switches)) = switches {
b.ws(ws);
b.token(SRC_BLOCK_SWITCHES, switches);
}
b.ws(ws1);
// `NodeBuilder::token` pushes unconditionally, so skip empty parameters here.
if !parameters.is_empty() {
b.token(SRC_BLOCK_PARAMETERS, parameters);
}
b.ws(ws2);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
} else if name.eq_ignore_ascii_case("EXPORT") {
// Export type: the first whitespace-delimited word after the name, if any.
let (input, ty) = opt(tuple((
space1,
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
)))(input)?;
let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?;
let (input, nl) = eol_or_eof(input)?;
if let Some((ws, ty)) = ty {
b.ws(ws);
b.token(EXPORT_BLOCK_TYPE, ty);
}
b.text(data);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
} else {
// Any other block: the rest of the line is kept as one text token.
let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?;
let (input, nl) = eol_or_eof(input)?;
b.text(data);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
}
}
/// Recognizes a run of source block switches and returns it as one slice.
///
/// Accepted switches are `-l <word>` / `-n <word>`, `+<letters>` and
/// `-<letters>`, separated by whitespace. Fails if no switch is found at the
/// start of the input.
fn source_block_switches(input: Input) -> IResult<Input, Input, ()> {
// `i` is the not-yet-consumed tail; `input` is kept to compute the length.
let mut i = input;
while !i.is_empty() {
match tuple::<_, _, (), _>((
// Separating whitespace is required before every switch except the first.
cond(i.len() != input.len(), space1),
alt((
separated_pair(
alt((tag("-l"), tag("-n"))),
space1,
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
),
tuple((tag("+"), alpha1)),
tuple((tag("-"), alpha1)),
)),
))(i)
{
Ok((i_, _)) => i = i_,
_ => break,
}
}
let len = input.len() - i.len();
if len == 0 {
Err(nom::Err::Error(()))
} else {
Ok(input.take_split(len))
}
}
/// Parses the `#+END_NAME` line that closes a block named `name`.
///
/// Both the `#+END_` prefix and the block name are matched
/// case-insensitively, so a block opened with `#+BEGIN_SRC` is closed by
/// `#+end_src` as well. The line may be indented and may carry trailing
/// whitespace, but nothing else. The text is taken from the input as
/// written, so the resulting `BLOCK_END` node stays lossless.
fn block_end_node<'a>(input: Input<'a>, name: &str) -> IResult<Input<'a>, GreenElement, ()> {
    let (input, (ws, end, name, ws_, nl)) = tuple((
        space0,
        tag_no_case("#+END_"),
        // The name was previously matched with case-sensitive `tag`, which
        // rejected end lines whose case differed from the begin line.
        tag_no_case(name),
        space0,
        eol_or_eof,
    ))(input)?;

    let mut b = NodeBuilder::new();
    b.ws(ws);
    b.text(end);
    b.text(name);
    b.ws(ws_);
    b.nl(nl);
    Ok((input, b.finish(BLOCK_END)))
}
/// Splits block contents into `TEXT` tokens and `COMMA` tokens.
///
/// A leading comma is emitted as its own `COMMA` token on every line that
/// starts with `,*` or `,#+`; all other text is emitted as `TEXT`, and empty
/// text runs are omitted.
fn comma_quoted_text_nodes(input: Input) -> Vec<GreenElement> {
    let s = input.as_str();
    let mut nodes = Vec::new();
    // Byte offset up to which `s` has already been turned into tokens.
    let mut flushed = 0;

    for line_start in line_starts_iter(s) {
        let line = &s[line_start..];
        // line must start with either ",*" or ",#+"
        let is_quoted = line.starts_with(",*") || line.starts_with(",#+");
        if !is_quoted {
            continue;
        }

        if flushed < line_start {
            nodes.push(token(TEXT, &s[flushed..line_start]));
        }
        nodes.push(token(COMMA, ","));
        flushed = line_start + 1;
    }

    if flushed < s.len() {
        nodes.push(token(TEXT, &s[flushed..]));
    }

    nodes
}
/// Parses a block element; a thin wrapper that runs `block_node_base`
/// through `lossless_parser!` and a debug-level tracing span.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn block_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(block_node_base, input)
}
// Snapshot tests for block parsing: comma-quoted example contents, blank
// lines before the contents, lowercase keywords, and a source block with
// language, switches and parameters.
#[test]
fn test_parse() {
use crate::ast::{ExampleBlock, SourceBlock};
use crate::tests::to_ast;
let to_src_block = to_ast::<SourceBlock>(block_node);
let to_example_block = to_ast::<ExampleBlock>(block_node);
// Comma-quoted lines are split into COMMA + TEXT tokens.
insta::assert_debug_snapshot!(
to_example_block(
r#"#+BEGIN_EXAMPLE
,* headline
,#+block
text
#+END_EXAMPLE"#
).syntax,
@r###"
EXAMPLE_BLOCK@0..59
BLOCK_BEGIN@0..16
TEXT@0..8 "#+BEGIN_"
TEXT@8..15 "EXAMPLE"
NEW_LINE@15..16 "\n"
BLOCK_CONTENT@16..42
COMMA@16..17 ","
TEXT@17..28 "* headline\n"
COMMA@28..29 ","
TEXT@29..42 "#+block\ntext\n"
BLOCK_END@42..59
WHITESPACE@42..46 " "
TEXT@46..52 "#+END_"
TEXT@52..59 "EXAMPLE"
"###
);
// Blank lines after the begin line are kept as BLANK_LINE tokens.
insta::assert_debug_snapshot!(
to_src_block(
r#"#+BEGIN_SRC
#+END_SRC"#
).syntax,
@r###"
SOURCE_BLOCK@0..27
BLOCK_BEGIN@0..12
TEXT@0..8 "#+BEGIN_"
TEXT@8..11 "SRC"
NEW_LINE@11..12 "\n"
BLANK_LINE@12..13 "\n"
BLANK_LINE@13..14 "\n"
BLOCK_CONTENT@14..14
BLOCK_END@14..27
WHITESPACE@14..18 " "
TEXT@18..24 "#+END_"
TEXT@24..27 "SRC"
"###
);
// Lowercase begin/end keywords.
insta::assert_debug_snapshot!(
to_src_block(
r#"#+begin_src
#+end_src"#
).syntax,
@r###"
SOURCE_BLOCK@0..25
BLOCK_BEGIN@0..12
TEXT@0..8 "#+begin_"
TEXT@8..11 "src"
NEW_LINE@11..12 "\n"
BLOCK_CONTENT@12..12
BLOCK_END@12..25
WHITESPACE@12..16 " "
TEXT@16..22 "#+end_"
TEXT@22..25 "src"
"###
);
// Language, switches and parameters on the begin line.
insta::assert_debug_snapshot!(
to_src_block(
r#"#+BEGIN_SRC javascript -n 20 -r :var n=0, l=2 :foo=bar
alert('Hello World!');
#+END_SRC
"#).syntax,
@r###"
SOURCE_BLOCK@0..100
BLOCK_BEGIN@0..58
TEXT@0..8 "#+BEGIN_"
TEXT@8..11 "SRC"
WHITESPACE@11..12 " "
SRC_BLOCK_LANGUAGE@12..22 "javascript"
WHITESPACE@22..24 " "
SRC_BLOCK_SWITCHES@24..32 "-n 20 -r"
WHITESPACE@32..34 " "
SRC_BLOCK_PARAMETERS@34..57 ":var n=0, l=2 :foo=bar"
NEW_LINE@57..58 "\n"
BLOCK_CONTENT@58..81
TEXT@58..81 "alert('Hello World!');\n"
BLOCK_END@81..95
WHITESPACE@81..85 " "
TEXT@85..91 "#+END_"
TEXT@91..94 "SRC"
NEW_LINE@94..95 "\n"
BLANK_LINE@95..96 "\n"
BLANK_LINE@96..100 " "
"###
);
// TODO: more testing
}

View file

@ -1,131 +0,0 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1, space0},
combinator::{map, opt, recognize},
sequence::tuple,
IResult,
};
use super::{
combinator::{
blank_lines, colon_token, double_arrow_token, eol_or_eof, GreenElement, NodeBuilder,
},
input::Input,
timestamp::{timestamp_active_node, timestamp_inactive_node},
SyntaxKind,
};
/// Parses a clock line into a `CLOCK` node.
///
/// The accepted shape is: optional indentation, `CLOCK:`, an inactive or
/// active timestamp, an optional `=> H:MM` duration, the end of the line and
/// any following blank lines.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn clock_node(input: Input) -> IResult<Input, GreenElement, ()> {
let mut parser = map(
tuple((
space0,
tag("CLOCK:"),
space0,
alt((timestamp_inactive_node, timestamp_active_node)),
// Optional duration: `=>` followed by digits, a colon and digits.
opt(tuple((
space0,
double_arrow_token,
space0,
recognize(tuple((digit1, colon_token, digit1))),
))),
space0,
eol_or_eof,
blank_lines,
)),
// Reassemble every recognised piece, in source order, into one node.
|(ws, clock, ws_, timestamp, duration, ws__, nl, post_blank)| {
let mut b = NodeBuilder::new();
b.ws(ws);
b.text(clock);
b.ws(ws_);
b.push(timestamp);
if let Some((ws, double_arrow, ws_, time)) = duration {
b.ws(ws);
b.push(double_arrow);
b.ws(ws_);
b.text(time);
}
b.ws(ws__);
b.nl(nl);
b.children.extend(post_blank);
b.finish(SyntaxKind::CLOCK)
},
);
crate::lossless_parser!(parser, input)
}
// Snapshot tests for clock parsing: a single timestamp, and a timestamp
// range followed by a duration and a blank line.
#[test]
fn parse() {
use crate::ast::Clock;
use crate::tests::to_ast;
let to_clock = to_ast::<Clock>(clock_node);
// Clock with a single inactive timestamp and no duration.
insta::assert_debug_snapshot!(
to_clock("CLOCK: [2003-09-16 Tue 09:39]").syntax,
@r###"
CLOCK@0..29
TEXT@0..6 "CLOCK:"
WHITESPACE@6..7 " "
TIMESTAMP_INACTIVE@7..29
L_BRACKET@7..8 "["
TIMESTAMP_YEAR@8..12 "2003"
MINUS@12..13 "-"
TIMESTAMP_MONTH@13..15 "09"
MINUS@15..16 "-"
TIMESTAMP_DAY@16..18 "16"
WHITESPACE@18..19 " "
TIMESTAMP_DAYNAME@19..22 "Tue"
WHITESPACE@22..23 " "
TIMESTAMP_HOUR@23..25 "09"
COLON@25..26 ":"
TIMESTAMP_MINUTE@26..28 "39"
R_BRACKET@28..29 "]"
"###
);
// Clock with a timestamp range, a duration and a trailing blank line.
insta::assert_debug_snapshot!(
to_clock("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00\n\n").syntax,
@r###"
CLOCK@0..64
TEXT@0..6 "CLOCK:"
WHITESPACE@6..7 " "
TIMESTAMP_INACTIVE@7..53
L_BRACKET@7..8 "["
TIMESTAMP_YEAR@8..12 "2003"
MINUS@12..13 "-"
TIMESTAMP_MONTH@13..15 "09"
MINUS@15..16 "-"
TIMESTAMP_DAY@16..18 "16"
WHITESPACE@18..19 " "
TIMESTAMP_DAYNAME@19..22 "Tue"
WHITESPACE@22..23 " "
TIMESTAMP_HOUR@23..25 "09"
COLON@25..26 ":"
TIMESTAMP_MINUTE@26..28 "39"
R_BRACKET@28..29 "]"
MINUS2@29..31 "--"
L_BRACKET@31..32 "["
TIMESTAMP_YEAR@32..36 "2003"
MINUS@36..37 "-"
TIMESTAMP_MONTH@37..39 "09"
MINUS@39..40 "-"
TIMESTAMP_DAY@40..42 "16"
WHITESPACE@42..43 " "
TIMESTAMP_DAYNAME@43..46 "Tue"
WHITESPACE@46..47 " "
TIMESTAMP_HOUR@47..49 "10"
COLON@49..50 ":"
TIMESTAMP_MINUTE@50..52 "39"
R_BRACKET@52..53 "]"
WHITESPACE@53..54 " "
DOUBLE_ARROW@54..56 "=>"
WHITESPACE@56..58 " "
TEXT@58..62 "1:00"
NEW_LINE@62..63 "\n"
BLANK_LINE@63..64 "\n"
"###
);
}

View file

@ -1,321 +0,0 @@
use memchr::{memchr2, memchr2_iter, Memchr2};
use nom::{bytes::complete::tag, IResult, InputTake, Slice};
use rowan::{GreenNode, GreenToken, Language, NodeOrToken};
use std::iter::once;
use super::{input::Input, OrgLanguage, SyntaxKind, SyntaxKind::*};
/// A green tree element: either a green node or a green token.
pub type GreenElement = NodeOrToken<GreenNode, GreenToken>;
#[inline]
pub fn token(kind: SyntaxKind, input: &str) -> GreenElement {
GreenElement::Token(GreenToken::new(OrgLanguage::kind_to_raw(kind), input))
}
#[inline]
pub fn node<I>(kind: SyntaxKind, children: I) -> GreenElement
where
I: IntoIterator<Item = GreenElement>,
I::IntoIter: ExactSizeIterator,
{
GreenElement::Node(GreenNode::new(OrgLanguage::kind_to_raw(kind), children))
}
// Generates a public parser function `$name` that matches the literal
// `$token` exactly and returns it as a green token of kind `$kind`.
macro_rules! token_parser {
($name:ident, $token:literal, $kind:ident) => {
#[doc = "Recognizes `"]
#[doc = $token]
#[doc = "` and returns GreenToken"]
pub fn $name(input: Input) -> IResult<Input, GreenElement, ()> {
let (i, o) = tag($token)(input)?;
Ok((i, token($kind, o.as_str())))
}
};
}
// One literal-token parser per punctuation token. The commented-out
// invocations are parsers that are currently not generated.
token_parser!(l_bracket_token, "[", L_BRACKET);
token_parser!(r_bracket_token, "]", R_BRACKET);
token_parser!(l_bracket2_token, "[[", L_BRACKET2);
token_parser!(r_bracket2_token, "]]", R_BRACKET2);
token_parser!(l_parens_token, "(", L_PARENS);
token_parser!(r_parens_token, ")", R_PARENS);
token_parser!(l_angle_token, "<", L_ANGLE);
token_parser!(r_angle_token, ">", R_ANGLE);
token_parser!(l_curly_token, "{", L_CURLY);
token_parser!(r_curly_token, "}", R_CURLY);
token_parser!(l_curly3_token, "{{{", L_CURLY3);
token_parser!(r_curly3_token, "}}}", R_CURLY3);
token_parser!(l_angle2_token, "<<", L_ANGLE2);
token_parser!(r_angle2_token, ">>", R_ANGLE2);
token_parser!(l_angle3_token, "<<<", L_ANGLE3);
token_parser!(r_angle3_token, ">>>", R_ANGLE3);
token_parser!(at_token, "@", AT);
token_parser!(at2_token, "@@", AT2);
token_parser!(minus2_token, "--", MINUS2);
// token_parser!(percent_token, "%", PERCENT);
token_parser!(percent2_token, "%%", PERCENT2);
// token_parser!(slash_token, "/", SLASH);
token_parser!(backslash_token, "\\", BACKSLASH);
token_parser!(underscore_token, "_", UNDERSCORE);
// token_parser!(star_token, "*", STAR);
token_parser!(plus_token, "+", PLUS);
token_parser!(minus_token, "-", MINUS);
token_parser!(colon_token, ":", COLON);
token_parser!(colon2_token, "::", COLON2);
token_parser!(pipe_token, "|", PIPE);
token_parser!(dollar_token, "$", DOLLAR);
token_parser!(dollar2_token, "$$", DOLLAR2);
// token_parser!(equal_token, "=", EQUAL);
// token_parser!(tilde_token, "~", TILDE);
token_parser!(hash_plus_token, "#+", HASH_PLUS);
token_parser!(caret_token, "^", CARET);
token_parser!(hash_token, "#", HASH);
token_parser!(double_arrow_token, "=>", DOUBLE_ARROW);
// Runs `$parser` on `$input`, propagating its error with `?`, and evaluates
// to `Ok((remaining, output))`.
//
// The consumed output is reported through a trace event, and debug builds
// assert that the output's text equals exactly the input that was consumed.
macro_rules! lossless_parser {
($parser:expr, $input:expr) => {{
// Keep the original input so the consumed prefix can be computed below.
let i_ = $input;
let (i, o) = $parser($input)?;
tracing::trace!(consumed = o.to_string());
// NOTE(review): `stringify!` of a string literal keeps the quotes, so the
// assertion message presumably prints with surrounding quotes - confirm.
debug_assert_eq!(
&i_.as_str()[0..(i_.len() - i.len())],
&o.to_string(),
stringify!("parser must be lossless")
);
Ok((i, o))
}};
}
pub(crate) use lossless_parser;
/// Takes all leading blank lines
///
/// Each non-empty line consisting only of ASCII whitespace becomes one
/// `BLANK_LINE` token (line ending included). Consumption stops at the first
/// line that contains anything else; this parser never fails.
pub fn blank_lines(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
    if input.is_empty() {
        return Ok((input, vec![]));
    }

    let text = input.as_str();
    let mut blanks = Vec::new();
    // Byte offset of the first line that has not been consumed yet.
    let mut consumed = 0;

    for end in line_ends_iter(text) {
        let line = &text[consumed..end];
        let is_blank = !line.is_empty() && line.bytes().all(|b| b.is_ascii_whitespace());
        if !is_blank {
            break;
        }
        blanks.push(token(BLANK_LINE, line));
        consumed = end;
    }

    Ok((input.slice(consumed..), blanks))
}
// Checks `blank_lines` on empty input, a single newline, a non-blank line,
// and mixed `\n` / `\r\n` / `\r` line endings.
#[test]
fn test_blank_lines() {
use crate::config::ParseConfig;
let config = &ParseConfig::default();
// Empty input: nothing consumed, nothing produced.
let (input, output) = blank_lines(("", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output, vec![]);
let (input, output) = blank_lines(("\n", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.len(), 1);
assert_eq!(output[0].to_string(), "\n");
// A line with non-whitespace content is not blank and is left untouched.
let (input, output) = blank_lines((" t", config).into()).unwrap();
assert_eq!(input.as_str(), " t");
assert_eq!(output, vec![]);
// A whitespace-only last line without a line ending still counts as blank.
let (input, output) = blank_lines((" \r\n\n\t\t\r\n \n ", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.len(), 5);
assert_eq!(output[0].to_string(), " \r\n");
assert_eq!(output[1].to_string(), "\n");
assert_eq!(output[2].to_string(), "\t\t\r\n");
assert_eq!(output[3].to_string(), " \n");
assert_eq!(output[4].to_string(), " ");
// A lone `\r` ends a line too; consumption stops at the first non-blank line.
let (input, output) =
blank_lines(("\r\n\n\t\t\r\n \n\r \r t\n ", config).into()).unwrap();
assert_eq!(input.as_str(), " t\n ");
assert_eq!(output.len(), 6);
assert_eq!(output[0].to_string(), "\r\n");
assert_eq!(output[1].to_string(), "\n");
assert_eq!(output[2].to_string(), "\t\t\r\n");
assert_eq!(output[3].to_string(), " \n");
assert_eq!(output[4].to_string(), "\r");
assert_eq!(output[5].to_string(), " \r");
}
/// Splits off the first line of `input` into three parts.
///
/// Returns 1. the line's contents without trailing whitespace, 2. that
/// trailing whitespace, 3. the line ending (`\r\n`, `\n`, `\r`, or empty when
/// the input has no line ending). The remaining input starts right after the
/// line ending. This parser never fails.
pub fn trim_line_end(input: Input) -> IResult<Input, (Input, Input, Input), ()> {
let bytes = input.as_bytes();
let (input, contents, nl) = match memchr2(b'\r', b'\n', bytes) {
// `\r\n` is treated as one two-byte line ending.
Some(i) if bytes[i] == b'\r' && matches!(bytes.get(i + 1), Some(b'\n')) => (
input.slice(i + 2..),
input.slice(0..i),
input.slice(i..i + 2),
),
// A lone `\n` or `\r`.
Some(i) => (
input.slice(i + 1..),
input.slice(0..i),
input.slice(i..i + 1),
),
// No line ending: the whole input is the line.
_ => (input.of(""), input, input.of("")),
};
// Split the contents at the last non-whitespace byte.
let (contents, ws) = match contents.bytes().rposition(|u| !u.is_ascii_whitespace()) {
Some(i) => (contents.slice(0..i + 1), contents.slice(i + 1..)),
None => (contents.of(""), contents),
};
Ok((input, (contents, ws, nl)))
}
// Checks `trim_line_end` on empty input, a line without a line ending, a
// `\r\n`-terminated line with trailing whitespace, and a lone `\r`.
#[test]
fn test_trim_line_end() {
use crate::config::ParseConfig;
let config = &ParseConfig::default();
let (input, output) = trim_line_end(("", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.0.as_str(), "");
assert_eq!(output.1.as_str(), "");
assert_eq!(output.2.as_str(), "");
// No line ending: everything is contents.
let (input, output) = trim_line_end(("* hello, world :abc:", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.0.as_str(), "* hello, world :abc:");
assert_eq!(output.1.as_str(), "");
assert_eq!(output.2.as_str(), "");
// Trailing whitespace and the `\r\n` ending are split off separately.
let (input, output) =
trim_line_end(("* hello, world :abc: \r\nrest\n", config).into()).unwrap();
assert_eq!(input.as_str(), "rest\n");
assert_eq!(output.0.as_str(), "* hello, world :abc:");
assert_eq!(output.1.as_str(), " ");
assert_eq!(output.2.as_str(), "\r\n");
// A whitespace-only line terminated by a lone `\r`.
let (input, output) = trim_line_end((" \rr", config).into()).unwrap();
assert_eq!(input.as_str(), "r");
assert_eq!(output.0.as_str(), "");
assert_eq!(output.1.as_str(), " ");
assert_eq!(output.2.as_str(), "\r");
}
/// Recognizes a line ending \r, \n, \r\n or end of file
///
/// At end of input an empty slice is returned; any other leading byte is an
/// error.
pub fn eol_or_eof(input: Input) -> IResult<Input, Input, ()> {
    let bytes = input.as_bytes();
    let count = match (bytes.first(), bytes.get(1)) {
        // End of file: succeed without consuming anything.
        (None, _) => 0,
        (Some(b'\r'), Some(b'\n')) => 2,
        (Some(b'\n' | b'\r'), _) => 1,
        _ => return Err(nom::Err::Error(())),
    };
    Ok(input.take_split(count))
}
/// Iterator over the byte offsets that directly follow each line ending.
///
/// `\r\n` counts as one line ending; a lone `\r` or `\n` counts as well.
struct LineStart<'a> {
    bytes: &'a [u8],
    iter: Memchr2<'a>,
}

impl<'a> LineStart<'a> {
    fn new(input: &'a str) -> Self {
        let bytes = input.as_bytes();
        let iter = memchr2_iter(b'\r', b'\n', bytes);
        Self { bytes, iter }
    }
}

impl<'a> Iterator for LineStart<'a> {
    type Item = usize;

    fn next(&mut self) -> Option<Self::Item> {
        let pos = self.iter.next()?;
        let is_crlf = self.bytes[pos] == b'\r' && self.bytes.get(pos + 1) == Some(&b'\n');

        if !is_crlf {
            return Some(pos + 1);
        }

        // The `\n` of a `\r\n` pair is the searcher's next hit; consume it so
        // the pair yields only one line start.
        let lf = self.iter.next();
        debug_assert_eq!(pos + 1, lf.unwrap());
        Some(pos + 2)
    }
}
/// Returns an iterator of positions of line start, including zero
pub fn line_starts_iter(s: &str) -> impl Iterator<Item = usize> + '_ {
    let after_line_endings = LineStart::new(s);
    once(0).chain(after_line_endings)
}
/// Returns an iterator of positions of line end, including eof
pub fn line_ends_iter(s: &str) -> impl Iterator<Item = usize> + '_ {
    let eof = once(s.len());
    LineStart::new(s).chain(eof)
}
/// Collects child elements and finally wraps them into a green node.
pub struct NodeBuilder {
    pub children: Vec<GreenElement>,
}

impl NodeBuilder {
    /// Creates a builder with no children.
    pub fn new() -> NodeBuilder {
        NodeBuilder {
            children: Vec::new(),
        }
    }

    /// Appends `i` as a whitespace token; empty input is ignored.
    pub fn ws(&mut self, i: Input) {
        if i.is_empty() {
            return;
        }
        debug_assert!(i.bytes().all(|c| c.is_ascii_whitespace()));
        self.children.push(i.ws_token())
    }

    /// Appends `i` as a new-line token; empty input is ignored.
    pub fn nl(&mut self, i: Input) {
        if i.is_empty() {
            return;
        }
        debug_assert!(
            i.s == "\n" || i.s == "\r\n" || i.s == "\r",
            "{:?} should be a new line",
            i.s
        );
        self.children.push(i.nl_token())
    }

    /// Appends `i` as a text token; empty input is ignored.
    pub fn text(&mut self, i: Input) {
        if i.is_empty() {
            return;
        }
        self.children.push(i.text_token())
    }

    /// Appends `i` as a token of the given kind, even when `i` is empty.
    pub fn token(&mut self, kind: SyntaxKind, i: Input) {
        let element = i.token(kind);
        self.children.push(element)
    }

    /// Appends an already built element.
    pub fn push(&mut self, elem: GreenElement) {
        self.children.push(elem)
    }

    /// Appends the element if there is one.
    pub fn push_opt(&mut self, elem: Option<GreenElement>) {
        self.children.extend(elem)
    }

    /// Number of children collected so far.
    pub fn len(&self) -> usize {
        self.children.len()
    }

    /// Consumes the builder and produces a node of the given kind.
    pub fn finish(self, kind: SyntaxKind) -> GreenElement {
        let NodeBuilder { children } = self;
        GreenElement::Node(GreenNode::new(kind.into(), children))
    }
}

View file

@ -1,112 +0,0 @@
use nom::{
bytes::complete::{tag, take_while},
character::complete::{space0, space1},
combinator::{iterator, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder},
input::Input,
SyntaxKind,
};
fn comment_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let mut b = NodeBuilder::new();
let mut iter = iterator(
input,
opt(tuple((
space0,
tag("#"),
opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))),
eol_or_eof,
))),
);
for (idx, option) in iter.enumerate() {
match option {
Some((ws, common, content, eol)) => {
b.ws(ws);
b.token(SyntaxKind::HASH, common);
if let Some((ws, text)) = content {
b.ws(ws);
b.text(text);
}
b.text(eol);
}
_ if idx == 0 => return Err(nom::Err::Error(())),
_ => break,
}
}
let (input, _) = iter.finish()?;
let (input, post_blank) = blank_lines(input)?;
b.children.extend(post_blank);
Ok((input, b.finish(SyntaxKind::COMMENT)))
}
/// Parses a comment element at the start of `input`.
///
/// Runs `comment_node_base` through `crate::lossless_parser!` (defined
/// elsewhere; presumably it verifies that the produced node reproduces the
/// consumed text). The call is traced at debug level with the input text.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn comment_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(comment_node_base, input)
}
// Snapshot tests for `comment_node`: a lone `#`, several comment lines
// followed by a blank line, and a comment that stops at a non-comment line.
#[test]
fn parse() {
use crate::{
syntax::{comment::comment_node, input::Input, SyntaxNode},
ParseConfig,
};
// Parses `input` as a comment and wraps the result in a root syntax node.
let t = |input: &str| {
SyntaxNode::new_root(
comment_node(Input {
s: input,
c: &ParseConfig::default(),
})
.unwrap()
.1
.into_node()
.unwrap(),
)
};
insta::assert_debug_snapshot!(
t("#"),
@r###"
COMMENT@0..1
HASH@0..1 "#"
"###
);
insta::assert_debug_snapshot!(
t("#\n # a\n #\n\n"),
@r###"
COMMENT@0..12
HASH@0..1 "#"
TEXT@1..2 "\n"
WHITESPACE@2..4 " "
HASH@4..5 "#"
WHITESPACE@5..6 " "
TEXT@6..7 "a"
TEXT@7..8 "\n"
WHITESPACE@8..9 " "
HASH@9..10 "#"
TEXT@10..11 "\n"
BLANK_LINE@11..12 "\n"
"###
);
// the comment ends at the first line that is not a comment line
insta::assert_debug_snapshot!(
t("#\na\n #\n\n"),
@r###"
COMMENT@0..2
HASH@0..1 "#"
TEXT@1..2 "\n"
"###
);
}

View file

@ -1,144 +0,0 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::digit0,
combinator::map,
sequence::{pair, separated_pair, tuple},
IResult,
};
use super::{
combinator::{l_bracket_token, node, r_bracket_token, token, GreenElement},
input::Input,
SyntaxKind::*,
};
/// Parses a statistics cookie: `[N/M]` or `[N%]`, where each number may be
/// empty.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn cookie_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(
        tuple((
            l_bracket_token,
            alt((
                separated_pair(digit0, tag("/"), digit0),
                pair(digit0, tag("%")),
            )),
            r_bracket_token,
        )),
        |(open, (first, second), close)| {
            let mut children = Vec::with_capacity(5);
            children.push(open);
            children.push(token(TEXT, first.as_str()));
            // In the percent form the second half is the `%` sign itself;
            // in the fraction form it is the (possibly empty) denominator.
            if second.as_str() == "%" {
                children.push(token(PERCENT, second.as_str()));
            } else {
                children.push(token(SLASH, "/"));
                children.push(token(TEXT, second.as_str()));
            }
            children.push(close);
            node(COOKIE, children)
        },
    );
    crate::lossless_parser!(parser, input)
}
// Snapshot tests for `cookie_node`: fraction and percent cookies, with and
// without digits, followed by inputs that must be rejected.
#[test]
fn parse() {
use crate::ast::Cookie;
use crate::tests::to_ast;
use crate::ParseConfig;
let to_cookie = to_ast::<Cookie>(cookie_node);
insta::assert_debug_snapshot!(
to_cookie("[1/10]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..2 "1"
SLASH@2..3 "/"
TEXT@3..5 "10"
R_BRACKET@5..6 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[1/1000]").syntax,
@r###"
COOKIE@0..8
L_BRACKET@0..1 "["
TEXT@1..2 "1"
SLASH@2..3 "/"
TEXT@3..7 "1000"
R_BRACKET@7..8 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[10%]").syntax,
@r###"
COOKIE@0..5
L_BRACKET@0..1 "["
TEXT@1..3 "10"
PERCENT@3..4 "%"
R_BRACKET@4..5 "]"
"###
);
// digits are optional: empty TEXT tokens are still emitted
insta::assert_debug_snapshot!(
to_cookie("[%]").syntax,
@r###"
COOKIE@0..3
L_BRACKET@0..1 "["
TEXT@1..1 ""
PERCENT@1..2 "%"
R_BRACKET@2..3 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[/]").syntax,
@r###"
COOKIE@0..3
L_BRACKET@0..1 "["
TEXT@1..1 ""
SLASH@1..2 "/"
TEXT@2..2 ""
R_BRACKET@2..3 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[100/]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..4 "100"
SLASH@4..5 "/"
TEXT@5..5 ""
R_BRACKET@5..6 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[/100]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..1 ""
SLASH@1..2 "/"
TEXT@2..5 "100"
R_BRACKET@5..6 "]"
"###
);
// malformed cookies must be rejected
let config = &ParseConfig::default();
assert!(cookie_node(("[10% ]", config).into()).is_err());
assert!(cookie_node(("[1//100]", config).into()).is_err());
assert!(cookie_node(("[1\\100]", config).into()).is_err());
assert!(cookie_node(("[10%%]", config).into()).is_err());
}

View file

@ -1,126 +0,0 @@
use nom::{combinator::opt, IResult};
use super::{
combinator::{blank_lines, node, GreenElement},
headline::{headline_node, section_node},
input::Input,
SyntaxKind::*,
};
/// Parses a whole document from `input`.
///
/// Runs `document_node_base` through `crate::lossless_parser!` (defined
/// elsewhere; presumably it verifies that the produced node reproduces the
/// consumed text). The call is traced at debug level with the input text.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn document_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(document_node_base, input)
}
/// Builds a `DOCUMENT` node from leading blank lines, an optional section,
/// and headlines parsed repeatedly until the input is exhausted.
fn document_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, pre_blank) = blank_lines(input)?;
    let mut children: Vec<GreenElement> = vec![];
    children.extend(pre_blank);
    if input.is_empty() {
        return Ok((input, node(DOCUMENT, children)));
    }

    let (mut rest, section) = opt(section_node)(input)?;
    children.extend(section);

    while !rest.is_empty() {
        let (next, headline) = headline_node(rest)?;
        // every headline must consume input, otherwise this loop never ends
        debug_assert!(rest.len() > next.len(), "{} > {}", rest.len(), next.len(),);
        children.push(headline);
        rest = next;
    }
    Ok((rest, node(DOCUMENT, children)))
}
// Snapshot tests for `document_node`: empty input, blank lines only, a bare
// section, and combinations of sections and headlines.
#[test]
fn parse() {
use crate::ast::Document;
use crate::tests::to_ast;
let to_document = to_ast::<Document>(document_node);
insta::assert_debug_snapshot!(
to_document("").syntax,
@r###"
DOCUMENT@0..0
"###
);
insta::assert_debug_snapshot!(
to_document("\n \n\n").syntax,
@r###"
DOCUMENT@0..5
BLANK_LINE@0..1 "\n"
BLANK_LINE@1..4 " \n"
BLANK_LINE@4..5 "\n"
"###
);
insta::assert_debug_snapshot!(
to_document("section").syntax,
@r###"
DOCUMENT@0..7
SECTION@0..7
PARAGRAPH@0..7
TEXT@0..7 "section"
"###
);
insta::assert_debug_snapshot!(
to_document("\n* section").syntax,
@r###"
DOCUMENT@0..10
BLANK_LINE@0..1 "\n"
HEADLINE@1..10
HEADLINE_STARS@1..2 "*"
WHITESPACE@2..3 " "
HEADLINE_TITLE@3..10
TEXT@3..10 "section"
"###
);
// a deeper headline followed by a shallower one yields two sibling headlines
insta::assert_debug_snapshot!(
to_document("\n** heading 2\n* heading 1").syntax,
@r###"
DOCUMENT@0..25
BLANK_LINE@0..1 "\n"
HEADLINE@1..14
HEADLINE_STARS@1..3 "**"
WHITESPACE@3..4 " "
HEADLINE_TITLE@4..13
TEXT@4..13 "heading 2"
NEW_LINE@13..14 "\n"
HEADLINE@14..25
HEADLINE_STARS@14..15 "*"
WHITESPACE@15..16 " "
HEADLINE_TITLE@16..25
TEXT@16..25 "heading 1"
"###
);
// `*heading 1` has no space after the star, so it is paragraph text
insta::assert_debug_snapshot!(
to_document("section\n** heading 2\n*heading 1").syntax,
@r###"
DOCUMENT@0..31
SECTION@0..8
PARAGRAPH@0..8
TEXT@0..8 "section\n"
HEADLINE@8..31
HEADLINE_STARS@8..10 "**"
WHITESPACE@10..11 " "
HEADLINE_TITLE@11..20
TEXT@11..20 "heading 2"
NEW_LINE@20..21 "\n"
SECTION@21..31
PARAGRAPH@21..31
TEXT@21..31 "*heading 1"
"###
);
}

View file

@ -1,198 +0,0 @@
use nom::{
bytes::complete::{tag_no_case, take_while1},
character::complete::{space0, space1},
combinator::{iterator, map, opt},
sequence::tuple,
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, colon_token, eol_or_eof, line_starts_iter, node, plus_token, trim_line_end,
GreenElement, NodeBuilder,
},
input::Input,
SyntaxKind::*,
};
/// Parses a drawer opening line (`:NAME:`) and returns the `DRAWER_BEGIN`
/// node together with the drawer name.
///
/// The name consists of ASCII letters, `-` and `_`.
fn drawer_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
    let is_name_char = |c: char| c.is_ascii_alphabetic() || c == '-' || c == '_';
    let (input, (indent, open, name, close, trailing, eol)) = tuple((
        space0,
        colon_token,
        take_while1(is_name_char),
        colon_token,
        space0,
        eol_or_eof,
    ))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.push(open);
    builder.text(name);
    builder.push(close);
    builder.ws(trailing);
    builder.nl(eol);
    let begin = builder.finish(DRAWER_BEGIN);
    Ok((input, (begin, name.as_str())))
}
/// Parses a drawer closing line (`:END:`, matched case-insensitively) into a
/// `DRAWER_END` node.
fn drawer_end_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let closing_line = tuple((
        space0,
        colon_token,
        tag_no_case("END"),
        colon_token,
        space0,
        eol_or_eof,
    ));
    let (input, (indent, open, keyword, close, trailing, eol)) = closing_line(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.push(open);
    builder.text(keyword);
    builder.push(close);
    builder.ws(trailing);
    builder.nl(eol);
    Ok((input, builder.finish(DRAWER_END)))
}
fn drawer_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, (begin, _)) = drawer_begin_node(input)?;
let (input, pre_blank) = blank_lines(input)?;
for (input, contents) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) {
if let Ok((input, end)) = drawer_end_node(input) {
let (input, post_blank) = blank_lines(input)?;
let mut children = vec![begin];
children.extend(pre_blank);
children.push(contents.text_token());
children.push(end);
children.extend(post_blank);
return Ok((input, node(DRAWER, children)));
}
}
Err(nom::Err::Error(()))
}
fn property_drawer_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, (begin, name)) = drawer_begin_node(input)?;
if name != "PROPERTIES" {
return Err(nom::Err::Error(()));
}
let mut children = vec![begin];
let mut it = iterator(input, node_property_node);
children.extend(&mut it);
let (input, _) = it.finish()?;
let (input, end) = drawer_end_node(input)?;
children.push(end);
Ok((input, node(PROPERTY_DRAWER, children)))
}
/// Parses a single node property line, `:NAME: VALUE` or `:NAME+: VALUE`,
/// into a `NODE_PROPERTY` node.
fn node_property_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, (indent, open, name, plus, close, gap, (value, trailing, eol))) = tuple((
        space0,
        colon_token,
        take_while1(|c| c != ':' && c != '+'),
        opt(plus_token),
        colon_token,
        space1,
        trim_line_end,
    ))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.push(open);
    builder.text(name);
    builder.push_opt(plus);
    builder.push(close);
    builder.ws(gap);
    builder.text(value);
    builder.ws(trailing);
    builder.nl(eol);
    Ok((input, builder.finish(NODE_PROPERTY)))
}
/// Parses a property drawer at the start of `input`.
///
/// `input` must not be empty (checked in debug builds). Runs
/// `property_drawer_node_base` through `crate::lossless_parser!` (defined
/// elsewhere; presumably it verifies the parse is lossless).
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn property_drawer_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert!(!input.is_empty());
crate::lossless_parser!(property_drawer_node_base, input)
}
/// Parses a drawer element at the start of `input`.
///
/// Runs `drawer_node_base` through `crate::lossless_parser!` (defined
/// elsewhere; presumably it verifies the parse is lossless). The call is
/// traced at debug level with the input text.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn drawer_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(drawer_node_base, input)
}
// Snapshot tests for `drawer_node`: a drawer with contents, a drawer with
// surrounding blank lines, and an unterminated drawer that must be rejected.
#[test]
fn parse() {
use crate::{ast::Drawer, tests::to_ast, ParseConfig};
let to_drawer = to_ast::<Drawer>(drawer_node);
insta::assert_debug_snapshot!(
to_drawer(
r#":DRAWER:
:CUSTOM_ID: id
:END:"#
).syntax,
@r###"
DRAWER@0..33
DRAWER_BEGIN@0..9
COLON@0..1 ":"
TEXT@1..7 "DRAWER"
COLON@7..8 ":"
NEW_LINE@8..9 "\n"
TEXT@9..26 " :CUSTOM_ID: id\n"
DRAWER_END@26..33
WHITESPACE@26..28 " "
COLON@28..29 ":"
TEXT@29..32 "END"
COLON@32..33 ":"
"###
);
insta::assert_debug_snapshot!(
to_drawer(
r#":DRAWER:
:END:
"#
).syntax,
@r###"
DRAWER@0..19
DRAWER_BEGIN@0..9
COLON@0..1 ":"
TEXT@1..7 "DRAWER"
COLON@7..8 ":"
NEW_LINE@8..9 "\n"
BLANK_LINE@9..10 "\n"
TEXT@10..10 ""
DRAWER_END@10..18
WHITESPACE@10..12 " "
COLON@12..13 ":"
TEXT@13..16 "END"
COLON@16..17 ":"
NEW_LINE@17..18 "\n"
BLANK_LINE@18..19 "\n"
"###
);
let config = &ParseConfig::default();
// a drawer without a closing `:END:` line is not a drawer, see
// https://github.com/PoiScript/orgize/issues/9
assert!(drawer_node((":SPAGHETTI:\n", config).into()).is_err());
}

View file

@ -1,104 +0,0 @@
use nom::{
bytes::complete::tag_no_case,
character::complete::{alpha1, space0, space1},
sequence::tuple,
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, eol_or_eof, line_starts_iter, node, trim_line_end, GreenElement, NodeBuilder,
},
input::Input,
SyntaxKind::*,
};
fn dyn_block_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, begin) = dyn_block_begin_node(input)?;
let (input, pre_blank) = blank_lines(input)?;
for (input, contents) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) {
if let Ok((input, end)) = dyn_block_end_node(input) {
let (input, post_blank) = blank_lines(input)?;
let mut children = vec![begin];
children.extend(pre_blank);
children.push(contents.text_token());
children.push(end);
children.extend(post_blank);
return Ok((input, node(DYN_BLOCK, children)));
}
}
Err(nom::Err::Error(()))
}
/// Parses the opening line of a dynamic block, `#+BEGIN: NAME ARGS`, into a
/// `DYN_BLOCK_BEGIN` node. The keyword is matched case-insensitively.
fn dyn_block_begin_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let opening_line = tuple((
        space0,
        tag_no_case("#+BEGIN:"),
        space1,
        alpha1,
        trim_line_end,
    ));
    let (input, (indent, keyword, gap, name, (args, trailing, eol))) = opening_line(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.text(keyword);
    builder.ws(gap);
    builder.text(name);
    builder.text(args);
    builder.ws(trailing);
    builder.nl(eol);
    Ok((input, builder.finish(DYN_BLOCK_BEGIN)))
}
/// Parses the closing line of a dynamic block, `#+END:` (matched
/// case-insensitively), into a `DYN_BLOCK_END` node.
fn dyn_block_end_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let closing_line = tuple((space0, tag_no_case("#+END:"), space0, eol_or_eof));
    let (input, (indent, keyword, trailing, eol)) = closing_line(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.text(keyword);
    builder.ws(trailing);
    builder.nl(eol);
    Ok((input, builder.finish(DYN_BLOCK_END)))
}
/// Parses a dynamic block element at the start of `input`.
///
/// Runs `dyn_block_node_base` through `crate::lossless_parser!` (defined
/// elsewhere; presumably it verifies the parse is lossless). The call is
/// traced at debug level with the input text.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn dyn_block_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(dyn_block_node_base, input)
}
// Snapshot test for `dyn_block_node`: a clocktable block with arguments,
// contents and surrounding blank lines.
#[test]
fn parse() {
use crate::{ast::DynBlock, tests::to_ast};
let to_dyn_block = to_ast::<DynBlock>(dyn_block_node);
insta::assert_debug_snapshot!(
to_dyn_block(
r#"#+BEGIN: clocktable :scope file
CONTENTS
#+END:
"#).syntax,
@r###"
DYN_BLOCK@0..53
DYN_BLOCK_BEGIN@0..32
TEXT@0..8 "#+BEGIN:"
WHITESPACE@8..9 " "
TEXT@9..19 "clocktable"
TEXT@19..31 " :scope file"
NEW_LINE@31..32 "\n"
BLANK_LINE@32..33 "\n"
TEXT@33..42 "CONTENTS\n"
DYN_BLOCK_END@42..49
TEXT@42..48 "#+END:"
NEW_LINE@48..49 "\n"
BLANK_LINE@49..53 " "
"###
);
}

View file

@ -1,333 +0,0 @@
use std::iter::once;
use memchr::memchr2_iter;
use nom::{IResult, InputTake};
use super::{
block::block_node,
clock::clock_node,
combinator::GreenElement,
comment::comment_node,
drawer::drawer_node,
dyn_block::dyn_block_node,
fixed_width::fixed_width_node,
fn_def::fn_def_node,
input::Input,
keyword::{affiliated_keyword_nodes, keyword_node},
latex_environment::latex_environment_node,
list::list_node,
paragraph::{paragraph_node, paragraph_nodes},
rule::rule_node,
table::{org_table_node, table_el_node},
};
/// Recognizes multiple org-mode elements.
///
/// The input must not be empty (checked in debug builds) and must not start
/// with blank lines.
///
/// Repeatedly looks for the earliest line at which a non-paragraph element
/// parses; any text before that line is parsed as paragraphs. When no such
/// line is left, the remaining input is parsed as paragraphs.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn element_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
debug_assert!(!input.is_empty());
// TODO:
// debug_assert!(
// blank_lines(input).unwrap().1.is_empty(),
// "input must not starts with blank lines: {:?}",
// input.s
// );
let mut i = input;
let mut nodes = vec![];
'l: while !i.is_empty() {
// each candidate is (input starting at a possible element, text before it)
for (input, head) in ElementPositions::new(i) {
if let Ok((input, element)) = element_node(input) {
if !head.is_empty() {
nodes.extend(paragraph_nodes(head)?);
}
nodes.push(element);
// the element must consume input, otherwise the outer loop never ends
debug_assert!(input.len() < i.len(), "{} < {}", input.len(), i.len());
i = input;
continue 'l;
}
}
// no candidate parsed as an element: everything left is paragraph text
nodes.extend(paragraph_nodes(i)?);
break;
}
// in debug builds, check that concatenating all nodes reproduces the input
debug_assert_eq!(
input.as_str(),
nodes.iter().fold(String::new(), |s, n| s + &n.to_string()),
"parser must be lossless"
);
Ok(nodes)
}
/// Recognizes a single org-mode element, except paragraphs.
///
/// The element parser is chosen from the first byte that is neither a space
/// nor a tab after any leading affiliated keywords. The chosen parser is
/// always given the original `input`, affiliated keywords included. When
/// affiliated keywords are present and no element parser succeeds, the input
/// is parsed as a paragraph instead.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn element_node(input: Input) -> IResult<Input, GreenElement, ()> {
// skip affiliated keyword first
let (i, nodes) = affiliated_keyword_nodes(input)?;
let has_affiliated_keyword = !nodes.is_empty();
// find first non-whitespace character
let byte = i.bytes().find(|&b| b != b' ' && b != b'\t');
debug_assert!(
!(has_affiliated_keyword && matches!(byte, None | Some(b'\n') | Some(b'\r'))),
"affiliated_keyword must not followed by blank lines: {:?}",
input.s
);
// dispatch on the first significant byte; several bytes are shared by more
// than one element kind, so those parsers are tried in order
let result = match byte {
Some(b'[') => fn_def_node(input),
Some(b'0'..=b'9') | Some(b'*') => list_node(input),
// clock doesn't have affiliated keywords
Some(b'C') if !has_affiliated_keyword => clock_node(input),
Some(b'-') => rule_node(input).or_else(|_| list_node(input)),
Some(b':') => drawer_node(input).or_else(|_| fixed_width_node(input)),
Some(b'|') => org_table_node(input),
Some(b'+') => table_el_node(input).or_else(|_| list_node(input)),
Some(b'#') => block_node(input)
.or_else(|_| keyword_node(input))
.or_else(|_| dyn_block_node(input))
.or_else(|_| comment_node(input)),
Some(b'\\') => latex_environment_node(input),
_ => Err(nom::Err::Error(())),
};
// affiliated keywords followed by anything else belong to a paragraph
if has_affiliated_keyword {
result.or_else(|_| paragraph_node(input))
} else {
result
}
}
/// Iterator over the positions in `input` where a non-paragraph element may
/// start.
///
/// Each item is `(tail, head)`: `tail` is the input from the start of a
/// candidate line, `head` is everything before that line.
struct ElementPositions<'a> {
// the whole text being scanned
input: Input<'a>,
// byte offset in `input` from which the next scan starts
pos: usize,
}
impl<'a> ElementPositions<'a> {
/// Creates an iterator that scans `input` from its beginning.
fn new(input: Input<'a>) -> Self {
ElementPositions { input, pos: 0 }
}
}
impl<'a> Iterator for ElementPositions<'a> {
type Item = (Input<'a>, Input<'a>);
fn next(&mut self) -> Option<Self::Item> {
if self.pos >= self.input.s.len() {
return None;
}
let bytes = &self.input.as_bytes()[self.pos..];
// candidate offsets relative to `self.pos`: the current position and the
// byte after every `\r` or `\n`
let mut iter = once(0).chain(memchr2_iter(b'\r', b'\n', bytes).map(|i| i + 1));
while let Some(i) = iter.next() {
// first byte after leading spaces/tabs; stops the iteration if none is left
let b = *bytes[i..].iter().find(|&&b| b != b' ' && b != b'\t')?;
// same set of bytes that `element_node` dispatches on
if matches!(
b,
b'[' | b'0'..=b'9' | b'*' | b'C' | b'-' | b':' | b'|' | b'+' | b'#' | b'\\'
) {
let previous = self.pos;
// resume from the next candidate offset, or from the end of input
self.pos = iter
.next()
.map_or_else(|| self.input.s.len(), |i| i + self.pos);
debug_assert!(
previous < self.pos && self.pos <= self.input.s.len(),
"{} < {} < {}",
previous,
self.pos,
self.input.s.len()
);
// `i` is relative to `previous`, convert it to an absolute offset
let (input, head) = self.input.take_split(i + previous);
return Some((input, head));
}
}
None
}
}
// Checks that `ElementPositions` yields one candidate per line that starts
// (after spaces/tabs) with a byte that may begin an element.
#[test]
fn positions() {
let config = crate::ParseConfig::default();
let s = "+\n\n C\n \r\n-\n\t\t[\n: \r\n";
let vec = ElementPositions::new((s, &config).into()).collect::<Vec<_>>();
assert_eq!(vec.len(), 5);
assert_eq!(vec[0].0.s, "+\n\n C\n \r\n-\n\t\t[\n: \r\n");
assert_eq!(vec[1].0.s, " C\n \r\n-\n\t\t[\n: \r\n");
assert_eq!(vec[2].0.s, "-\n\t\t[\n: \r\n");
assert_eq!(vec[3].0.s, "\t\t[\n: \r\n");
assert_eq!(vec[4].0.s, ": \r\n");
}
// Snapshot tests for `element_nodes`: paragraphs split by blank lines and a
// paragraph directly followed by a table.
#[test]
fn parse() {
use crate::syntax::{SyntaxKind, SyntaxNode};
use crate::{syntax::combinator::node, ParseConfig};
// Parses `input` into elements and wraps them in a SECTION root node.
let t = |input: &str| {
let config = &ParseConfig::default();
let children = element_nodes((input, config).into()).unwrap();
SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap())
};
// paragraph stops at blank lines
insta::assert_debug_snapshot!(
t(r#"a
b"#),
@r###"
SECTION@0..4
PARAGRAPH@0..3
TEXT@0..2 "a\n"
BLANK_LINE@2..3 "\n"
PARAGRAPH@3..4
TEXT@3..4 "b"
"###
);
// paragraph followed by special element
insta::assert_debug_snapshot!(
t("Table:\n|cell"),
@r###"
SECTION@0..12
PARAGRAPH@0..7
TEXT@0..7 "Table:\n"
ORG_TABLE@7..12
ORG_TABLE_STANDARD_ROW@7..12
PIPE@7..8 "|"
ORG_TABLE_CELL@8..12
TEXT@8..12 "cell"
"###
);
}
// Snapshot tests for how `element_nodes` attaches affiliated keywords to the
// element that follows them, and when they fall back to plain keywords.
#[test]
fn affiliated_keywords() {
use crate::syntax::{SyntaxKind, SyntaxNode};
use crate::{syntax::combinator::node, ParseConfig};
// Parses `input` into elements and wraps them in a SECTION root node.
let t = |input: &str| {
let config = &ParseConfig::default();
let children = element_nodes((input, config).into()).unwrap();
SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap())
};
// affiliated keywords + paragraph
insta::assert_debug_snapshot!(
t("#+ATTR_HTML: :width 300px\n[[./img/a.jpg]]"),
@r###"
SECTION@0..41
PARAGRAPH@0..41
AFFILIATED_KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..11 "ATTR_HTML"
COLON@11..12 ":"
TEXT@12..25 " :width 300px"
NEW_LINE@25..26 "\n"
LINK@26..41
L_BRACKET2@26..28 "[["
LINK_PATH@28..39 "./img/a.jpg"
R_BRACKET2@39..41 "]]"
"###
);
// affiliated keywords + blank lines, fallback to normal keyword
insta::assert_debug_snapshot!(
t("#+ATTR_HTML: :width 300px\n#+CAPTION: abc\n\n[[./img/a.jpg]]"),
@r###"
SECTION@0..57
KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..11 "ATTR_HTML"
COLON@11..12 ":"
TEXT@12..25 " :width 300px"
NEW_LINE@25..26 "\n"
KEYWORD@26..42
HASH_PLUS@26..28 "#+"
TEXT@28..35 "CAPTION"
COLON@35..36 ":"
TEXT@36..40 " abc"
NEW_LINE@40..41 "\n"
BLANK_LINE@41..42 "\n"
PARAGRAPH@42..57
LINK@42..57
L_BRACKET2@42..44 "[["
LINK_PATH@44..55 "./img/a.jpg"
R_BRACKET2@55..57 "]]"
"###
);
// affiliated keywords + special element
insta::assert_debug_snapshot!(
t("#+CAPTION: a footnote def\n[fn:WORD] https://orgmode.org"),
@r###"
SECTION@0..55
FN_DEF@0..55
AFFILIATED_KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
COLON@9..10 ":"
TEXT@10..25 " a footnote def"
NEW_LINE@25..26 "\n"
L_BRACKET@26..27 "["
TEXT@27..29 "fn"
COLON@29..30 ":"
TEXT@30..34 "WORD"
R_BRACKET@34..35 "]"
TEXT@35..55 " https://orgmode.org"
"###
);
// affiliated keywords + clock
insta::assert_debug_snapshot!(
t("#+CAPTION: a footnote def\nCLOCK: [2003-09-16 Tue 09:39]"),
@r###"
SECTION@0..55
PARAGRAPH@0..55
AFFILIATED_KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
COLON@9..10 ":"
TEXT@10..25 " a footnote def"
NEW_LINE@25..26 "\n"
TEXT@26..33 "CLOCK: "
TIMESTAMP_INACTIVE@33..55
L_BRACKET@33..34 "["
TIMESTAMP_YEAR@34..38 "2003"
MINUS@38..39 "-"
TIMESTAMP_MONTH@39..41 "09"
MINUS@41..42 "-"
TIMESTAMP_DAY@42..44 "16"
WHITESPACE@44..45 " "
TIMESTAMP_DAYNAME@45..48 "Tue"
WHITESPACE@48..49 " "
TIMESTAMP_HOUR@49..51 "09"
COLON@51..52 ":"
TIMESTAMP_MINUTE@52..54 "39"
R_BRACKET@54..55 "]"
"###
);
// affiliated keywords + eof
insta::assert_debug_snapshot!(
t("#+CAPTION: Longer caption."),
@r###"
SECTION@0..26
KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
COLON@9..10 ":"
TEXT@10..26 " Longer caption."
"###
);
}

View file

@ -1,168 +0,0 @@
use bytecount::count;
use memchr::memchr_iter;
use nom::{combinator::map, IResult, Slice};
use super::{
combinator::{node, token, GreenElement},
input::Input,
object::standard_object_nodes,
SyntaxKind::*,
};
/// Parses bold text, `*contents*`; the contents are parsed as nested objects.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn bold_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'*'), |inner| {
        let open = token(STAR, "*");
        let close = token(STAR, "*");
        let mut children = vec![open];
        children.extend(standard_object_nodes(inner));
        children.push(close);
        node(BOLD, children)
    });
    crate::lossless_parser!(parser, input)
}
/// Parses inline code, `~contents~`; the contents are kept as raw text.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn code_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'~'), |inner| {
        let open = token(TILDE, "~");
        let body = inner.text_token();
        let close = token(TILDE, "~");
        node(CODE, [open, body, close])
    });
    crate::lossless_parser!(parser, input)
}
/// Parses strike-through text, `+contents+`; the contents are parsed as
/// nested objects.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn strike_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'+'), |inner| {
        let open = token(PLUS, "+");
        let close = token(PLUS, "+");
        let mut children = vec![open];
        children.extend(standard_object_nodes(inner));
        children.push(close);
        node(STRIKE, children)
    });
    crate::lossless_parser!(parser, input)
}
/// Parses verbatim text, `=contents=`; the contents are kept as raw text.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn verbatim_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'='), |inner| {
        let open = token(EQUAL, "=");
        let body = inner.text_token();
        let close = token(EQUAL, "=");
        node(VERBATIM, [open, body, close])
    });
    crate::lossless_parser!(parser, input)
}
/// Parses underlined text, `_contents_`; the contents are parsed as nested
/// objects.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn underline_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'_'), |inner| {
        let open = token(UNDERSCORE, "_");
        let close = token(UNDERSCORE, "_");
        let mut children = vec![open];
        children.extend(standard_object_nodes(inner));
        children.push(close);
        node(UNDERLINE, children)
    });
    crate::lossless_parser!(parser, input)
}
/// Parses italic text, `/contents/`; the contents are parsed as nested
/// objects.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn italic_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'/'), |inner| {
        let open = token(SLASH, "/");
        let close = token(SLASH, "/");
        let mut children = vec![open];
        children.extend(standard_object_nodes(inner));
        children.push(close);
        node(ITALIC, children)
    });
    crate::lossless_parser!(parser, input)
}
/// Builds a parser for text enclosed in a pair of `marker` bytes.
///
/// On success the parser returns `(rest, contents)`, where `contents` is the
/// text between the two markers. The input must start with `marker` followed
/// by a non-whitespace byte, the contents must be non-empty and contain at
/// most one `\n`, and the closing marker must satisfy `validate_marker`.
fn emphasis(marker: u8) -> impl Fn(Input) -> IResult<Input, Input, ()> {
    move |input: Input| {
        let bytes = input.as_bytes();
        let opens =
            bytes.len() >= 3 && bytes[0] == marker && !bytes[1].is_ascii_whitespace();
        if !opens {
            return Err(nom::Err::Error(()));
        }

        // `skip(1)` drops the first match, which is the opening marker; a
        // marker at index 1 would leave the contents empty.
        let candidates = memchr_iter(marker, bytes).skip(1).filter(|&at| at != 1);
        for at in candidates {
            if count(&bytes[1..at], b'\n') >= 2 {
                break;
            }
            if validate_marker(at, input) {
                return Ok((input.slice(at + 1..), input.slice(1..at)));
            }
        }
        Err(nom::Err::Error(()))
    }
}
/// Checks whether the marker byte at `pos` may close an emphasis span.
///
/// The byte before the marker must not be ASCII whitespace. The byte after
/// it, if any, must be one of the POST characters of the Org syntax: a
/// whitespace character, `-`, `.`, `,`, `;`, `:`, `!`, `?`, `'`, `"`, `)`,
/// `}`, `[` or `\`. A marker at the very end of the input is always accepted.
///
/// `pos` must be at least 1, since the byte at `pos - 1` is inspected.
fn validate_marker(pos: usize, text: Input) -> bool {
    let bytes = text.as_bytes();
    if bytes[pos - 1].is_ascii_whitespace() {
        return false;
    }
    match bytes.get(pos + 1) {
        // the marker is the last byte of the input
        None => true,
        // `"` and `\` are part of the POST set as well; without them quoted
        // emphasis such as `"*bold*"` could never be closed
        Some(&post) => matches!(
            post,
            b' ' | b'\t'
                | b'\r'
                | b'\n'
                | b'-'
                | b'.'
                | b','
                | b';'
                | b':'
                | b'!'
                | b'?'
                | b'\''
                | b'"'
                | b')'
                | b'}'
                | b'['
                | b'\\'
        ),
    }
}
/// Checks whether an emphasis marker may directly follow `input`.
///
/// Returns `true` when `input` is empty, or when its last byte is one of the
/// accepted PRE characters: a space, tab, `\r`, `\n`, `-`, `(`, `{`, `'`, `"`
/// or `\`. The single quote is part of the PRE set in the Org syntax; the
/// backslash is kept for compatibility with earlier behaviour.
pub fn verify_pre(input: &str) -> bool {
    match input.as_bytes().last() {
        // nothing precedes the marker
        None => true,
        Some(&pre) => matches!(
            pre,
            b'\t' | b' ' | b'-' | b'(' | b'{' | b'\'' | b'\\' | b'"' | b'\r' | b'\n'
        ),
    }
}
// Snapshot tests for the emphasis parsers: plain bold, bold containing a
// marker, bold spanning two lines, and inputs that must be rejected.
#[test]
fn parse() {
use crate::{ast::Bold, tests::to_ast, ParseConfig};
let to_bold = to_ast::<Bold>(bold_node);
insta::assert_debug_snapshot!(
to_bold("*bold*").syntax,
@r###"
BOLD@0..6
STAR@0..1 "*"
TEXT@1..5 "bold"
STAR@5..6 "*"
"###
);
// an inner marker followed by a letter does not close the span
insta::assert_debug_snapshot!(
to_bold("*bo*ld*").syntax,
@r###"
BOLD@0..7
STAR@0..1 "*"
TEXT@1..6 "bo*ld"
STAR@6..7 "*"
"###
);
insta::assert_debug_snapshot!(
to_bold("*bo\nld*").syntax,
@r###"
BOLD@0..7
STAR@0..1 "*"
TEXT@1..6 "bo\nld"
STAR@6..7 "*"
"###
);
let config = &ParseConfig::default();
// rejected: letter after the closing marker, whitespace next to a marker,
// contents spanning three lines, and the wrong marker for italic
assert!(bold_node(("*bold*a", config).into()).is_err());
assert!(bold_node(("*bold *", config).into()).is_err());
assert!(bold_node(("* bold*", config).into()).is_err());
assert!(bold_node(("*b\nol\nd*", config).into()).is_err());
assert!(italic_node(("*bold*", config).into()).is_err());
}

View file

@ -1,120 +0,0 @@
use nom::{
branch::alt,
bytes::complete::{tag, take_while_m_n},
character::complete::alphanumeric1,
combinator::opt,
IResult,
};
use crate::{
entities::ENTITIES,
syntax::combinator::{backslash_token, node},
SyntaxKind,
};
use super::{combinator::GreenElement, input::Input};
/// Parses an entity at the start of `input`.
///
/// `input` must start with a backslash (checked in debug builds). Tries
/// `template1` first and `template2` second, and runs the combined parser
/// through `crate::lossless_parser!` (defined elsewhere; presumably it
/// verifies the parse is lossless).
pub fn entity_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert!(input.s.starts_with('\\'));
let mut parser = alt((template1, template2));
crate::lossless_parser!(parser, input)
}
// Entity forms `\NAME POST` and `\NAME{}`.
//
// NAME must be listed in `ENTITIES`. Without the `{}` suffix, the byte after
// NAME must not be an ASCII letter.
fn template1(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, backslash) = backslash_token(input)?;
    let (input, name) = alphanumeric1(input)?;
    let known = ENTITIES.iter().any(|entry| entry.0 == name.s);
    if !known {
        return Err(nom::Err::Error(()));
    }

    let (input, brackets) = opt(tag("{}"))(input)?;
    match brackets {
        Some(brackets) => {
            let children = [backslash, name.text_token(), brackets.text_token()];
            Ok((input, node(SyntaxKind::ENTITY, children)))
        }
        None => {
            let followed_by_letter =
                matches!(input.bytes().next(), Some(post) if post.is_ascii_alphabetic());
            if followed_by_letter {
                return Err(nom::Err::Error(()));
            }
            let children = [backslash, name.text_token()];
            Ok((input, node(SyntaxKind::ENTITY, children)))
        }
    }
}
// Entity form `\_SPACES`: a backslash, an underscore and between 1 and 20
// space characters.
fn template2(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, backslash) = backslash_token(input)?;
    let (input, underscore) = tag("_")(input)?;
    let (input, spaces) = take_while_m_n(1, 20, |c| c == ' ')(input)?;

    let children = [
        backslash,
        underscore.token(SyntaxKind::UNDERSCORE),
        spaces.text_token(),
    ];
    Ok((input, node(SyntaxKind::ENTITY, children)))
}
// Snapshot tests for `entity_node`: named entities, the `{}` suffix, the
// `\_ ` space form, and an unknown name that must be rejected.
#[test]
fn parse() {
use crate::{ast::Entity, tests::to_ast, ParseConfig};
let to_entity = to_ast::<Entity>(entity_node);
insta::assert_debug_snapshot!(
to_entity("\\cent").syntax,
@r###"
ENTITY@0..5
BACKSLASH@0..1 "\\"
TEXT@1..5 "cent"
"###
);
insta::assert_debug_snapshot!(
to_entity("\\S").syntax,
@r###"
ENTITY@0..2
BACKSLASH@0..1 "\\"
TEXT@1..2 "S"
"###
);
// `{}` ends the entity, the text after it is left unparsed
insta::assert_debug_snapshot!(
to_entity("\\frac12{}test").syntax,
@r###"
ENTITY@0..9
BACKSLASH@0..1 "\\"
TEXT@1..7 "frac12"
TEXT@7..9 "{}"
"###
);
insta::assert_debug_snapshot!(
to_entity("\\_ ").syntax,
@r###"
ENTITY@0..21
BACKSLASH@0..1 "\\"
UNDERSCORE@1..2 "_"
TEXT@2..21 " "
"###
);
// names that are not known entities are rejected
let c = ParseConfig::default();
assert!(entity_node(("\\poi", &c).into()).is_err());
}

View file

@ -1,97 +0,0 @@
use nom::{
bytes::complete::{tag, take_while},
character::complete::{space0, space1},
combinator::{iterator, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder},
input::Input,
keyword::affiliated_keyword_nodes,
SyntaxKind,
};
/// Parses a fixed-width area: optional affiliated keywords, one or more
/// lines starting with `:`, and any trailing blank lines, into a single
/// `FIXED_WIDTH` node.
///
/// A fixed-width line is optional indentation, `:`, and then either the line
/// end or at least one space followed by free text. Fails when the first
/// line after the affiliated keywords is not a fixed-width line.
fn fixed_width_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut b = NodeBuilder::new();
    let (input, keywords) = affiliated_keyword_nodes(input)?;
    b.children.extend(keywords);
    let mut iter = iterator(
        input,
        opt(tuple((
            space0,
            tag(":"),
            opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))),
            eol_or_eof,
        ))),
    );
    for (idx, option) in iter.enumerate() {
        match option {
            Some((ws, colon, content, eol)) => {
                b.ws(ws);
                // the line marker is a `:`, so it is emitted as a COLON token
                b.token(SyntaxKind::COLON, colon);
                if let Some((ws, text)) = content {
                    b.ws(ws);
                    b.text(text);
                }
                // the line terminator is stored as a TEXT token
                b.text(eol);
            }
            // not even one fixed-width line: this is not a fixed-width area
            _ if idx == 0 => return Err(nom::Err::Error(())),
            _ => break,
        }
    }
    let (input, _) = iter.finish()?;
    let (input, post_blank) = blank_lines(input)?;
    b.children.extend(post_blank);
    Ok((input, b.finish(SyntaxKind::FIXED_WIDTH)))
}

/// Parses a fixed-width element at the start of `input`.
///
/// Runs `fixed_width_node_base` through `crate::lossless_parser!` (defined
/// elsewhere; presumably it verifies the parse is lossless). The call is
/// traced at debug level with the input text.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn fixed_width_node(input: Input) -> IResult<Input, GreenElement, ()> {
    crate::lossless_parser!(fixed_width_node_base, input)
}

// Snapshot test for `fixed_width_node`: several fixed-width lines, one of
// them without text, followed by blank lines.
#[test]
fn parse() {
    use crate::{ast::FixedWidth, tests::to_ast};
    let to_fixed_width = to_ast::<FixedWidth>(fixed_width_node);
    insta::assert_debug_snapshot!(
to_fixed_width(
r#": A
:
: B
: C
"#
).syntax,
@r###"
FIXED_WIDTH@0..19
COLON@0..1 ":"
WHITESPACE@1..2 " "
TEXT@2..3 "A"
TEXT@3..4 "\n"
COLON@4..5 ":"
TEXT@5..6 "\n"
COLON@6..7 ":"
WHITESPACE@7..8 " "
TEXT@8..9 "B"
TEXT@9..10 "\n"
COLON@10..11 ":"
WHITESPACE@11..12 " "
TEXT@12..13 "C"
TEXT@13..14 "\n"
BLANK_LINE@14..15 "\n"
BLANK_LINE@15..19 " "
"###
    );
}

View file

@ -1,154 +0,0 @@
use nom::{
bytes::complete::{tag, take_while1},
combinator::map,
sequence::tuple,
IResult,
};
use super::{
combinator::{
blank_lines, colon_token, l_bracket_token, r_bracket_token, trim_line_end, GreenElement,
NodeBuilder,
},
input::Input,
keyword::affiliated_keyword_nodes,
SyntaxKind,
};
/// Parses a footnote definition, `[fn:LABEL] CONTENTS`, optionally preceded
/// by affiliated keywords and followed by blank lines.
///
/// LABEL is one or more ASCII letters, digits, `-` or `_`. CONTENTS is the
/// rest of the line.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn fn_def_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let is_label_char = |c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_';
    let mut parser = map(
        tuple((
            affiliated_keyword_nodes,
            l_bracket_token,
            tag("fn"),
            colon_token,
            take_while1(is_label_char),
            r_bracket_token,
            trim_line_end,
            blank_lines,
        )),
        |(keywords, open, prefix, colon, label, close, (content, trailing, eol), post_blank)| {
            let mut builder = NodeBuilder::new();
            builder.children.extend(keywords);
            builder.push(open);
            builder.text(prefix);
            builder.push(colon);
            builder.text(label);
            builder.push(close);
            builder.text(content);
            builder.ws(trailing);
            builder.nl(eol);
            builder.children.extend(post_blank);
            builder.finish(SyntaxKind::FN_DEF)
        },
    );
    crate::lossless_parser!(parser, input)
}
// Snapshot tests for `fn_def_node`: different label shapes, a definition
// without contents, a trailing new line, invalid labels, and a definition
// preceded by an affiliated keyword.
#[test]
fn parse() {
use crate::ParseConfig;
use crate::{ast::FnDef, tests::to_ast};
let to_fn_def = to_ast::<FnDef>(fn_def_node);
insta::assert_debug_snapshot!(
to_fn_def("[fn:1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..26
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
TEXT@6..26 " https://orgmode.org"
"###
);
insta::assert_debug_snapshot!(
to_fn_def("[fn:word_1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..31
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..10 "word_1"
R_BRACKET@10..11 "]"
TEXT@11..31 " https://orgmode.org"
"###
);
insta::assert_debug_snapshot!(
to_fn_def("[fn:WORD-1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..31
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..10 "WORD-1"
R_BRACKET@10..11 "]"
TEXT@11..31 " https://orgmode.org"
"###
);
// contents are optional
insta::assert_debug_snapshot!(
to_fn_def("[fn:WORD]").syntax,
@r###"
FN_DEF@0..9
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..8 "WORD"
R_BRACKET@8..9 "]"
"###
);
insta::assert_debug_snapshot!(
to_fn_def("[fn:1] In particular, the parser requires stars at column 0 to be\n").syntax,
@r###"
FN_DEF@0..66
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
TEXT@6..65 " In particular, the p ..."
NEW_LINE@65..66 "\n"
"###
);
// rejected: empty label, label containing a space, missing closing bracket
let config = &ParseConfig::default();
assert!(fn_def_node(("[fn:] https://orgmode.org", config).into()).is_err());
assert!(fn_def_node(("[fn:wor d] https://orgmode.org", config).into()).is_err());
assert!(fn_def_node(("[fn:WORD https://orgmode.org", config).into()).is_err());
// affiliated keywords become part of the footnote definition
insta::assert_debug_snapshot!(
to_fn_def("#+ATTR_poi: 1\n[fn:WORD-1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..45
AFFILIATED_KEYWORD@0..14
HASH_PLUS@0..2 "#+"
TEXT@2..10 "ATTR_poi"
COLON@10..11 ":"
TEXT@11..13 " 1"
NEW_LINE@13..14 "\n"
L_BRACKET@14..15 "["
TEXT@15..17 "fn"
COLON@17..18 ":"
TEXT@18..24 "WORD-1"
R_BRACKET@24..25 "]"
TEXT@25..45 " https://orgmode.org"
"###
);
}

View file

@ -1,119 +0,0 @@
use memchr::memchr2_iter;
use nom::{
bytes::complete::{tag, take_while},
combinator::opt,
sequence::tuple,
Err, IResult, InputTake,
};
use super::{
combinator::{colon_token, l_bracket_token, node, r_bracket_token, GreenElement},
input::Input,
object::standard_object_nodes,
SyntaxKind::*,
};
/// Parses a footnote reference object (`[fn:LABEL]`, `[fn:LABEL:DEFINITION]`
/// or `[fn::DEFINITION]`) by running `fn_ref_node_base` through the crate's
/// `lossless_parser!` macro.
// NOTE(review): `lossless_parser!` is defined elsewhere; presumably it checks
// that the produced node covers exactly the consumed text — confirm.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn fn_ref_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(fn_ref_node_base, input)
}
fn fn_ref_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, (l_bracket, fn_, colon, label, definition, r_bracket)) = tuple((
l_bracket_token,
tag("fn"),
colon_token,
take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
opt(tuple((colon_token, balanced_brackets))),
r_bracket_token,
))(input)?;
let mut children = vec![l_bracket, fn_.text_token(), colon, label.text_token()];
if let Some((colon, definition)) = definition {
children.push(colon);
children.extend(standard_object_nodes(definition));
}
children.push(r_bracket);
Ok((input, node(FN_REF, children)))
}
/// Splits `input` just before the first `]` that is not matched by an
/// earlier `[` inside `input` itself.
///
/// On success the remaining input starts at that `]` and the output is
/// everything before it. Fails when no such `]` exists.
fn balanced_brackets(input: Input) -> IResult<Input, Input, ()> {
    let haystack = input.as_bytes();
    // number of `[` seen so far that are still waiting for their `]`
    let mut unclosed = 0usize;

    for at in memchr2_iter(b'[', b']', haystack) {
        match haystack[at] {
            b'[' => unclosed += 1,
            _ if unclosed > 0 => unclosed -= 1,
            _ => return Ok(input.take_split(at)),
        }
    }

    Err(Err::Error(()))
}
// Snapshot tests for `fn_ref_node`: labelled, labelled with an inline
// definition, anonymous, nested brackets, and an unterminated reference.
#[test]
fn parse() {
use crate::{ast::FnRef, tests::to_ast, ParseConfig};
let to_fn_ref = to_ast::<FnRef>(fn_ref_node);
// plain labelled reference
insta::assert_debug_snapshot!(
to_fn_ref("[fn:1]").syntax,
@r###"
FN_REF@0..6
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
"###
);
// label followed by an inline definition
insta::assert_debug_snapshot!(
to_fn_ref("[fn:1:2]").syntax,
@r###"
FN_REF@0..8
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
COLON@5..6 ":"
TEXT@6..7 "2"
R_BRACKET@7..8 "]"
"###
);
// anonymous reference: the label token is present but empty
insta::assert_debug_snapshot!(
to_fn_ref("[fn::2]").syntax,
@r###"
FN_REF@0..7
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..4 ""
COLON@4..5 ":"
TEXT@5..6 "2"
R_BRACKET@6..7 "]"
"###
);
// balanced brackets inside the definition stay part of it
insta::assert_debug_snapshot!(
to_fn_ref("[fn::[]]").syntax,
@r###"
FN_REF@0..8
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..4 ""
COLON@4..5 ":"
TEXT@5..7 "[]"
R_BRACKET@7..8 "]"
"###
);
// the outer bracket is never closed
let config = &ParseConfig::default();
assert!(fn_ref_node(("[fn::[]", config).into()).is_err());
}

View file

@ -1,357 +0,0 @@
use memchr::memrchr_iter;
use nom::{
bytes::complete::take_while1,
character::complete::{anychar, space0},
combinator::{map, opt},
sequence::tuple,
IResult, InputTake, Slice,
};
use super::{
combinator::{
hash_token, l_bracket_token, line_starts_iter, node, r_bracket_token, token, trim_line_end,
GreenElement, NodeBuilder,
},
drawer::property_drawer_node,
element::element_nodes,
input::Input,
object::standard_object_nodes,
planning::planning_node,
SyntaxKind::*,
};
/// Parses a headline together with everything nested under it by running
/// `headline_node_base` through the crate's `lossless_parser!` macro.
///
/// Callers must not pass empty input (checked with `debug_assert!`).
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn headline_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert!(!input.is_empty());
crate::lossless_parser!(headline_node_base, input)
}
/// Builds a `HEADLINE` node from, in order: the stars, optional TODO/DONE
/// keyword, optional priority cookie, title and tags on the first line,
/// then an optional planning line, property drawer and section, and finally
/// every following headline whose star count is strictly greater than this
/// one's (its children).
///
/// Parsing stops early with what has been collected so far whenever the
/// input runs out.
fn headline_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (rest, stars) = headline_stars(input)?;
    let own_level = stars.len();

    let mut builder = NodeBuilder::new();
    builder.token(HEADLINE_STARS, stars);

    let (rest, gap) = space0(rest)?;
    builder.ws(gap);

    let (rest, keyword) = opt(headline_keyword_token)(rest)?;
    if let Some((keyword, gap)) = keyword {
        builder.push(keyword);
        builder.ws(gap);
    }

    let (rest, priority) = opt(headline_priority_node)(rest)?;
    if let Some((priority, gap)) = priority {
        builder.push(priority);
        builder.ws(gap);
    }

    // the remainder of the first line holds the title and, optionally, tags
    let (rest, (title_and_tags, trailing_ws, newline)) = trim_line_end(rest)?;
    let (title, tags) = opt(headline_tags_node)(title_and_tags)?;
    if !title.is_empty() {
        builder.push(node(HEADLINE_TITLE, standard_object_nodes(title)));
    }
    builder.push_opt(tags);
    builder.ws(trailing_ws);
    builder.nl(newline);
    if rest.is_empty() {
        return Ok((rest, builder.finish(HEADLINE)));
    }

    let (rest, planning) = opt(planning_node)(rest)?;
    builder.push_opt(planning);
    if rest.is_empty() {
        return Ok((rest, builder.finish(HEADLINE)));
    }

    let (rest, drawer) = opt(property_drawer_node)(rest)?;
    builder.push_opt(drawer);
    if rest.is_empty() {
        return Ok((rest, builder.finish(HEADLINE)));
    }

    let (rest, section) = opt(section_node)(rest)?;
    builder.push_opt(section);

    // nested headlines: keep going while the next one is deeper than this one
    let mut cursor = rest;
    while !cursor.is_empty() {
        let next_level = cursor
            .as_bytes()
            .iter()
            .take_while(|&&byte| byte == b'*')
            .count();
        if next_level <= own_level {
            break;
        }

        let (after_child, child) = headline_node(cursor)?;
        builder.push(child);
        debug_assert!(
            cursor.len() > after_child.len(),
            "{} > {}",
            cursor.len(),
            after_child.len()
        );
        cursor = after_child;
    }

    Ok((cursor, builder.finish(HEADLINE)))
}
/// Parses the text up to the next headline (or the end of input) into a
/// `SECTION` node whose children are produced by `element_nodes`.
///
/// Callers must not pass empty input (checked with `debug_assert!`).
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn section_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert!(!input.is_empty());

    let (rest, body) = section_text(input)?;
    let elements = element_nodes(body)?;

    Ok((rest, node(SECTION, elements)))
}
fn section_text(input: Input) -> IResult<Input, Input, ()> {
for (input, section) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) {
if headline_stars(input).is_ok() {
if section.is_empty() {
return Err(nom::Err::Error(()));
}
return Ok((input, section));
}
}
Ok(input.take_split(input.len()))
}
/// Recognises the leading stars of a headline: one or more `*` immediately
/// followed by a space. The output is the run of stars; the space is left
/// in the remaining input.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
fn headline_stars(input: Input) -> IResult<Input, Input, ()> {
    let bytes = input.as_bytes();
    let level = bytes
        .iter()
        .position(|&byte| byte != b'*')
        .unwrap_or(bytes.len());

    // headline stars must be followed by space
    if level > 0 && bytes.get(level) == Some(&b' ') {
        Ok(input.take_split(level))
    } else {
        Err(nom::Err::Error(()))
    }
}
/// Tries to split a trailing tag group (e.g. `:a:b:`) off the end of a
/// headline's title text.
///
/// The input must end with `:`. Colons are walked from right to left; the
/// text between two neighbouring colons may be empty, a tag (alphanumerics,
/// `_`, `@`, `#`, `%`), or a run of spaces/tabs that directly follows a tag
/// or an empty slot. Scanning stops at the first segment that fits none of
/// these.
///
/// On success the remaining input is the title part (everything before the
/// leftmost accepted colon) and the output is a `HEADLINE_TAGS` node. Fails
/// when nothing besides colons was collected in at most two tokens, or when
/// the tag group is not at the start of the input and not preceded by a
/// space or tab.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
fn headline_tags_node(input: Input) -> IResult<Input, GreenElement, ()> {
if !input.s.ends_with(':') {
return Err(nom::Err::Error(()));
};
let bytes = input.as_bytes();
// we're going to skip to first colon, so we start from the
// second last character
// `i` is always the index of the most recently accepted colon
let mut i = input.len() - 1;
// true at the start and right after a whitespace segment: a whitespace
// segment is not accepted in that state
let mut can_not_be_ws = true;
// tokens are collected right-to-left, starting with the final colon
let mut children = vec![token(COLON, ":")];
for ii in memrchr_iter(b':', bytes).skip(1) {
// bytes strictly between this colon and the previously accepted one
let item = &bytes[ii + 1..i];
if item.is_empty() {
// two adjacent colons
children.push(token(COLON, ":"));
can_not_be_ws = false;
debug_assert!(i > ii, "{} > {}", i, ii);
i = ii;
} else if String::from_utf8_lossy(item)
.chars()
// https://github.com/yyr/org-mode/blob/d8494b5668ad4d4e68e83228ae8451eaa01d2220/lisp/org-element.el#L922C25-L922C32
.all(|c| c.is_alphanumeric() || c == '_' || c == '@' || c == '#' || c == '%')
{
// a tag
children.push(input.slice(ii + 1..i).text_token());
children.push(token(COLON, ":"));
can_not_be_ws = false;
debug_assert!(i > ii, "{} > {}", i, ii);
i = ii;
} else if item.iter().all(|&c| c == b' ' || c == b'\t') && !can_not_be_ws {
// whitespace separating two tag groups
children.push(input.slice(ii + 1..i).ws_token());
children.push(token(COLON, ":"));
can_not_be_ws = true;
debug_assert!(i > ii, "{} > {}", i, ii);
i = ii;
} else {
break;
}
}
// only the final colon, or the final colon plus one more token
if children.len() <= 2 {
return Err(nom::Err::Error(()));
}
// unless the tags start the input, they must be preceded by a space or tab
if i != 0 && bytes[i - 1] != b' ' && bytes[i - 1] != b'\t' {
return Err(nom::Err::Error(()));
}
// we parse headline tag from right to left,
// so we need to reverse the result after it finishes
children.reverse();
Ok((input.slice(0..i), node(HEADLINE_TAGS, children)))
}
fn headline_keyword_token(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
let (input, word) = take_while1(|c: char| !c.is_ascii_whitespace())(input)?;
let (input, ws) = space0(input)?;
if input.c.todo_keywords.0.iter().any(|k| k == word.s) {
Ok((input, (word.token(HEADLINE_KEYWORD_TODO), ws)))
} else if input.c.todo_keywords.1.iter().any(|k| k == word.s) {
Ok((input, (word.token(HEADLINE_KEYWORD_DONE), ws)))
} else {
Err(nom::Err::Error(()))
}
}
/// Parses a priority cookie `[#X]`, where `X` is any single character, into
/// a `HEADLINE_PRIORITY` node, and returns it together with the spaces that
/// follow the cookie.
fn headline_priority_node(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
    let cookie = tuple((l_bracket_token, hash_token, anychar, r_bracket_token));

    let (rest, priority) = map(cookie, |(open, hash, value, close)| {
        let value = value.to_string();
        node(HEADLINE_PRIORITY, [open, hash, token(TEXT, &value), close])
    })(input)?;

    let (rest, trailing_ws) = space0(rest)?;

    Ok((rest, (priority, trailing_ws)))
}
// Snapshot tests for `headline_node`: a bare title, nested headlines with
// and without section text, a priority cookie, and inputs that are not
// headlines.
#[test]
fn parse() {
use crate::{ast::Headline, tests::to_ast, ParseConfig};
let to_headline = to_ast::<Headline>(headline_node);
// title only
insta::assert_debug_snapshot!(
to_headline("* foo").syntax,
@r###"
HEADLINE@0..5
HEADLINE_STARS@0..1 "*"
WHITESPACE@1..2 " "
HEADLINE_TITLE@2..5
TEXT@2..5 "foo"
"###
);
// a blank line forms the section; the deeper headline is nested
insta::assert_debug_snapshot!(
to_headline("* foo\n\n** bar").syntax,
@r###"
HEADLINE@0..13
HEADLINE_STARS@0..1 "*"
WHITESPACE@1..2 " "
HEADLINE_TITLE@2..5
TEXT@2..5 "foo"
NEW_LINE@5..6 "\n"
SECTION@6..7
PARAGRAPH@6..7
BLANK_LINE@6..7 "\n"
HEADLINE@7..13
HEADLINE_STARS@7..9 "**"
WHITESPACE@9..10 " "
HEADLINE_TITLE@10..13
TEXT@10..13 "bar"
"###
);
// TODO keyword, section text and a nested headline
insta::assert_debug_snapshot!(
to_headline("* TODO foo\nbar\n** baz\n").syntax,
@r###"
HEADLINE@0..22
HEADLINE_STARS@0..1 "*"
WHITESPACE@1..2 " "
HEADLINE_KEYWORD_TODO@2..6 "TODO"
WHITESPACE@6..7 " "
HEADLINE_TITLE@7..10
TEXT@7..10 "foo"
NEW_LINE@10..11 "\n"
SECTION@11..15
PARAGRAPH@11..15
TEXT@11..15 "bar\n"
HEADLINE@15..22
HEADLINE_STARS@15..17 "**"
WHITESPACE@17..18 " "
HEADLINE_TITLE@18..21
TEXT@18..21 "baz"
NEW_LINE@21..22 "\n"
"###
);
// priority cookie; the following shallower headline is not consumed
insta::assert_debug_snapshot!(
to_headline("** [#A] foo\n* baz").syntax,
@r###"
HEADLINE@0..12
HEADLINE_STARS@0..2 "**"
WHITESPACE@2..3 " "
HEADLINE_PRIORITY@3..7
L_BRACKET@3..4 "["
HASH@4..5 "#"
TEXT@5..6 "A"
R_BRACKET@6..7 "]"
WHITESPACE@7..8 " "
HEADLINE_TITLE@8..11
TEXT@8..11 "foo"
NEW_LINE@11..12 "\n"
"###
);
// stars must start the line and be followed by a space
let config = &ParseConfig::default();
assert!(headline_node(("_ ", config).into()).is_err());
assert!(headline_node(("*", config).into()).is_err());
assert!(headline_node((" * ", config).into()).is_err());
assert!(headline_node(("**", config).into()).is_err());
assert!(headline_node(("**\n", config).into()).is_err());
assert!(headline_node(("**\r", config).into()).is_err());
assert!(headline_node(("**\t", config).into()).is_err());
}
// Regression tests for headline tag parsing: inputs that must not yield
// tags, followed by inputs whose tags use the special characters `_`, `@`,
// `#`, `%` and non-ASCII alphanumerics.
#[test]
fn issue_15_16() {
    use crate::{ast::Headline, tests::to_ast};

    let to_headline = to_ast::<Headline>(headline_node);

    // none of these end in a well-formed tag group
    assert!(to_headline("* a ::").tags().count() == 0);
    assert!(to_headline("* a : :").tags().count() == 0);
    assert!(to_headline("* a :(:").tags().count() == 0);
    assert!(to_headline("* a :a: :").tags().count() == 0);
    assert!(to_headline("* a :a :").tags().count() == 0);
    assert!(to_headline("* a a:").tags().count() == 0);
    assert!(to_headline("* a :a").tags().count() == 0);

    let tags = to_headline("* a \t:_:").tags();
    assert_eq!(
        vec!["_".to_string()],
        tags.map(|x| x.to_string()).collect::<Vec<_>>(),
    );

    let tags = to_headline("* a \t :@:").tags();
    assert_eq!(
        vec!["@".to_string()],
        tags.map(|x| x.to_string()).collect::<Vec<_>>(),
    );

    let tags = to_headline("* a :#:").tags();
    assert_eq!(
        vec!["#".to_string()],
        tags.map(|x| x.to_string()).collect::<Vec<_>>(),
    );

    let tags = to_headline("* a\t :%:").tags();
    assert_eq!(
        vec!["%".to_string()],
        tags.map(|x| x.to_string()).collect::<Vec<_>>(),
    );

    // non-ASCII alphanumeric tags, in two groups separated by a space;
    // the expected values are the tag texts themselves, not empty strings
    let tags = to_headline("* a :余: :破:").tags();
    assert_eq!(
        vec!["余".to_string(), "破".to_string()],
        tags.map(|x| x.to_string()).collect::<Vec<_>>(),
    );
}

View file

@ -1,127 +0,0 @@
use nom::{
bytes::complete::{tag, take_till},
combinator::{map, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{
l_bracket_token, l_parens_token, node, r_bracket_token, r_parens_token, GreenElement,
},
input::Input,
SyntaxKind,
};
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn inline_call_node(input: Input) -> IResult<Input, GreenElement, ()> {
let mut parser = map(
tuple((
tag("call_"),
take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')'),
opt(tuple((
l_bracket_token,
take_till(|c| c == ']' || c == '\n'),
r_bracket_token,
))),
l_parens_token,
take_till(|c| c == ')' || c == '\n'),
r_parens_token,
opt(tuple((
l_bracket_token,
take_till(|c| c == ']' || c == '\n'),
r_bracket_token,
))),
)),
|(call, name, inside_header, l_paren, arguments, r_paren, end_header)| {
let mut children = vec![call.text_token()];
children.push(name.text_token());
if let Some((l_bracket, header, r_bracket)) = inside_header {
children.push(l_bracket);
children.push(header.text_token());
children.push(r_bracket);
}
children.push(l_paren);
children.push(arguments.text_token());
children.push(r_paren);
if let Some((l_bracket, header, r_bracket)) = end_header {
children.push(l_bracket);
children.push(header.text_token());
children.push(r_bracket);
}
node(SyntaxKind::INLINE_CALL, children)
},
);
crate::lossless_parser!(parser, input)
}
// Snapshot tests for `inline_call_node` covering every combination of the
// two optional header groups.
#[test]
fn parse() {
use crate::{ast::InlineCall, tests::to_ast};
let to_inline_call = to_ast::<InlineCall>(inline_call_node);
// no headers
let call = to_inline_call("call_square(4)");
insta::assert_debug_snapshot!(
call.syntax,
@r###"
INLINE_CALL@0..14
TEXT@0..5 "call_"
TEXT@5..11 "square"
L_PARENS@11..12 "("
TEXT@12..13 "4"
R_PARENS@13..14 ")"
"###
);
// header before the arguments
let call = to_inline_call("call_square[:results output](4)");
insta::assert_debug_snapshot!(
call.syntax,
@r###"
INLINE_CALL@0..31
TEXT@0..5 "call_"
TEXT@5..11 "square"
L_BRACKET@11..12 "["
TEXT@12..27 ":results output"
R_BRACKET@27..28 "]"
L_PARENS@28..29 "("
TEXT@29..30 "4"
R_PARENS@30..31 ")"
"###
);
// header after the arguments
let call = to_inline_call("call_square(4)[:results html]");
insta::assert_debug_snapshot!(
call.syntax,
@r###"
INLINE_CALL@0..29
TEXT@0..5 "call_"
TEXT@5..11 "square"
L_PARENS@11..12 "("
TEXT@12..13 "4"
R_PARENS@13..14 ")"
L_BRACKET@14..15 "["
TEXT@15..28 ":results html"
R_BRACKET@28..29 "]"
"###
);
// both headers
let call = to_inline_call("call_square[:results output](4)[:results html]");
insta::assert_debug_snapshot!(
call.syntax,
@r###"
INLINE_CALL@0..46
TEXT@0..5 "call_"
TEXT@5..11 "square"
L_BRACKET@11..12 "["
TEXT@12..27 ":results output"
R_BRACKET@27..28 "]"
L_PARENS@28..29 "("
TEXT@29..30 "4"
R_PARENS@30..31 ")"
L_BRACKET@31..32 "["
TEXT@32..45 ":results html"
R_BRACKET@45..46 "]"
"###
);
}

View file

@ -1,85 +0,0 @@
use nom::{
bytes::complete::{tag, take_till, take_while1},
combinator::{map, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{
l_bracket_token, l_curly_token, node, r_bracket_token, r_curly_token, GreenElement,
},
input::Input,
SyntaxKind,
};
/// Parses an inline source block, `src_LANG[OPTIONS]{BODY}`, into an
/// `INLINE_SRC` node.
///
/// `LANG` is one or more characters other than ASCII whitespace, `[` and
/// `{`. The bracketed options are optional. Neither options nor body may
/// contain a newline.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn inline_src_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let language = take_while1(|c: char| !c.is_ascii_whitespace() && c != '[' && c != '{');
    let options = opt(tuple((
        l_bracket_token,
        take_till(|c: char| c == '\n' || c == ']'),
        r_bracket_token,
    )));
    let body = take_till(|c: char| c == '\n' || c == '}');

    let mut parser = map(
        tuple((
            tag("src_"),
            language,
            options,
            l_curly_token,
            body,
            r_curly_token,
        )),
        |(prefix, language, options, open, body, close)| {
            let mut children = vec![prefix.text_token(), language.text_token()];

            if let Some((l_bracket, options, r_bracket)) = options {
                children.extend([l_bracket, options.text_token(), r_bracket]);
            }

            children.extend([open, body.text_token(), close]);

            node(SyntaxKind::INLINE_SRC, children)
        },
    );

    crate::lossless_parser!(parser, input)
}
// Snapshot tests for `inline_src_node`, with and without options, plus
// malformed inputs that must be rejected.
#[test]
fn parse() {
use crate::{ast::InlineSrc, tests::to_ast, ParseConfig};
let to_inline_src = to_ast::<InlineSrc>(inline_src_node);
// language and body only
insta::assert_debug_snapshot!(
to_inline_src("src_C{int a = 0;}").syntax,
@r###"
INLINE_SRC@0..17
TEXT@0..4 "src_"
TEXT@4..5 "C"
L_CURLY@5..6 "{"
TEXT@6..16 "int a = 0;"
R_CURLY@16..17 "}"
"###
);
// with bracketed options
insta::assert_debug_snapshot!(
to_inline_src("src_xml[:exports code]{<tag>text</tag>}").syntax,
@r###"
INLINE_SRC@0..39
TEXT@0..4 "src_"
TEXT@4..7 "xml"
L_BRACKET@7..8 "["
TEXT@8..21 ":exports code"
R_BRACKET@21..22 "]"
L_CURLY@22..23 "{"
TEXT@23..38 "<tag>text</tag>"
R_CURLY@38..39 "}"
"###
);
// missing closing brace, missing language, missing body
let config = &ParseConfig::default();
assert!(inline_src_node(("src_xml[:exports code]{<tag>text</tag>", config).into()).is_err());
assert!(inline_src_node(("src_[:exports code]{<tag>text</tag>}", config).into()).is_err());
assert!(inline_src_node(("src_xml[:exports code]", config).into()).is_err());
}

View file

@ -1,242 +0,0 @@
use nom::{
error::{ErrorKind, ParseError},
Compare, CompareResult, Err, FindSubstring, IResult, InputIter, InputLength, InputTake,
InputTakeAtPosition, Needed, Offset, Slice,
};
use std::{
ops::{Deref, Range, RangeFrom, RangeFull, RangeTo},
str::{CharIndices, Chars},
};
use super::{
combinator::{token, GreenElement},
SyntaxKind,
};
use crate::config::ParseConfig;
/// A custom parser input: a string slice paired with the parse configuration.
///
/// Carrying the `ParseConfig` inside the input lets every parser reach it
/// without a separate argument.
#[derive(Clone, Copy, Debug)]
pub struct Input<'a> {
// the text still to be parsed
pub(crate) s: &'a str,
// configuration shared by every `Input` derived from this one
pub(crate) c: &'a ParseConfig,
}
impl<'a> Input<'a> {
    /// Creates an input over `i` that shares this input's configuration.
    #[inline]
    pub(crate) fn of(&self, i: &'a str) -> Input<'a> {
        Input { s: i, ..*self }
    }

    /// Returns the underlying string slice.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        self.s
    }

    /// Turns the whole text of this input into a token of the given kind.
    #[inline]
    pub fn token(&self, kind: SyntaxKind) -> GreenElement {
        token(kind, self.s)
    }

    /// Turns the whole text of this input into a `TEXT` token.
    #[inline]
    pub fn text_token(&self) -> GreenElement {
        self.token(SyntaxKind::TEXT)
    }

    /// Turns the whole text of this input into a `WHITESPACE` token.
    #[inline]
    pub fn ws_token(&self) -> GreenElement {
        self.token(SyntaxKind::WHITESPACE)
    }

    /// Turns the whole text of this input into a `NEW_LINE` token.
    #[inline]
    pub fn nl_token(&self) -> GreenElement {
        self.token(SyntaxKind::NEW_LINE)
    }
}
// Lets an `Input` be used wherever a `&str` method is wanted (`len`,
// `is_empty`, `as_bytes`, ...) by dereferencing to the wrapped slice.
impl<'a> Deref for Input<'a> {
type Target = str;
#[inline]
fn deref(&self) -> &'a str {
self.s
}
}
/// Builds an `Input` from a `(text, config)` pair.
impl<'a> From<(&'a str, &'a ParseConfig)> for Input<'a> {
    fn from((s, c): (&'a str, &'a ParseConfig)) -> Self {
        Input { s, c }
    }
}
// nom `Slice` implementations: each one slices the wrapped string by byte
// range and keeps the configuration.
impl<'a> Slice<Range<usize>> for Input<'a> {
    fn slice(&self, range: Range<usize>) -> Self {
        self.of(&self.s[range])
    }
}

impl<'a> Slice<RangeTo<usize>> for Input<'a> {
    fn slice(&self, range: RangeTo<usize>) -> Self {
        self.of(&self.s[range])
    }
}

impl<'a> Slice<RangeFrom<usize>> for Input<'a> {
    fn slice(&self, range: RangeFrom<usize>) -> Self {
        self.of(&self.s[range])
    }
}

impl<'a> Slice<RangeFull> for Input<'a> {
    fn slice(&self, range: RangeFull) -> Self {
        self.of(&self.s[range])
    }
}
// Substring search on the wrapped string; returns the byte offset of the
// first match, if any.
impl<'a, 'b> FindSubstring<&'b str> for Input<'a> {
fn find_substring(&self, substr: &str) -> Option<usize> {
self.s.find(substr)
}
}
// Prefix comparison against a string, delegated to nom's `Compare`
// implementation for `&str`.
impl<'a, 'b> Compare<&'b str> for Input<'a> {
#[inline]
fn compare(&self, t: &'b str) -> CompareResult {
self.s.compare(t)
}
#[inline]
fn compare_no_case(&self, t: &'b str) -> CompareResult {
self.s.compare_no_case(t)
}
}
/// The input length is the byte length of the wrapped string.
impl<'a> InputLength for Input<'a> {
    #[inline]
    fn input_len(&self) -> usize {
        self.s.len()
    }
}
// Character iteration over the wrapped string; every method delegates to
// the `&str` equivalent.
impl<'a> InputIter for Input<'a> {
type Item = char;
type Iter = CharIndices<'a>;
type IterElem = Chars<'a>;
#[inline]
fn iter_indices(&self) -> Self::Iter {
self.s.char_indices()
}
#[inline]
fn iter_elements(&self) -> Self::IterElem {
self.s.chars()
}
// delegates to nom's `InputIter::position` for `&str`
fn position<P>(&self, predicate: P) -> Option<usize>
where
P: Fn(Self::Item) -> bool,
{
self.s.position(predicate)
}
#[inline]
fn slice_index(&self, count: usize) -> Result<usize, Needed> {
self.s.slice_index(count)
}
}
/// Taking and splitting by byte count, delegated to nom's `InputTake`
/// implementation for `&str`; both halves keep the configuration.
impl<'a> InputTake for Input<'a> {
    #[inline]
    fn take(&self, count: usize) -> Self {
        self.of(self.s.take(count))
    }

    #[inline]
    fn take_split(&self, count: usize) -> (Self, Self) {
        let (first, second) = self.s.take_split(count);
        (self.of(first), self.of(second))
    }
}
impl<'a> Input<'a> {
    /// Converts the result of a `&str` split into one over `Input`.
    ///
    /// Both halves of a successful split, and the input carried by an
    /// `Error`/`Failure`, are re-wrapped with this input's configuration;
    /// `Incomplete` is passed through untouched.
    #[inline]
    fn lift_split<E: ParseError<Self>>(
        &self,
        result: IResult<&'a str, &'a str, (&'a str, ErrorKind)>,
    ) -> IResult<Self, Self, E> {
        match result {
            Ok((l, r)) => Ok((self.of(l), self.of(r))),
            Err(Err::Error((i, kind))) => Err(Err::Error(E::from_error_kind(self.of(i), kind))),
            Err(Err::Failure((i, kind))) => {
                Err(Err::Failure(E::from_error_kind(self.of(i), kind)))
            }
            Err(Err::Incomplete(x)) => Err(Err::Incomplete(x)),
        }
    }
}

/// Predicate-based splitting. Every method delegates to nom's
/// implementation for `&str` and lifts the result back into `Input`.
impl<'a> InputTakeAtPosition for Input<'a> {
    type Item = char;

    #[inline]
    fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        self.lift_split(self.s.split_at_position(predicate))
    }

    #[inline]
    fn split_at_position1<P, E: ParseError<Self>>(
        &self,
        predicate: P,
        e: ErrorKind,
    ) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        self.lift_split(self.s.split_at_position1(predicate, e))
    }

    #[inline]
    fn split_at_position_complete<P, E: ParseError<Self>>(
        &self,
        predicate: P,
    ) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        self.lift_split(self.s.split_at_position_complete(predicate))
    }

    #[inline]
    fn split_at_position1_complete<P, E: ParseError<Self>>(
        &self,
        predicate: P,
        e: ErrorKind,
    ) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        self.lift_split(self.s.split_at_position1_complete(predicate, e))
    }
}
// Offset between two inputs, delegated to nom's `Offset` implementation
// for the wrapped strings.
impl<'a> Offset for Input<'a> {
fn offset(&self, second: &Self) -> usize {
self.s.offset(second.s)
}
}

View file

@ -1,284 +0,0 @@
#![allow(clippy::type_complexity)]
use nom::{
branch::alt,
bytes::complete::{tag, take_till, take_while1},
character::complete::space0,
combinator::{recognize, verify},
sequence::tuple,
IResult, InputTake,
};
use super::{
combinator::{blank_lines, hash_plus_token, node, trim_line_end, GreenElement},
input::Input,
SyntaxKind,
};
/// Parses a keyword line (`#+KEY: VALUE`) plus any blank lines after it.
///
/// The result is a `BABEL_CALL` node when the key is `CALL` (compared
/// case-insensitively, so `#+call:` works too) and a `KEYWORD` node
/// otherwise.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn keyword_node(input: Input) -> IResult<Input, GreenElement, ()> {
    fn f(input: Input) -> IResult<Input, GreenElement, ()> {
        let (input, (key, mut nodes)) = keyword_node_base(input)?;
        let (input, post_blank) = blank_lines(input)?;
        nodes.extend(post_blank);

        // Org keywords are case-insensitive
        let kind = if key.eq_ignore_ascii_case("CALL") {
            SyntaxKind::BABEL_CALL
        } else {
            SyntaxKind::KEYWORD
        };

        Ok((input, node(kind, nodes)))
    }
    crate::lossless_parser!(f, input)
}
/// Collects the run of affiliated keywords at the start of `input`, one
/// `AFFILIATED_KEYWORD` node per line.
///
/// A keyword counts as affiliated when its key is listed in
/// `ParseConfig::affiliated_keywords` or starts with `ATTR_`; both checks
/// ignore ASCII case, so `#+caption:` and `#+attr_html:` are recognised.
///
/// Returns an empty vector, leaving `input` unconsumed, if the input doesn't
/// contain an affiliated keyword, or if a keyword in the run is followed by
/// blank lines or by the end of input.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn affiliated_keyword_nodes(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
    let mut children = vec![];
    let mut i = input;

    while !i.is_empty() {
        let Ok((input_, (key, nodes))) = keyword_node_base(i) else {
            break;
        };

        let (input_, post_blank) = blank_lines(input_)?;

        // affiliated keyword can not be followed by blank lines or eof
        if !post_blank.is_empty() || input_.is_empty() {
            return Ok((input, vec![]));
        }

        let is_attr = key
            .get(..5)
            .map_or(false, |prefix| prefix.eq_ignore_ascii_case("ATTR_"));
        let is_configured = input_
            .c
            .affiliated_keywords
            .iter()
            .any(|w| w.eq_ignore_ascii_case(key));

        if !is_configured && !is_attr {
            break;
        }

        debug_assert!(i.len() > input_.len(), "{} > {}", i.len(), input_.len());
        i = input_;
        children.push(node(SyntaxKind::AFFILIATED_KEYWORD, nodes));
    }

    Ok((i, children))
}
/// Collects consecutive `#+TBLFM:` keyword lines (key compared
/// case-insensitively) from the start of `input`, one `KEYWORD` node per
/// line. Stops at the first line that is not such a keyword; never fails.
pub fn tblfm_keyword_nodes(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
    let mut formulas = vec![];
    let mut cursor = input;

    loop {
        if cursor.is_empty() {
            break;
        }

        match keyword_node_base(cursor) {
            Ok((rest, (key, nodes))) if key.eq_ignore_ascii_case("TBLFM") => {
                debug_assert!(
                    cursor.len() > rest.len(),
                    "{} > {}",
                    cursor.len(),
                    rest.len()
                );
                cursor = rest;
                formulas.push(node(SyntaxKind::KEYWORD, nodes));
            }
            _ => break,
        }
    }

    Ok((cursor, formulas))
}
/// Parses a single keyword line: optional indentation, `#+`, the key (with
/// an optional `[...]` part for the keys that support one), `:`, and the
/// rest of the line.
///
/// Returns the key text alongside the tokens of the line. Indentation,
/// trailing whitespace and the newline are only emitted when non-empty; the
/// value token is always emitted, even when empty.
fn keyword_node_base(input: Input) -> IResult<Input, (&str, Vec<GreenElement>), ()> {
    let (rest, (indent, hash_plus)) = tuple((space0, hash_plus_token))(input)?;
    let (rest, (name, optional, colon)) = alt((key_with_optional, key))(rest)?;
    let (rest, (value, trailing_ws, newline)) = trim_line_end(rest)?;

    let mut children = Vec::new();

    children.extend((!indent.is_empty()).then(|| indent.ws_token()));
    children.push(hash_plus);
    children.push(name.text_token());

    if let Some((open, text, close)) = optional {
        children.extend([
            open.token(SyntaxKind::L_BRACKET),
            text.text_token(),
            close.token(SyntaxKind::R_BRACKET),
        ]);
    }

    children.push(colon.token(SyntaxKind::COLON));
    children.push(value.text_token());
    children.extend((!trailing_ws.is_empty()).then(|| trailing_ws.ws_token()));
    children.extend((!newline.is_empty()).then(|| newline.nl_token()));

    Ok((rest, (name.s, children)))
}
/// Parses a plain keyword key and its terminating colon.
///
/// Matches any characters up to the first ASCII whitespace or `:`, followed
/// by one or more colons, and requires the match to be at least two bytes
/// long. The last colon is returned as the separator; everything before it
/// (which may itself end in colons) is the key. The middle element of the
/// output is always `None`.
fn key(input: Input) -> IResult<Input, (Input, Option<(Input, Input, Input)>, Input), ()> {
    let name_then_colons = recognize(tuple((
        take_till(|c: char| c.is_ascii_whitespace() || c == ':'),
        take_while1(|c: char| c == ':'),
    )));

    let (rest, matched) = verify(name_then_colons, |m: &Input| m.len() >= 2)(input)?;

    // split off the final colon
    let separator_at = matched.len() - 1;
    let (colon, name) = matched.take_split(separator_at);

    Ok((rest, (name, None, colon)))
}
/// Parses a `CAPTION` or `RESULTS` key with a bracketed optional part,
/// e.g. `CAPTION[short]:`.
///
/// The output is the key, the `(`[`, optional text, `]`)` triple, and the
/// colon. The optional text may not contain `]`, `\r` or `\n`.
fn key_with_optional(
    input: Input,
) -> IResult<Input, (Input, Option<(Input, Input, Input)>, Input), ()> {
    let (rest, (name, l_bracket, optional, r_bracket, colon)) = tuple((
        alt((tag("CAPTION"), tag("RESULTS"))),
        tag("["),
        take_till(|c: char| matches!(c, '\r' | '\n' | ']')),
        tag("]"),
        tag(":"),
    ))(input)?;

    let bracketed = Some((l_bracket, optional, r_bracket));

    Ok((rest, (name, bracketed, colon)))
}
// Snapshot tests for `keyword_node`: plain keywords, babel calls, keys with
// an optional bracketed part, and malformed keys.
#[test]
fn parse() {
use crate::{
ast::{BabelCall, Keyword},
tests::to_ast,
ParseConfig,
};
let to_keyword = to_ast::<Keyword>(keyword_node);
let to_babel_call = to_ast::<BabelCall>(keyword_node);
// these only need to parse without panicking
to_keyword("#+KEY:");
to_keyword("#+::");
to_keyword("#+::");
to_keyword("#+:: ");
to_keyword("#+:: \n");
to_keyword("#+::\n");
// empty value still produces an (empty) TEXT token
insta::assert_debug_snapshot!(
to_keyword("#+KEY:").syntax,
@r###"
KEYWORD@0..6
HASH_PLUS@0..2 "#+"
TEXT@2..5 "KEY"
COLON@5..6 ":"
TEXT@6..6 ""
"###
);
// leading space of the value is kept in the value token
insta::assert_debug_snapshot!(
to_keyword("#+KEY: VALUE").syntax,
@r###"
KEYWORD@0..12
HASH_PLUS@0..2 "#+"
TEXT@2..5 "KEY"
COLON@5..6 ":"
TEXT@6..12 " VALUE"
"###
);
// underscores in the key
insta::assert_debug_snapshot!(
to_keyword("#+K_E_Y: VALUE").syntax,
@r###"
KEYWORD@0..14
HASH_PLUS@0..2 "#+"
TEXT@2..7 "K_E_Y"
COLON@7..8 ":"
TEXT@8..14 " VALUE"
"###
);
// trailing newline becomes a NEW_LINE token
insta::assert_debug_snapshot!(
to_keyword("#+KEY:VALUE\n").syntax,
@r###"
KEYWORD@0..12
HASH_PLUS@0..2 "#+"
TEXT@2..5 "KEY"
COLON@5..6 ":"
TEXT@6..11 "VALUE"
NEW_LINE@11..12 "\n"
"###
);
// RESULTS without an optional part
insta::assert_debug_snapshot!(
to_keyword("#+RESULTS:").syntax,
@r###"
KEYWORD@0..10
HASH_PLUS@0..2 "#+"
TEXT@2..9 "RESULTS"
COLON@9..10 ":"
TEXT@10..10 ""
"###
);
// a standalone ATTR_ keyword is an ordinary keyword
insta::assert_debug_snapshot!(
to_keyword("#+ATTR_LATEX: :width 5cm\n").syntax,
@r###"
KEYWORD@0..25
HASH_PLUS@0..2 "#+"
TEXT@2..12 "ATTR_LATEX"
COLON@12..13 ":"
TEXT@13..24 " :width 5cm"
NEW_LINE@24..25 "\n"
"###
);
// CALL produces a BABEL_CALL node
insta::assert_debug_snapshot!(
to_babel_call("#+CALL: double(n=4)").syntax,
@r###"
BABEL_CALL@0..19
HASH_PLUS@0..2 "#+"
TEXT@2..6 "CALL"
COLON@6..7 ":"
TEXT@7..19 " double(n=4)"
"###
);
// brackets stay part of the key for keys other than CAPTION/RESULTS
insta::assert_debug_snapshot!(
to_keyword("#+ABC[OPTIONAL]: Longer value.").syntax,
@r###"
KEYWORD@0..30
HASH_PLUS@0..2 "#+"
TEXT@2..15 "ABC[OPTIONAL]"
COLON@15..16 ":"
TEXT@16..30 " Longer value."
"###
);
// CAPTION without an optional part
insta::assert_debug_snapshot!(
to_keyword("#+CAPTION: value").syntax,
@r###"
KEYWORD@0..16
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
COLON@9..10 ":"
TEXT@10..16 " value"
"###
);
// CAPTION with an optional part split into bracket and text tokens
insta::assert_debug_snapshot!(
to_keyword("#+CAPTION[caption optional]: value").syntax,
@r###"
KEYWORD@0..34
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
L_BRACKET@9..10 "["
TEXT@10..26 "caption optional"
R_BRACKET@26..27 "]"
COLON@27..28 ":"
TEXT@28..34 " value"
"###
);
// whitespace inside or before the key is rejected
let config = &ParseConfig::default();
assert!(keyword_node(("#+KE Y: VALUE", config).into()).is_err());
assert!(keyword_node(("#+ KEY: VALUE", config).into()).is_err());
}

View file

@ -1,124 +0,0 @@
use nom::{
bytes::complete::{tag, take_while1},
character::complete::space0,
sequence::tuple,
IResult, InputTake,
};
use crate::SyntaxKind;
use super::{
combinator::{eol_or_eof, l_curly_token, line_starts_iter, node, r_curly_token, GreenElement},
input::Input,
};
/// Parses a LaTeX environment (`\begin{NAME} ... \end{NAME}`) by running
/// `latex_environment_node_base` through the crate's `lossless_parser!`
/// macro.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn latex_environment_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(latex_environment_node_base, input)
}
fn latex_environment_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, (ws1, begin, l1, name1, r1)) = tuple((
space0,
tag("\\begin"),
l_curly_token,
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '*'),
r_curly_token,
))(input)?;
for (input, contents) in line_starts_iter(input.s).map(|i| input.take_split(i)) {
if let Ok((input, (ws2, end, l2, name2, r2, ws3, nl))) = tuple((
space0,
tag("\\end"),
l_curly_token,
tag(name1.s),
r_curly_token,
space0,
eol_or_eof,
))(input)
{
return Ok((
input,
node(
SyntaxKind::LATEX_ENVIRONMENT,
[
ws1.ws_token(),
begin.text_token(),
l1,
name1.text_token(),
r1,
contents.text_token(),
ws2.ws_token(),
end.text_token(),
l2,
name2.text_token(),
r2,
ws3.ws_token(),
nl.nl_token(),
],
),
));
}
}
Err(nom::Err::Error(()))
}
// Snapshot tests for `latex_environment_node`: an empty single-line
// environment, a multi-line one, and two rejected inputs.
#[test]
fn parse() {
use crate::ast::LatexEnvironment;
use crate::config::ParseConfig;
use crate::tests::to_ast;
let to_latex = to_ast::<LatexEnvironment>(latex_environment_node);
// begin and end on the same line; empty tokens are still emitted
insta::assert_debug_snapshot!(
to_latex(r"\begin{NAME}\end{NAME}").syntax,
@r###"
LATEX_ENVIRONMENT@0..22
WHITESPACE@0..0 ""
TEXT@0..6 "\\begin"
L_CURLY@6..7 "{"
TEXT@7..11 "NAME"
R_CURLY@11..12 "}"
TEXT@12..12 ""
WHITESPACE@12..12 ""
TEXT@12..16 "\\end"
L_CURLY@16..17 "{"
TEXT@17..21 "NAME"
R_CURLY@21..22 "}"
WHITESPACE@22..22 ""
NEW_LINE@22..22 ""
"###
);
// multi-line body with a starred environment name
insta::assert_debug_snapshot!(
to_latex(
r"\begin{align*}
2x - 5y &= 8 \\
3x + 9y &= -12
\end{align*}"
).syntax,
@r###"
LATEX_ENVIRONMENT@0..70
WHITESPACE@0..0 ""
TEXT@0..6 "\\begin"
L_CURLY@6..7 "{"
TEXT@7..13 "align*"
R_CURLY@13..14 "}"
TEXT@14..54 "\n 2x - 5y &= 8 \\\\\n ..."
WHITESPACE@54..58 " "
TEXT@58..62 "\\end"
L_CURLY@62..63 "{"
TEXT@63..69 "align*"
R_CURLY@69..70 "}"
WHITESPACE@70..70 ""
NEW_LINE@70..70 ""
"###
);
// mismatched names, and a name with a disallowed character
let c = ParseConfig::default();
assert!(latex_environment_node((r"\begin{equation}\end{align}", &c).into()).is_err());
assert!(latex_environment_node((r"\begin{_}\end{_}", &c).into()).is_err());
}

View file

@ -1,196 +0,0 @@
use nom::{
branch::alt,
bytes::complete::{take_until1, take_while1},
character::complete::alpha1,
sequence::tuple,
IResult, InputTake,
};
use crate::SyntaxKind;
use super::{
combinator::{
backslash_token, dollar2_token, dollar_token, l_bracket_token, l_curly_token,
l_parens_token, node, r_bracket_token, r_curly_token, r_parens_token, GreenElement,
},
input::Input,
};
/// Parses a LaTeX fragment by trying `template1` through `template5` in
/// order and taking the first that succeeds.
///
/// Callers must pass input that starts with `\` or `$` (checked with
/// `debug_assert!`).
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn latex_fragment_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert!(input.s.starts_with(['\\', '$']));
let mut parser = alt((template1, template2, template3, template4, template5));
crate::lossless_parser!(parser, input)
}
/// `\NAME[CONTENTS]` or `\NAME{CONTENTS}`.
///
/// `NAME` is one or more ASCII letters. Bracketed contents may not contain
/// `{`, `}`, `[`, `]` or a line break; braced contents may not contain `{`,
/// `}` or a line break. Contents must be non-empty in both forms.
fn template1(input: Input) -> IResult<Input, GreenElement, ()> {
    let (rest, (backslash, name)) = tuple((backslash_token, alpha1))(input)?;

    let bracketed = tuple((
        l_bracket_token,
        take_while1(|c: char| !matches!(c, '{' | '}' | '[' | ']' | '\r' | '\n')),
        r_bracket_token,
    ));
    let braced = tuple((
        l_curly_token,
        take_while1(|c: char| !matches!(c, '{' | '}' | '\r' | '\n')),
        r_curly_token,
    ));

    let (rest, (open, content, close)) = alt((bracketed, braced))(rest)?;

    let fragment = node(
        SyntaxKind::LATEX_FRAGMENT,
        [backslash, name.text_token(), open, content.text_token(), close],
    );

    Ok((rest, fragment))
}
// \(CONTENTS\)
fn template2(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, (backslash1, l)) = tuple((backslash_token, l_parens_token))(input)?;
if let Some(i) = jetscii::Substring::new("\\)").find(input.s) {
let (input, content) = input.take_split(i);
let (input, (backslash2, r)) = tuple((backslash_token, r_parens_token))(input)?;
Ok((
input,
node(
SyntaxKind::LATEX_FRAGMENT,
[backslash1, l, content.text_token(), backslash2, r],
),
))
} else {
Err(nom::Err::Error(()))
}
}
// \[CONTENTS\]
fn template3(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, (backslash1, l)) = tuple((backslash_token, l_bracket_token))(input)?;
if let Some(i) = jetscii::Substring::new("\\]").find(input.s) {
let (input, content) = input.take_split(i);
let (input, (backslash2, r)) = tuple((backslash_token, r_bracket_token))(input)?;
Ok((
input,
node(
SyntaxKind::LATEX_FRAGMENT,
[backslash1, l, content.text_token(), backslash2, r],
),
))
} else {
Err(nom::Err::Error(()))
}
}
// $$CONTENTS$$
fn template4(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, l) = dollar2_token(input)?;
let (input, content) = take_until1("$$")(input)?;
let (input, r) = dollar2_token(input)?;
Ok((
input,
node(SyntaxKind::LATEX_FRAGMENT, [l, content.text_token(), r]),
))
}
// $CONTENTS$
fn template5(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, l) = dollar_token(input)?;
let (input, content) = take_until1("$")(input)?;
let (input, r) = dollar_token(input)?;
let b = content.as_bytes()[0];
if matches!(b, b'\r' | b'\n' | b' ' | b'\t' | b'.' | b',' | b';' | b'$') {
return Err(nom::Err::Error(()));
}
let b = content.as_bytes()[content.s.len() - 1];
if matches!(b, b'\r' | b'\n' | b' ' | b'\t' | b'.' | b',' | b'$') {
return Err(nom::Err::Error(()));
}
let p = input.bytes().next();
if let Some(p) = p {
if !matches!(p, b')' | b'}' | b']' | b'\'' | b'"' | b' ' | b'\r' | b'\n') {
return Err(nom::Err::Error(()));
}
}
Ok((
input,
node(SyntaxKind::LATEX_FRAGMENT, [l, content.text_token(), r]),
))
}
/// Snapshot tests for `latex_fragment_node`, one per accepted fragment form,
/// followed by inputs that must be rejected.
#[test]
fn parse() {
use crate::{ast::LatexFragment, tests::to_ast, ParseConfig};
let to_fragment = to_ast::<LatexFragment>(latex_fragment_node);
// \NAME{CONTENTS}
insta::assert_debug_snapshot!(
to_fragment("\\enlargethispage{2\\baselineskip}").syntax,
@r###"
LATEX_FRAGMENT@0..32
BACKSLASH@0..1 "\\"
TEXT@1..16 "enlargethispage"
L_CURLY@16..17 "{"
TEXT@17..31 "2\\baselineskip"
R_CURLY@31..32 "}"
"###
);
// \[CONTENTS\]
insta::assert_debug_snapshot!(
to_fragment("\\[a\\]").syntax,
@r###"
LATEX_FRAGMENT@0..5
BACKSLASH@0..1 "\\"
L_BRACKET@1..2 "["
TEXT@2..3 "a"
BACKSLASH@3..4 "\\"
R_BRACKET@4..5 "]"
"###
);
// \(CONTENTS\)
insta::assert_debug_snapshot!(
to_fragment("\\(e^{i \\pi}\\)").syntax,
@r###"
LATEX_FRAGMENT@0..13
BACKSLASH@0..1 "\\"
L_PARENS@1..2 "("
TEXT@2..11 "e^{i \\pi}"
BACKSLASH@11..12 "\\"
R_PARENS@12..13 ")"
"###
);
// $CONTENTS$
insta::assert_debug_snapshot!(
to_fragment("$\\frac{1}{3}$").syntax,
@r###"
LATEX_FRAGMENT@0..13
DOLLAR@0..1 "$"
TEXT@1..12 "\\frac{1}{3}"
DOLLAR@12..13 "$"
"###
);
// $CONTENTS$ may contain a line break in the middle
insta::assert_debug_snapshot!(
to_fragment("$a\nb$").syntax,
@r###"
LATEX_FRAGMENT@0..5
DOLLAR@0..1 "$"
TEXT@1..4 "a\nb"
DOLLAR@4..5 "$"
"###
);
let c = ParseConfig::default();
// body must not start with whitespace
assert!(latex_fragment_node(("$ LaTeXxxx$", &c).into()).is_err());
// body must not end with whitespace
assert!(latex_fragment_node(("$LaTeXxxx $", &c).into()).is_err());
// body must not end with `.`
assert!(latex_fragment_node(("$a.$", &c).into()).is_err());
// the closing `$` must not be followed by a letter
assert!(latex_fragment_node(("$a$a", &c).into()).is_err());
// unterminated fragments
assert!(latex_fragment_node(("$$b\nol\nd*", &c).into()).is_err());
assert!(latex_fragment_node(("$b\nol\nd*", &c).into()).is_err());
}

View file

@ -1,71 +0,0 @@
use nom::{character::complete::space0, combinator::map, sequence::tuple, IResult};
use crate::{
syntax::combinator::{backslash_token, eol_or_eof, node},
SyntaxKind,
};
use super::{combinator::GreenElement, input::Input};
/// Parses a line break object: two backslashes, optional trailing blanks,
/// and then the end of the line or of the input.
pub fn line_break_node(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert!(input.s.starts_with('\\'));

    let pieces = tuple((backslash_token, backslash_token, space0, eol_or_eof));
    let mut parser = map(pieces, |(first, second, blanks, line_end)| {
        let children = [first, second, blanks.ws_token(), line_end.nl_token()];
        node(SyntaxKind::LINE_BREAK, children)
    });

    crate::lossless_parser!(parser, input)
}
/// Snapshot tests for `line_break_node` with `\n`, trailing blanks, `\r\n`
/// and end-of-input terminators.
#[test]
fn parse() {
use crate::ast::LineBreak;
use crate::tests::to_ast;
let to_line_break = to_ast::<LineBreak>(line_break_node);
// terminated by `\n`
insta::assert_debug_snapshot!(
to_line_break("\\\\\n").syntax,
@r###"
LINE_BREAK@0..3
BACKSLASH@0..1 "\\"
BACKSLASH@1..2 "\\"
WHITESPACE@2..2 ""
NEW_LINE@2..3 "\n"
"###
);
// blanks between the backslashes and the newline
insta::assert_debug_snapshot!(
to_line_break("\\\\ \n").syntax,
@r###"
LINE_BREAK@0..6
BACKSLASH@0..1 "\\"
BACKSLASH@1..2 "\\"
WHITESPACE@2..5 " "
NEW_LINE@5..6 "\n"
"###
);
// terminated by `\r\n`
insta::assert_debug_snapshot!(
to_line_break("\\\\\r\n").syntax,
@r###"
LINE_BREAK@0..4
BACKSLASH@0..1 "\\"
BACKSLASH@1..2 "\\"
WHITESPACE@2..2 ""
NEW_LINE@2..4 "\r\n"
"###
);
// terminated by the end of input; the NEW_LINE token is empty
insta::assert_debug_snapshot!(
to_line_break("\\\\ ").syntax,
@r###"
LINE_BREAK@0..6
BACKSLASH@0..1 "\\"
BACKSLASH@1..2 "\\"
WHITESPACE@2..6 " "
NEW_LINE@6..6 ""
"###
);
}

View file

@ -1,109 +0,0 @@
use nom::{
bytes::complete::take_while,
combinator::{map, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{
l_bracket2_token, l_bracket_token, node, r_bracket2_token, r_bracket_token, GreenElement,
},
input::Input,
object::link_description_object_nodes,
SyntaxKind::*,
};
/// Parses a regular link: `[[PATH]]` or `[[PATH][DESCRIPTION]]`.
///
/// `PATH` stops at `<`, `>`, a newline or `]`; `DESCRIPTION` stops at `[` or
/// `]` and is itself parsed with `link_description_object_nodes`.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn link_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let path = take_while(|c: char| !matches!(c, '<' | '>' | '\n' | ']'));
    let description = opt(tuple((
        r_bracket_token,
        l_bracket_token,
        take_while(|c: char| !matches!(c, '[' | ']')),
    )));

    let mut parser = map(
        tuple((l_bracket2_token, path, description, r_bracket2_token)),
        |(open, path, description, close)| {
            let mut children = vec![open, path.token(LINK_PATH)];
            if let Some((r_bracket, l_bracket, text)) = description {
                children.push(r_bracket);
                children.push(l_bracket);
                children.extend(link_description_object_nodes(text));
            }
            children.push(close);
            node(LINK, children)
        },
    );

    crate::lossless_parser!(parser, input)
}
/// Snapshot tests for `link_node`: links with and without a description,
/// plus one unterminated link that must be rejected.
#[test]
fn parse() {
use crate::{ast::Link, tests::to_ast, ParseConfig};
let to_link = to_ast::<Link>(link_node);
// path only
let link = to_link("[[#id]]");
insta::assert_debug_snapshot!(
link.syntax,
@r###"
LINK@0..7
L_BRACKET2@0..2 "[["
LINK_PATH@2..5 "#id"
R_BRACKET2@5..7 "]]"
"###
);
// path and plain-text description
let link = to_link("[[#id][desc]]");
insta::assert_debug_snapshot!(
link.syntax,
@r###"
LINK@0..13
L_BRACKET2@0..2 "[["
LINK_PATH@2..5 "#id"
R_BRACKET@5..6 "]"
L_BRACKET@6..7 "["
TEXT@7..11 "desc"
R_BRACKET2@11..13 "]]"
"###
);
let link = to_link("[[file:/home/dominik/images/jupiter.jpg]]");
insta::assert_debug_snapshot!(
link.syntax,
@r###"
LINK@0..41
L_BRACKET2@0..2 "[["
LINK_PATH@2..39 "file:/home/dominik/im ..."
R_BRACKET2@39..41 "]]"
"###
);
// the description is parsed for objects (here: bold markup)
let link = to_link("[[https://orgmode.org][*bold* description]]");
insta::assert_debug_snapshot!(
link.syntax,
@r###"
LINK@0..43
L_BRACKET2@0..2 "[["
LINK_PATH@2..21 "https://orgmode.org"
R_BRACKET@21..22 "]"
L_BRACKET@22..23 "["
BOLD@23..29
STAR@23..24 "*"
TEXT@24..28 "bold"
STAR@28..29 "*"
TEXT@29..41 " description"
R_BRACKET2@41..43 "]]"
"###
);
let config = &ParseConfig::default();
// missing the final `]`
assert!(link_node(("[[#id][desc]", config).into()).is_err());
}

View file

@ -1,599 +0,0 @@
use memchr::{memchr, memchr2};
use nom::{
branch::alt,
bytes::complete::{tag, take},
character::complete::{alphanumeric1, digit1, space0, space1},
combinator::{cond, map, opt, recognize, verify},
sequence::{preceded, tuple},
IResult, InputTake,
};
use super::{
combinator::{
at_token, blank_lines, colon2_token, eol_or_eof, l_bracket_token, line_starts_iter, node,
r_bracket_token, GreenElement,
},
element::element_node,
input::Input,
keyword::affiliated_keyword_nodes,
object::standard_object_nodes,
paragraph::paragraph_nodes,
SyntaxKind::*,
};
/// Public entry point for parsing a plain list; runs `list_node_base`
/// through the crate's `lossless_parser!` macro.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn list_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(list_node_base, input)
}
/// Parses a whole list: leading affiliated keywords, then one or more items
/// that all share the indentation of the first item, then trailing blank
/// lines.
///
/// Collection of items stops when an item reports that it ended with
/// consecutive blank lines, when the next line has a different indentation,
/// when the next line is not a list item, or when the input is exhausted.
fn list_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, affiliated_keywords) = affiliated_keyword_nodes(input)?;
    let (input, first_indent) = space0(input)?;
    let (input, (first_ended_list, first_item)) = list_item_node(first_indent, input)?;

    let mut children = vec![];
    children.extend(affiliated_keywords);
    children.push(first_item);

    let mut rest = input;
    if !first_ended_list {
        while !rest.is_empty() {
            let (after_indent, indent) = space0(rest)?;
            // only items at exactly the first item's indentation are siblings
            if indent.len() != first_indent.len() {
                break;
            }

            match list_item_node(indent, after_indent) {
                Ok((after_item, (ended_list, item))) => {
                    children.push(item);
                    // every accepted item must consume input, or we would spin
                    debug_assert!(
                        rest.len() > after_item.len(),
                        "{} > {}",
                        rest.len(),
                        after_item.len(),
                    );
                    rest = after_item;
                    if ended_list {
                        break;
                    }
                }
                Err(_) => break,
            }
        }
    }

    let (input, post_blank) = blank_lines(rest)?;
    children.extend(post_blank);
    Ok((input, node(LIST, children)))
}
/// Parses one list item whose indentation (`indent`) has already been
/// consumed by the caller.
///
/// An item is a bullet (`+`, `*`, `-`, `N.` or `N)` followed by blanks or a
/// line end), then an optional counter, an optional checkbox, an optional tag
/// (unordered bullets only) and the item content.
///
/// The returned flag is the one reported by `list_item_content_node`: `true`
/// when the content ended at two consecutive blank lines. In that case no
/// post-blank lines are attached to the item.
#[tracing::instrument(level = "debug", skip(input, indent), fields(input = input.s))]
fn list_item_node<'a>(
    indent: Input<'a>,
    input: Input<'a>,
) -> IResult<Input<'a>, (bool, GreenElement), ()> {
    let (input, bullet) = recognize(tuple((
        alt((
            tag("+"),
            tag("*"),
            tag("-"),
            preceded(digit1, tag(".")),
            preceded(digit1, tag(")")),
        )),
        alt((space1, eol_or_eof)),
    )))(input)?;

    // list item cannot have an asterisk at the beginning of line
    if indent.is_empty() && bullet.s.starts_with('*') {
        return Err(nom::Err::Error(()));
    }

    // a bullet at the very end of the input forms an item with no content
    if input.is_empty() {
        let bare_item = node(
            LIST_ITEM,
            [
                indent.token(LIST_ITEM_INDENT),
                bullet.token(LIST_ITEM_BULLET),
            ],
        );
        return Ok((input, (false, bare_item)));
    }

    let is_ordered = bullet.s.starts_with(|c: char| c.is_ascii_digit());

    let (input, counter) = opt(list_item_counter)(input)?;
    let (input, checkbox) = opt(list_item_checkbox)(input)?;
    let (input, item_tag) = cond(!is_ordered, opt(list_item_tag))(input)?;
    let (input, (ended_list, content)) = list_item_content_node(input, indent.len())?;
    let (input, post_blank) = cond(!ended_list, blank_lines)(input)?;

    let mut children = vec![
        indent.token(LIST_ITEM_INDENT),
        bullet.token(LIST_ITEM_BULLET),
    ];
    // each optional prefix is followed by the whitespace parsed with it
    for prefix in [counter, checkbox, item_tag.flatten()] {
        if let Some((element, ws)) = prefix {
            children.extend([element, ws.ws_token()]);
        }
    }
    children.push(content);
    if let Some(blank) = post_blank {
        children.extend(blank);
    }

    Ok((input, (ended_list, node(LIST_ITEM, children))))
}
/// Parses a counter cookie such as `[@A]` or `[@12]`, and the blanks after it.
///
/// Returns the counter node together with the (possibly empty) trailing
/// whitespace.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
fn list_item_counter(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
    let (input, (l_bracket, at, value, r_bracket)) =
        tuple((l_bracket_token, at_token, alphanumeric1, r_bracket_token))(input)?;

    let counter = node(
        LIST_ITEM_COUNTER,
        [l_bracket, at, value.text_token(), r_bracket],
    );

    let (input, ws) = space0(input)?;
    Ok((input, (counter, ws)))
}
/// Parses a checkbox (`[ ]`, `[X]` or `[-]`) and the blanks after it.
///
/// Returns the checkbox node together with the (possibly empty) trailing
/// whitespace.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
fn list_item_checkbox(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
    let state = verify(take(1usize), |mark: &Input| {
        matches!(mark.s, " " | "X" | "-")
    });
    let (input, (l_bracket, mark, r_bracket)) =
        tuple((l_bracket_token, state, r_bracket_token))(input)?;

    let checkbox = node(
        LIST_ITEM_CHECK_BOX,
        [l_bracket, mark.text_token(), r_bracket],
    );

    let (input, ws) = space0(input)?;
    Ok((input, (checkbox, ws)))
}
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
fn list_item_tag(input: Input) -> IResult<Input, (GreenElement, Input), ()> {
let bytes = input.as_bytes();
let (input, tag) = match memchr2(b'\n', b':', bytes) {
Some(idx) if idx > 0 && bytes[idx] == b':' => input.take_split(idx),
_ => return Err(nom::Err::Error(())),
};
let (input, ws) = space0(input)?;
let (input, colon2) = colon2_token(input)?;
let mut children = standard_object_nodes(tag);
children.push(colon2);
Ok((input, (node(LIST_ITEM_TAG, children), ws)))
}
/// Parses the content of a list item, given the item's own indentation width
/// (`indent`).
///
/// Returns the remaining input and a pair `(ended_by_blank_lines, node)`,
/// where the flag is `true` only when the content was terminated by two
/// consecutive blank lines.
///
/// The content ends at the first of:
/// - a non-blank line indented no deeper than `indent` (flag `false`);
/// - two consecutive blank lines (flag `true`);
/// - the end of the input (flag `false`).
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
fn list_item_content_node(input: Input, indent: usize) -> IResult<Input, (bool, GreenElement), ()> {
// single-line input: everything left is one paragraph of objects
if memchr(b'\n', input.as_bytes()).is_none() {
return Ok((
input.of(""),
(
false,
node(
LIST_ITEM_CONTENT,
[node(PARAGRAPH, standard_object_nodes(input))],
),
),
));
};
let mut skip_one = true;
// `i` is the not-yet-consumed remainder; it advances past each nested element
let mut i = input;
let mut children = vec![];
// split point recorded at the most recent blank line, if the line before
// the current one was blank
let mut previous_blank_line: Option<(Input, Input)> = None;
'l: while !i.is_empty() {
// `input` is the remainder starting at a line start, `head` is the text of `i` before it
for (input, head) in line_starts_iter(i.as_str())
// the first line in list item content will always be a paragraph
// so we need to skip it in the first iteration
.skip(if skip_one { 1 } else { 0 })
.map(|idx| i.take_split(idx))
{
match get_line_indent(input.as_str()) {
Some(next_indent) => {
// a line indented no deeper than the item itself ends the item
if next_indent <= indent {
// if a blank line came just before, end the content at that blank line instead
let (input, head) = previous_blank_line.unwrap_or((input, head));
if !head.is_empty() {
children.extend(paragraph_nodes(head)?);
}
return Ok((input, (false, node(LIST_ITEM_CONTENT, children))));
}
previous_blank_line = None;
// a nested element starts here: flush the text before it as
// paragraphs, then restart the scan after the element
if let Ok((input, element)) = element_node(input) {
if !head.is_empty() {
children.extend(paragraph_nodes(head)?);
}
children.push(element);
debug_assert!(input.len() < i.len(), "{} < {}", input.len(), i.len());
i = input;
skip_one = false;
continue 'l;
}
}
_ => {
// list item ends at two consecutive empty lines
if let Some((input, head)) = previous_blank_line {
if !head.is_empty() {
children.extend(paragraph_nodes(head)?);
}
return Ok((input, (true, node(LIST_ITEM_CONTENT, children))));
} else {
previous_blank_line = Some((input, head))
}
}
}
}
// no terminator and no nested element found: the rest is paragraph text
children.extend(paragraph_nodes(i)?);
break;
}
Ok((input.of(""), (false, node(LIST_ITEM_CONTENT, children))))
}
/// Returns the byte offset of the first non-whitespace byte on the first
/// line of `input`, or `None` when that line is empty or all ASCII
/// whitespace.
fn get_line_indent(input: &str) -> Option<usize> {
    let bytes = input.as_bytes();
    // the first line ends just before the first `\n`, or at the end of input
    let line_len = bytes
        .iter()
        .position(|&b| b == b'\n')
        .unwrap_or(bytes.len());

    bytes[..line_len]
        .iter()
        .position(|b| !b.is_ascii_whitespace())
}
/// Snapshot tests for `list_node`: bare bullets, single items, counters,
/// checkboxes, tags, nested lists, list termination rules, and a few inputs
/// that must not parse as a list.
#[test]
fn parse() {
use crate::{ast::List, tests::to_ast, ParseConfig};
let to_list = to_ast::<List>(list_node);
// a bullet directly followed by the end of input
insta::assert_debug_snapshot!(
to_list("1)").syntax,
@r###"
LIST@0..2
LIST_ITEM@0..2
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "1)"
"###
);
insta::assert_debug_snapshot!(
to_list("+ ").syntax,
@r###"
LIST@0..2
LIST_ITEM@0..2
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
"###
);
// the newline after the bullet is part of the bullet token
insta::assert_debug_snapshot!(
to_list("-\n").syntax,
@r###"
LIST@0..2
LIST_ITEM@0..2
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "-\n"
"###
);
// single item with one line of content
insta::assert_debug_snapshot!(
to_list("+ 1").syntax,
@r###"
LIST@0..3
LIST_ITEM@0..3
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_CONTENT@2..3
PARAGRAPH@2..3
TEXT@2..3 "1"
"###
);
insta::assert_debug_snapshot!(
to_list("+ 1\n").syntax,
@r###"
LIST@0..4
LIST_ITEM@0..4
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_CONTENT@2..4
PARAGRAPH@2..4
TEXT@2..4 "1\n"
"###
);
// list ends with two consecutive blank lines, and these blank lines
// will be the post_blank of list node
insta::assert_debug_snapshot!(
to_list("+ [@A] 1\n\n\n+ 2").syntax,
@r###"
LIST@0..11
LIST_ITEM@0..9
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_COUNTER@2..6
L_BRACKET@2..3 "["
AT@3..4 "@"
TEXT@4..5 "A"
R_BRACKET@5..6 "]"
WHITESPACE@6..7 " "
LIST_ITEM_CONTENT@7..9
PARAGRAPH@7..9
TEXT@7..9 "1\n"
BLANK_LINE@9..10 "\n"
BLANK_LINE@10..11 "\n"
"###
);
// empty line between list item, the empty line will be
// the post_blank of first item
insta::assert_debug_snapshot!(
to_list("+ *TAG* :: item1\n\n+ [X] item2").syntax,
@r###"
LIST@0..29
LIST_ITEM@0..18
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_TAG@2..10
BOLD@2..7
STAR@2..3 "*"
TEXT@3..6 "TAG"
STAR@6..7 "*"
TEXT@7..8 " "
COLON2@8..10 "::"
WHITESPACE@10..10 ""
LIST_ITEM_CONTENT@10..17
PARAGRAPH@10..17
TEXT@10..17 " item1\n"
BLANK_LINE@17..18 "\n"
LIST_ITEM@18..29
LIST_ITEM_INDENT@18..18 ""
LIST_ITEM_BULLET@18..20 "+ "
LIST_ITEM_CHECK_BOX@20..23
L_BRACKET@20..21 "["
TEXT@21..22 "X"
R_BRACKET@22..23 "]"
WHITESPACE@23..24 " "
LIST_ITEM_CONTENT@24..29
PARAGRAPH@24..29
TEXT@24..29 "item2"
"###
);
// nested list
let list = to_list(
r#"+ item1
+ item2"#,
);
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..17
LIST_ITEM@0..17
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_CONTENT@2..17
PARAGRAPH@2..8
TEXT@2..8 "item1\n"
LIST@8..17
LIST_ITEM@8..17
LIST_ITEM_INDENT@8..10 " "
LIST_ITEM_BULLET@10..12 "+ "
LIST_ITEM_CONTENT@12..17
PARAGRAPH@12..17
TEXT@12..17 "item2"
"###
);
// an unindented, non-item line ends the list
insta::assert_debug_snapshot!(
to_list("+ item1\nitem2").syntax,
@r###"
LIST@0..8
LIST_ITEM@0..8
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_CONTENT@2..8
PARAGRAPH@2..8
TEXT@2..8 "item1\n"
"###
);
// a single blank line followed by indented text stays inside the item
insta::assert_debug_snapshot!(
to_list("+ item1\n\n still item 1").syntax,
@r###"
LIST@0..23
LIST_ITEM@0..23
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_CONTENT@2..23
PARAGRAPH@2..9
TEXT@2..8 "item1\n"
BLANK_LINE@8..9 "\n"
PARAGRAPH@9..23
TEXT@9..23 " still item 1"
"###
);
let list = to_list(
r#"+ item1
+ item2
"#,
);
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..26
LIST_ITEM@0..26
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..2 "+ "
LIST_ITEM_CONTENT@2..26
PARAGRAPH@2..8
TEXT@2..8 "item1\n"
LIST@8..26
LIST_ITEM@8..26
LIST_ITEM_INDENT@8..14 " "
LIST_ITEM_BULLET@14..16 "+ "
LIST_ITEM_CONTENT@16..26
PARAGRAPH@16..26
TEXT@16..22 "item2\n"
BLANK_LINE@22..26 " "
"###
);
// ordered outer list containing an unordered nested list
let list = to_list(
r#"1. item1
- item2
3. item 3"#,
);
assert!(list.is_ordered());
insta::assert_debug_snapshot!(
list.syntax,
@r###"
LIST@0..32
LIST_ITEM@0..23
LIST_ITEM_INDENT@0..0 ""
LIST_ITEM_BULLET@0..3 "1. "
LIST_ITEM_CONTENT@3..23
PARAGRAPH@3..10
TEXT@3..9 "item1\n"
BLANK_LINE@9..10 "\n"
LIST@10..23
LIST_ITEM@10..23
LIST_ITEM_INDENT@10..14 " "
LIST_ITEM_BULLET@14..16 "- "
LIST_ITEM_CONTENT@16..22
PARAGRAPH@16..22
TEXT@16..22 "item2\n"
BLANK_LINE@22..23 "\n"
LIST_ITEM@23..32
LIST_ITEM_INDENT@23..23 ""
LIST_ITEM_BULLET@23..26 "3. "
LIST_ITEM_CONTENT@26..32
PARAGRAPH@26..32
TEXT@26..32 "item 3"
"###
);
// nested list
insta::assert_debug_snapshot!(
to_list(" + item1\n\n + item2").syntax,
@r###"
LIST@0..20
LIST_ITEM@0..11
LIST_ITEM_INDENT@0..2 " "
LIST_ITEM_BULLET@2..4 "+ "
LIST_ITEM_CONTENT@4..10
PARAGRAPH@4..10
TEXT@4..10 "item1\n"
BLANK_LINE@10..11 "\n"
LIST_ITEM@11..20
LIST_ITEM_INDENT@11..13 " "
LIST_ITEM_BULLET@13..15 "+ "
LIST_ITEM_CONTENT@15..20
PARAGRAPH@15..20
TEXT@15..20 "item2"
"###
);
insta::assert_debug_snapshot!(
to_list(" 1. item1\n 2. item2\n 3. item3").syntax,
@r###"
LIST@0..42
LIST_ITEM@0..42
LIST_ITEM_INDENT@0..2 " "
LIST_ITEM_BULLET@2..5 "1. "
LIST_ITEM_CONTENT@5..42
PARAGRAPH@5..11
TEXT@5..11 "item1\n"
LIST@11..28
LIST_ITEM@11..28
LIST_ITEM_INDENT@11..19 " "
LIST_ITEM_BULLET@19..22 "2. "
LIST_ITEM_CONTENT@22..28
PARAGRAPH@22..28
TEXT@22..28 "item2\n"
LIST@28..42
LIST_ITEM@28..42
LIST_ITEM_INDENT@28..34 " "
LIST_ITEM_BULLET@34..37 "3. "
LIST_ITEM_CONTENT@37..42
PARAGRAPH@37..42
TEXT@37..42 "item3"
"###
);
// Indentation of lines within other greater elements do not count
insta::assert_debug_snapshot!(
to_list(" 1. item1\n #+begin_example\nhello\n#+end_example\n").syntax,
@r###"
LIST@0..51
LIST_ITEM@0..51
LIST_ITEM_INDENT@0..2 " "
LIST_ITEM_BULLET@2..5 "1. "
LIST_ITEM_CONTENT@5..51
PARAGRAPH@5..11
TEXT@5..11 "item1\n"
EXAMPLE_BLOCK@11..51
BLOCK_BEGIN@11..31
WHITESPACE@11..15 " "
TEXT@15..23 "#+begin_"
TEXT@23..30 "example"
NEW_LINE@30..31 "\n"
BLOCK_CONTENT@31..37
TEXT@31..37 "hello\n"
BLOCK_END@37..51
TEXT@37..43 "#+end_"
TEXT@43..50 "example"
NEW_LINE@50..51 "\n"
"###
);
// smoke tests: these bare bullets are only run through the parser, with no snapshot
to_list("- ");
to_list("-\t");
to_list("-\r");
to_list("-\t\n");
to_list("-\r\n");
to_list("-");
let config = &ParseConfig::default();
// a bullet must be followed by whitespace or a line end
assert!(list_node(("-a", config).into()).is_err());
// `*` at column zero is never a list bullet
assert!(list_node(("*\r\n", config).into()).is_err());
assert!(list_node(("* ", config).into()).is_err());
}

View file

@ -1,103 +0,0 @@
use nom::{
bytes::complete::{take_until, take_while1},
combinator::{map, opt, verify},
sequence::tuple,
IResult,
};
use super::{
combinator::{
l_curly3_token, l_parens_token, node, r_curly3_token, r_parens_token, GreenElement,
},
input::Input,
SyntaxKind::*,
};
/// Parses a macro call: `{{{NAME}}}` or `{{{NAME(ARGUMENTS)}}}`.
///
/// `NAME` starts with an ASCII letter and continues with ASCII
/// alphanumerics, `-` or `_`. `ARGUMENTS` is everything up to the first
/// `)}}}` and is kept as a single text token.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn macros_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let name = verify(
        take_while1(|c: char| c.is_ascii_alphanumeric() || matches!(c, '-' | '_')),
        // `take_while1` never yields an empty match, so index 0 exists
        |name: &Input| name.as_bytes()[0].is_ascii_alphabetic(),
    );
    let arguments = opt(tuple((l_parens_token, take_until(")}}}"), r_parens_token)));

    let mut parser = map(
        tuple((l_curly3_token, name, arguments, r_curly3_token)),
        |(open, name, arguments, close)| {
            let mut children = vec![open, name.text_token()];
            if let Some((l_parens, text, r_parens)) = arguments {
                children.push(l_parens);
                children.push(text.text_token());
                children.push(r_parens);
            }
            children.push(close);
            node(MACROS, children)
        },
    );

    crate::lossless_parser!(parser, input)
}
/// Snapshot tests for `macros_node`: macros without arguments, with one or
/// more arguments, and malformed inputs that must be rejected.
#[test]
fn test() {
use crate::{ast::Macros, tests::to_ast, ParseConfig};
let to_macros = to_ast::<Macros>(macros_node);
// no argument list
insta::assert_debug_snapshot!(
to_macros("{{{title}}}").syntax,
@r###"
MACROS@0..11
L_CURLY3@0..3 "{{{"
TEXT@3..8 "title"
R_CURLY3@8..11 "}}}"
"###
);
insta::assert_debug_snapshot!(
to_macros("{{{one_arg_macro(1)}}}").syntax,
@r###"
MACROS@0..22
L_CURLY3@0..3 "{{{"
TEXT@3..16 "one_arg_macro"
L_PARENS@16..17 "("
TEXT@17..18 "1"
R_PARENS@18..19 ")"
R_CURLY3@19..22 "}}}"
"###
);
// the whole argument list is kept as one TEXT token; commas are not split
insta::assert_debug_snapshot!(
to_macros("{{{two_arg_macro(1, 2)}}}").syntax,
@r###"
MACROS@0..25
L_CURLY3@0..3 "{{{"
TEXT@3..16 "two_arg_macro"
L_PARENS@16..17 "("
TEXT@17..21 "1, 2"
R_PARENS@21..22 ")"
R_CURLY3@22..25 "}}}"
"###
);
// an escaped comma stays verbatim inside the argument text
insta::assert_debug_snapshot!(
to_macros("{{{two_arg_macro(1\\,a, 2)}}}").syntax,
@r###"
MACROS@0..28
L_CURLY3@0..3 "{{{"
TEXT@3..16 "two_arg_macro"
L_PARENS@16..17 "("
TEXT@17..24 "1\\,a, 2"
R_PARENS@24..25 ")"
R_CURLY3@25..28 "}}}"
"###
);
let config = &ParseConfig::default();
// name must start with a letter
assert!(macros_node(("{{{0uthor}}}", config).into()).is_err());
// closing delimiter too short
assert!(macros_node(("{{{author}}", config).into()).is_err());
// unbalanced parentheses
assert!(macros_node(("{{{poem(}}}", config).into()).is_err());
assert!(macros_node(("{{{poem)}}}", config).into()).is_err());
}

View file

@ -1,315 +0,0 @@
//! Org-mode elements
pub mod block;
pub mod clock;
pub mod combinator;
pub mod comment;
pub mod cookie;
pub mod document;
pub mod drawer;
pub mod dyn_block;
pub mod element;
pub mod emphasis;
pub mod entity;
pub mod fixed_width;
pub mod fn_def;
pub mod fn_ref;
pub mod headline;
pub mod inline_call;
pub mod inline_src;
pub mod input;
pub mod keyword;
pub mod latex_environment;
pub mod latex_fragment;
pub mod line_break;
pub mod link;
pub mod list;
pub mod macros;
pub mod object;
pub mod paragraph;
pub mod planning;
pub mod radio_target;
pub mod rule;
pub mod snippet;
pub mod subscript_superscript;
pub mod table;
pub mod target;
pub mod timestamp;
use rowan::Language;
/// Marker type that implements [`rowan::Language`] for Org syntax trees,
/// with [`SyntaxKind`] as the node/token kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct OrgLanguage;
impl Language for OrgLanguage {
type Kind = SyntaxKind;
fn kind_from_raw(raw: rowan::SyntaxKind) -> SyntaxKind {
// SAFETY: SyntaxKind is `repr(u16)`
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: SyntaxKind) -> rowan::SyntaxKind {
rowan::SyntaxKind(kind as u16)
}
}
// rowan tree types specialised to `OrgLanguage`

/// A node of the Org syntax tree.
pub type SyntaxNode = rowan::SyntaxNode<OrgLanguage>;
/// A token (leaf) of the Org syntax tree.
pub type SyntaxToken = rowan::SyntaxToken<OrgLanguage>;
/// Either a node or a token of the Org syntax tree.
pub type SyntaxElement = rowan::SyntaxElement<OrgLanguage>;
/// rowan's `SyntaxNodeChildren` iterator for `OrgLanguage`.
pub type SyntaxNodeChildren = rowan::SyntaxNodeChildren<OrgLanguage>;
/// rowan's `SyntaxElementChildren` iterator for `OrgLanguage`.
pub type SyntaxElementChildren = rowan::SyntaxElementChildren<OrgLanguage>;
/// Kind of every token and node that can appear in an Org syntax tree.
///
/// The enum is `repr(u16)` and its discriminant is used directly as the raw
/// rowan kind (see `OrgLanguage::kind_to_raw`), so variants rely on their
/// implicit, declaration-order numbering.
// variant names are SCREAMING_SNAKE_CASE rather than UpperCamelCase
#[allow(bad_style)]
// NOTE(review): the reason for silencing all clippy lints here is not visible in this file
#[allow(clippy::all)]
#[non_exhaustive]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
#[repr(u16)]
pub enum SyntaxKind {
//
// token
//
L_BRACKET, // '['
R_BRACKET, // ']'
L_BRACKET2, // '[['
R_BRACKET2, // ']]'
L_PARENS, // '('
R_PARENS, // ')'
L_ANGLE, // '<'
R_ANGLE, // '>'
L_CURLY, // '{'
R_CURLY, // '}'
L_CURLY3, // '{{{'
R_CURLY3, // '}}}'
L_ANGLE2, // '<<'
R_ANGLE2, // '>>'
L_ANGLE3, // '<<<'
R_ANGLE3, // '>>>'
AT, // '@'
AT2, // '@@'
PERCENT, // '%'
PERCENT2, // '%%'
SLASH, // '/'
BACKSLASH, // '\'
DOLLAR, // '$'
DOLLAR2, // '$$'
UNDERSCORE, // '_'
STAR, // '*'
PLUS, // '+'
MINUS, // '-'
MINUS2, // '--'
COLON, // ':'
COLON2, // '::'
EQUAL, // '='
TILDE, // '~'
HASH, // '#'
HASH_PLUS, // '#+'
DOUBLE_ARROW, // '=>'
PIPE, // '|'
COMMA, // ','
CARET, // '^'
NEW_LINE, // '\n' or '\r\n' or '\r'
WHITESPACE, // ' ' or '\t'
BLANK_LINE,
TEXT,
//
// document structure
//
DOCUMENT,
SECTION,
PARAGRAPH,
HEADLINE,
HEADLINE_STARS,
HEADLINE_TITLE,
HEADLINE_KEYWORD_TODO,
HEADLINE_KEYWORD_DONE,
HEADLINE_PRIORITY,
HEADLINE_TAGS,
PROPERTY_DRAWER,
NODE_PROPERTY,
PLANNING,
PLANNING_DEADLINE,
PLANNING_SCHEDULED,
PLANNING_CLOSED,
//
// elements
//
/* table */
ORG_TABLE,
ORG_TABLE_RULE_ROW,
ORG_TABLE_STANDARD_ROW,
ORG_TABLE_CELL,
/* list */
LIST,
LIST_ITEM,
LIST_ITEM_INDENT,
LIST_ITEM_BULLET,
LIST_ITEM_COUNTER,
LIST_ITEM_CHECK_BOX,
LIST_ITEM_TAG,
LIST_ITEM_CONTENT,
/* drawer */
DRAWER,
DRAWER_BEGIN,
DRAWER_END,
KEYWORD,
BABEL_CALL,
AFFILIATED_KEYWORD,
TABLE_EL,
CLOCK,
FN_DEF,
COMMENT,
RULE,
FIXED_WIDTH,
/* dyn block */
DYN_BLOCK,
DYN_BLOCK_BEGIN,
DYN_BLOCK_END,
/* block */
SPECIAL_BLOCK,
QUOTE_BLOCK,
CENTER_BLOCK,
VERSE_BLOCK,
COMMENT_BLOCK,
EXAMPLE_BLOCK,
EXPORT_BLOCK,
SOURCE_BLOCK,
SOURCE_BLOCK_LANG,
BLOCK_BEGIN,
BLOCK_END,
BLOCK_CONTENT,
SRC_BLOCK_SWITCHES,
SRC_BLOCK_LANGUAGE,
SRC_BLOCK_PARAMETERS,
EXPORT_BLOCK_TYPE,
LATEX_ENVIRONMENT,
//
// objects
//
INLINE_CALL,
INLINE_SRC,
LINK,
LINK_PATH,
LINE_BREAK,
COOKIE,
RADIO_TARGET,
FN_REF,
LATEX_FRAGMENT,
MACROS,
SNIPPET,
TARGET,
BOLD,
STRIKE,
ITALIC,
UNDERLINE,
VERBATIM,
CODE,
ENTITY,
SUPERSCRIPT,
SUBSCRIPT,
/* timestamp */
TIMESTAMP_ACTIVE,
TIMESTAMP_INACTIVE,
TIMESTAMP_DIARY,
// timestamp tokens
TIMESTAMP_YEAR,
TIMESTAMP_MONTH,
TIMESTAMP_DAY,
TIMESTAMP_HOUR,
TIMESTAMP_MINUTE,
TIMESTAMP_DAYNAME,
// for repeater or delay
TIMESTAMP_REPEATER_MARK,
TIMESTAMP_DELAY_MARK,
TIMESTAMP_VALUE,
TIMESTAMP_UNIT,
}
impl From<SyntaxKind> for rowan::SyntaxKind {
fn from(value: SyntaxKind) -> Self {
OrgLanguage::kind_to_raw(value)
}
}
impl SyntaxKind {
    /// whether this node is [object](https://orgmode.org/worg/org-syntax.html#Objects)
    ///
    /// Both targets (`<<target>>`) and radio targets (`<<<target>>>`) count
    /// as objects.
    pub fn is_object(&self) -> bool {
        matches!(
            self,
            SyntaxKind::ENTITY
                | SyntaxKind::LATEX_FRAGMENT
                | SyntaxKind::SNIPPET
                | SyntaxKind::FN_REF
                | SyntaxKind::INLINE_CALL
                | SyntaxKind::INLINE_SRC
                | SyntaxKind::LINE_BREAK
                | SyntaxKind::LINK
                | SyntaxKind::MACROS
                | SyntaxKind::TARGET
                | SyntaxKind::RADIO_TARGET
                | SyntaxKind::COOKIE
                | SyntaxKind::SUPERSCRIPT
                | SyntaxKind::SUBSCRIPT
                | SyntaxKind::ORG_TABLE_CELL
                | SyntaxKind::TIMESTAMP_ACTIVE
                | SyntaxKind::TIMESTAMP_INACTIVE
                | SyntaxKind::TIMESTAMP_DIARY
                | SyntaxKind::BOLD
                | SyntaxKind::ITALIC
                | SyntaxKind::UNDERLINE
                | SyntaxKind::VERBATIM
                | SyntaxKind::CODE
                | SyntaxKind::STRIKE
        )
    }

    /// whether this node is [element](https://orgmode.org/worg/org-syntax.html#Elements)
    ///
    /// That is: a headline, a section, a lesser element or a greater element.
    pub fn is_element(&self) -> bool {
        matches!(self, SyntaxKind::HEADLINE | SyntaxKind::SECTION)
            || self.is_lesser_element()
            || self.is_greater_element()
    }

    /// whether this node is [lesser element](https://orgmode.org/worg/org-syntax.html#Lesser_Elements)
    pub fn is_lesser_element(&self) -> bool {
        matches!(
            self,
            SyntaxKind::COMMENT_BLOCK
                | SyntaxKind::EXAMPLE_BLOCK
                | SyntaxKind::EXPORT_BLOCK
                | SyntaxKind::SOURCE_BLOCK
                | SyntaxKind::VERSE_BLOCK
                | SyntaxKind::CLOCK
                | SyntaxKind::PLANNING
                | SyntaxKind::COMMENT
                | SyntaxKind::FIXED_WIDTH
                | SyntaxKind::RULE
                | SyntaxKind::KEYWORD
                | SyntaxKind::AFFILIATED_KEYWORD
                | SyntaxKind::BABEL_CALL
                | SyntaxKind::LATEX_ENVIRONMENT
                | SyntaxKind::NODE_PROPERTY
                | SyntaxKind::PARAGRAPH
                | SyntaxKind::ORG_TABLE_RULE_ROW
                | SyntaxKind::ORG_TABLE_STANDARD_ROW
        )
    }

    /// whether this node is [greater element](https://orgmode.org/worg/org-syntax.html#Greater_Elements)
    pub fn is_greater_element(&self) -> bool {
        matches!(
            self,
            SyntaxKind::CENTER_BLOCK
                | SyntaxKind::QUOTE_BLOCK
                | SyntaxKind::SPECIAL_BLOCK
                | SyntaxKind::DRAWER
                | SyntaxKind::DYN_BLOCK
                | SyntaxKind::FN_DEF
                | SyntaxKind::LIST_ITEM
                | SyntaxKind::LIST
                | SyntaxKind::PROPERTY_DRAWER
                | SyntaxKind::ORG_TABLE
        )
    }
}

View file

@ -1,366 +0,0 @@
use nom::{IResult, InputTake};
use super::{
combinator::GreenElement,
cookie::cookie_node,
emphasis::{
self, bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node,
},
entity::entity_node,
fn_ref::fn_ref_node,
inline_call::inline_call_node,
inline_src::inline_src_node,
input::Input,
latex_fragment::latex_fragment_node,
line_break::line_break_node,
link::link_node,
macros::macros_node,
radio_target::radio_target_node,
snippet::snippet_node,
subscript_superscript::{self, subscript_node, superscript_node},
target::target_node,
timestamp::{timestamp_active_node, timestamp_diary_node, timestamp_inactive_node},
};
/// Iterator over the positions in `input` where an object might start,
/// i.e. where one of the bytes in `finder` occurs.
struct ObjectPositions<'a> {
// text being scanned
input: Input<'a>,
// byte offset in `input` from which the next search starts
pos: usize,
// set of bytes that can begin an object
finder: jetscii::BytesConst,
}
// Constructors: one per object set. Each differs only in the bytes that are
// treated as a potential object start. `b'_'` appears twice in every list
// (once for underline markup, once for subscript).
impl ObjectPositions<'_> {
/// Candidate positions for the standard object set.
fn standard(input: Input) -> ObjectPositions {
ObjectPositions {
input,
pos: 0,
finder: jetscii::bytes!(
b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */
b'@', /* snippet */
b'<', /* timestamp, target, radio target */
b'[', /* link, cookie, fn_ref, timestamp */
b'c', /* inline call */
b's', /* inline source */
b'\\', b'$', /* latex & entity */
b'{', /* macros */
b'^', /* superscript */
b'_' /* subscript */
),
}
}
/// Candidate positions for the minimal object set.
fn minimal(input: Input) -> ObjectPositions {
ObjectPositions {
input,
pos: 0,
finder: jetscii::bytes!(
b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */
b'\\', b'$', /* latex & entity */
b'^', /* superscript */
b'_' /* subscript */
),
}
}
/// Candidate positions for objects inside a link description.
fn link_description(input: Input) -> ObjectPositions {
ObjectPositions {
input,
pos: 0,
finder: jetscii::bytes!(
b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */
b'\\', b'$', /* latex & entity */
b'@', /* snippet */
b'c', /* inline call */
b's', /* inline source */
b'{', /* macros */
b'[', /* cookie */
b'^', /* superscript */
b'_' /* subscript */
),
}
}
}
impl<'a> Iterator for ObjectPositions<'a> {
    /// The input split at a candidate position, as returned by
    /// `Input::take_split`.
    type Item = (Input<'a>, Input<'a>);

    /// Advances to the next byte that may begin an object and yields the
    /// input split at that byte. Stops once fewer than two bytes remain from
    /// the candidate on.
    fn next(&mut self) -> Option<Self::Item> {
        let exhausted = self.pos >= self.input.len();
        if self.input.len() < 2 || exhausted {
            return None;
        }

        let search_from = self.pos;
        let haystack = &self.input.as_bytes()[search_from..];
        let hit = search_from + self.finder.find(haystack)?;

        // resume just past this candidate on the following call
        self.pos = hit + 1;
        debug_assert!(
            search_from < self.pos && self.pos <= self.input.s.len(),
            "{} < {} < {}",
            search_from,
            self.pos,
            self.input.s.len()
        );

        // a valid object requires at least two characters
        let remaining = self.input.s.len() - hit;
        if remaining < 2 {
            return None;
        }

        Some(self.input.take_split(hit))
    }
}
/// parse minimal sets of objects, including
/// - LaTeX fragments ('\\', '$')
/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/')
/// - Entities ('\\')
/// - Superscripts and Subscripts ('^', '_')
///
/// The closure receives the input starting at a candidate byte (`i`) and the
/// text before it (`pre`). Arms are tried top to bottom, so for `_` the
/// underline arm takes precedence over the subscript arm whenever its guard
/// holds.
pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
object_nodes(
ObjectPositions::minimal,
|i: Input, pre: Input| match &i.as_bytes()[0] {
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'$' => latex_fragment_node(i),
// a backslash is tried as an entity first, then as a LaTeX fragment
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
// only reached when the underline guard above did not hold
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
},
input,
)
}
/// parses standard sets of objects, including
///
/// - Entities
/// - LaTeX Fragments
/// - Export Snippets
/// - Footnote References
/// - Inline Babel Calls
/// - Inline Source Blocks
/// - Links
/// - Macros
/// - Targets and Radio Targets
/// - Statistics Cookies
/// - Timestamps
/// - Text Markup (bold code strike verbatim underline italic)
/// - Line Breaks
/// - Subscript and Superscript
///
/// Not yet supported (todo):
/// - Citations
///
/// The closure receives the input starting at a candidate byte (`i`) and the
/// text before it (`pre`). Arms are tried top to bottom, and the first arm
/// whose pattern and guard match decides the result.
pub fn standard_object_nodes(input: Input) -> Vec<GreenElement> {
object_nodes(
ObjectPositions::standard,
|i: Input, pre: Input| match &i.as_bytes()[0] {
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'@' => snippet_node(i),
b'{' => macros_node(i),
// `<`: radio target, then target, then diary timestamp, then active timestamp
b'<' => radio_target_node(i)
.or_else(|_| target_node(i))
.or_else(|_| timestamp_diary_node(i))
.or_else(|_| timestamp_active_node(i)),
// `[`: cookie, then link, then footnote reference, then inactive timestamp
b'[' => cookie_node(i)
.or_else(|_| link_node(i))
.or_else(|_| fn_ref_node(i))
.or_else(|_| timestamp_inactive_node(i)),
// NOTE: although not specified in document, inline call and inline src follows the
// same pre tokens rule as text markup
b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
b'$' => latex_fragment_node(i),
// `\\` not preceded by another backslash is a line break candidate;
// indexing byte 1 is fine because `ObjectPositions` only yields
// positions with at least two bytes remaining
b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i),
// any other backslash is tried as an entity first, then as a LaTeX fragment
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
// only reached when the underline guard above did not hold
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
},
input,
)
}
/// Parses the objects recognized by this function's dispatch table:
/// export snippets, inline calls, inline source blocks, macros, statistics
/// cookies, text markup, LaTeX fragments, entities, superscripts and
/// subscripts.
///
/// Compared with `standard_object_nodes`, there are no arms for `<`, for
/// links / footnote references / inactive timestamps after `[`, or for line
/// breaks. Arms are tried top to bottom; only the first arm whose guard holds
/// runs.
pub fn link_description_object_nodes(input: Input) -> Vec<GreenElement> {
object_nodes(
ObjectPositions::link_description,
|i: Input<'_>, pre: Input<'_>| match &i.as_bytes()[0] {
b'@' => snippet_node(i),
b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
b'{' => macros_node(i),
// only statistics cookies are tried after `[` here
b'[' => cookie_node(i),
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'$' => latex_fragment_node(i),
// a backslash is tried as an entity first, then as a LaTeX fragment
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
// NOTE(review): reached only when the underline guard above rejected `pre`
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
},
input,
)
}
/// Splits `input` into a lossless sequence of object nodes and text tokens.
///
/// `position` builds the iterator of candidate object starts for the
/// remaining input; `parse` is called with each candidate and the text that
/// precedes it. The first candidate that parses is emitted (after the
/// preceding text, when non-empty) and scanning restarts after it. When no
/// candidate parses, the rest of the input becomes a single text token.
fn object_nodes<'a, F, P>(position: F, parse: P, input: Input<'a>) -> Vec<GreenElement>
where
    F: Fn(Input) -> ObjectPositions,
    P: Fn(Input<'a>, Input<'a>) -> IResult<Input<'a>, GreenElement, ()>,
{
    let mut nodes = vec![];
    let mut remaining = input;

    while !remaining.is_empty() {
        // first candidate position at which `parse` succeeds, if any
        let hit = position(remaining).find_map(|(candidate, leading)| {
            debug_assert!(
                candidate.s.len() >= 2,
                "object must have at least two characters: {:?}",
                candidate.s
            );
            parse(candidate, leading)
                .ok()
                .map(|(rest, object)| (leading, object, rest))
        });

        match hit {
            Some((leading, object, rest)) => {
                if !leading.is_empty() {
                    nodes.push(leading.text_token())
                }
                nodes.push(object);
                debug_assert!(
                    rest.len() < remaining.len(),
                    "{} < {}",
                    rest.len(),
                    remaining.len()
                );
                remaining = rest;
            }
            None => {
                // nothing parsed: everything left is plain text
                nodes.push(remaining.text_token());
                break;
            }
        }
    }

    debug_assert_eq!(
        input.as_str(),
        nodes.iter().map(|n| n.to_string()).collect::<String>(),
        "parser must be lossless"
    );
    nodes
}
// Checks which candidate positions `ObjectPositions::standard` yields:
// none for a one-character input, and one per marker byte otherwise, each
// item's first element starting at that marker.
#[test]
fn positions() {
let config = crate::ParseConfig::default();
// a single character can never start an object
let vec = ObjectPositions::standard(("*", &config).into()).collect::<Vec<_>>();
assert!(vec.is_empty());
let vec = ObjectPositions::standard(("*{", &config).into()).collect::<Vec<_>>();
assert_eq!(vec.len(), 1);
assert_eq!(vec[0].0.s, "*{");
// https://github.com/PoiScript/orgize/issues/69
let vec = ObjectPositions::standard(("{3}", &config).into()).collect::<Vec<_>>();
assert_eq!(vec.len(), 1);
assert_eq!(vec[0].0.s, "{3}");
let vec = ObjectPositions::standard(("*{()}//s\nc<<", &config).into()).collect::<Vec<_>>();
assert_eq!(vec.len(), 7);
assert_eq!(vec[0].0.s, "*{()}//s\nc<<");
assert_eq!(vec[1].0.s, "{()}//s\nc<<");
assert_eq!(vec[2].0.s, "//s\nc<<");
assert_eq!(vec[3].0.s, "/s\nc<<");
assert_eq!(vec[4].0.s, "s\nc<<");
assert_eq!(vec[5].0.s, "c<<");
assert_eq!(vec[6].0.s, "<<");
}
// Snapshot tests for `standard_object_nodes`: the parsed objects are wrapped
// in a PARAGRAPH node so the resulting syntax tree can be printed.
#[test]
fn parse() {
use crate::{
syntax::{combinator::node, SyntaxKind, SyntaxNode},
ParseConfig,
};
// helper: parse `input` as standard objects under a PARAGRAPH root
let t = |input: &str| {
let config = &ParseConfig::default();
let children = standard_object_nodes((input, config).into());
SyntaxNode::new_root(node(SyntaxKind::PARAGRAPH, children).into_node().unwrap())
};
// code markup followed by a footnote reference containing nested markup
insta::assert_debug_snapshot!(
t("~org-inlinetask-min-level~[fn:oiml:The default value of \n~org-inlinetask-min-level~ is =15=.]"),
@r###"
PARAGRAPH@0..93
CODE@0..26
TILDE@0..1 "~"
TEXT@1..25 "org-inlinetask-min-level"
TILDE@25..26 "~"
FN_REF@26..93
L_BRACKET@26..27 "["
TEXT@27..29 "fn"
COLON@29..30 ":"
TEXT@30..34 "oiml"
COLON@34..35 ":"
TEXT@35..57 "The default value of \n"
CODE@57..83
TILDE@57..58 "~"
TEXT@58..82 "org-inlinetask-min-level"
TILDE@82..83 "~"
TEXT@83..87 " is "
VERBATIM@87..91
EQUAL@87..88 "="
TEXT@88..90 "15"
EQUAL@90..91 "="
TEXT@91..92 "."
R_BRACKET@92..93 "]"
"###
);
// italic, bold, verbatim and code markup interleaved with plain text
insta::assert_debug_snapshot!(
t(r#"Org is a /plaintext markup syntax/ developed with *Emacs* in 2003.
The canonical parser is =org-element.el=, which provides a number of
functions starting with ~org-element-~."#),
@r###"
PARAGRAPH@0..175
TEXT@0..9 "Org is a "
ITALIC@9..34
SLASH@9..10 "/"
TEXT@10..33 "plaintext markup syntax"
SLASH@33..34 "/"
TEXT@34..50 " developed with "
BOLD@50..57
STAR@50..51 "*"
TEXT@51..56 "Emacs"
STAR@56..57 "*"
TEXT@57..91 " in 2003.\nThe canonic ..."
VERBATIM@91..107
EQUAL@91..92 "="
TEXT@92..106 "org-element.el"
EQUAL@106..107 "="
TEXT@107..160 ", which provides a nu ..."
CODE@160..174
TILDE@160..161 "~"
TEXT@161..173 "org-element-"
TILDE@173..174 "~"
TEXT@174..175 "."
"###
);
// superscript directly after a non-blank character
insta::assert_debug_snapshot!(
t("a^abc"),
@r###"
PARAGRAPH@0..5
TEXT@0..1 "a"
SUPERSCRIPT@1..5
CARET@1..2 "^"
TEXT@2..5 "abc"
"###
);
}

View file

@ -1,101 +0,0 @@
use nom::{IResult, InputTake};
use super::{
combinator::{blank_lines, line_ends_iter, node, GreenElement},
input::Input,
keyword::affiliated_keyword_nodes,
object::standard_object_nodes,
SyntaxKind,
};
/// Recognizes one paragraph.
///
/// Runs `paragraph_node_base` through `crate::lossless_parser!`
/// (NOTE(review): the macro is defined elsewhere; presumably it checks that
/// the produced node covers exactly the consumed input — confirm).
pub fn paragraph_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(paragraph_node_base, input)
}
/// Recognizes consecutive paragraphs until the input is exhausted.
///
/// Returns the paragraph nodes in source order, or the first error reported
/// by `paragraph_node`.
pub fn paragraph_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
    let mut paragraphs = Vec::new();
    let mut remaining = input;
    loop {
        if remaining.is_empty() {
            return Ok(paragraphs);
        }
        let (rest, paragraph) = paragraph_node(remaining)?;
        paragraphs.push(paragraph);
        // every paragraph must consume something, otherwise this would loop forever
        debug_assert!(
            remaining.len() > rest.len(),
            "{} > {}",
            remaining.len(),
            rest.len()
        );
        remaining = rest;
    }
}
/// Parses one paragraph: optional affiliated keywords, the lines up to (not
/// including) the first blank line, then any trailing blank lines.
///
/// The paragraph text is parsed with `standard_object_nodes`.
fn paragraph_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    debug_assert!(!input.is_empty());

    let (input, keywords) = affiliated_keyword_nodes(input)?;

    // byte offset just past the last non-blank line seen so far
    let mut end = 0;
    for line_end in line_ends_iter(input.as_str()) {
        let is_blank = input.s[end..line_end]
            .bytes()
            .all(|byte| byte.is_ascii_whitespace());
        // stops at blank line
        if is_blank {
            break;
        }
        end = line_end;
    }

    let (input, contents) = input.take_split(end);
    let (input, post_blank) = blank_lines(input)?;

    let children: Vec<GreenElement> = keywords
        .into_iter()
        .chain(standard_object_nodes(contents))
        .chain(post_blank)
        .collect();

    Ok((input, node(SyntaxKind::PARAGRAPH, children)))
}
// Snapshot tests for `paragraph_node`: single line, trailing blank lines,
// multiple lines, and a paragraph terminated by a blank line.
#[test]
fn parse() {
use crate::{ast::Paragraph, tests::to_ast};
let to_paragraph = to_ast::<Paragraph>(paragraph_node);
insta::assert_debug_snapshot!(
to_paragraph(r#"a"#).syntax,
@r###"
PARAGRAPH@0..1
TEXT@0..1 "a"
"###
);
insta::assert_debug_snapshot!(
to_paragraph(r#"a
"#).syntax,
@r###"
PARAGRAPH@0..6
TEXT@0..2 "a\n"
BLANK_LINE@2..6 " "
"###
);
insta::assert_debug_snapshot!(
to_paragraph(r#"a
b
c
"#).syntax,
@r###"
PARAGRAPH@0..6
TEXT@0..6 "a\nb\nc\n"
"###
);
insta::assert_debug_snapshot!(
to_paragraph(r#"a
c
"#).syntax,
@r###"
PARAGRAPH@0..3
TEXT@0..2 "a\n"
BLANK_LINE@2..3 "\n"
"###
);
}

View file

@ -1,91 +0,0 @@
use nom::{
branch::alt, bytes::complete::tag, character::complete::space0, combinator::iterator,
sequence::tuple, IResult,
};
use super::{
combinator::{eol_or_eof, GreenElement, NodeBuilder},
input::Input,
timestamp::{timestamp_active_node, timestamp_inactive_node},
SyntaxKind::*,
};
/// Recognizes one planning line (`DEADLINE:`, `SCHEDULED:` and/or `CLOSED:`
/// entries, each followed by a timestamp).
///
/// Expects a non-empty input (debug-asserted) and runs `planning_node_base`
/// through `crate::lossless_parser!`.
pub fn planning_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert!(!input.is_empty());
crate::lossless_parser!(planning_node_base, input)
}
/// Parses a planning line: one or more `KEYWORD: <timestamp>` entries,
/// optional trailing spaces, then an end of line or end of input.
///
/// Each entry becomes a `PLANNING_DEADLINE`, `PLANNING_SCHEDULED` or
/// `PLANNING_CLOSED` child of the resulting `PLANNING` node. Fails when no
/// entry could be parsed.
fn planning_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut b = NodeBuilder::new();
    let initial_len = b.len();

    let keyword_tag = alt((tag("DEADLINE:"), tag("SCHEDULED:"), tag("CLOSED:")));
    let timestamp_node = alt((timestamp_active_node, timestamp_inactive_node));
    let mut entries = iterator(input, tuple((space0, keyword_tag, space0, timestamp_node)));

    for (leading, keyword, gap, timestamp) in &mut entries {
        let kind = match keyword.as_str() {
            "DEADLINE:" => PLANNING_DEADLINE,
            "SCHEDULED:" => PLANNING_SCHEDULED,
            "CLOSED:" => PLANNING_CLOSED,
            // `keyword_tag` only matches the three keywords above
            _ => unreachable!(),
        };

        let mut entry = NodeBuilder::new();
        entry.ws(leading);
        entry.text(keyword);
        entry.ws(gap);
        entry.push(timestamp);
        b.push(entry.finish(kind));
    }

    // nothing was pushed: this is not a planning line
    if b.len() == initial_len {
        return Err(nom::Err::Error(()));
    }

    let (input, _) = entries.finish()?;
    let (input, trailing) = space0(input)?;
    let (input, newline) = eol_or_eof(input)?;
    b.ws(trailing);
    b.nl(newline);

    Ok((input, b.finish(PLANNING)))
}
// Snapshot test for `planning_node`, plus two inputs that must be rejected.
// NOTE(review): the function name `prase` looks like a typo for `parse`,
// which is the name the sibling parser tests use.
#[test]
fn prase() {
use crate::{ast::Planning, tests::to_ast, ParseConfig};
let to_planning = to_ast::<Planning>(planning_node);
insta::assert_debug_snapshot!(
to_planning("SCHEDULED: <2019-04-08 Mon>").syntax,
@r###"
PLANNING@0..27
PLANNING_SCHEDULED@0..27
TEXT@0..10 "SCHEDULED:"
WHITESPACE@10..11 " "
TIMESTAMP_ACTIVE@11..27
L_ANGLE@11..12 "<"
TIMESTAMP_YEAR@12..16 "2019"
MINUS@16..17 "-"
TIMESTAMP_MONTH@17..19 "04"
MINUS@19..20 "-"
TIMESTAMP_DAY@20..22 "08"
WHITESPACE@22..23 " "
TIMESTAMP_DAYNAME@23..26 "Mon"
R_ANGLE@26..27 ">"
"###
);
let config = &ParseConfig::default();
// whitespace only, and a keyword without a timestamp, are not planning lines
assert!(planning_node((" ", config).into()).is_err());
assert!(planning_node((" SCHEDULED: ", config).into()).is_err());
}

View file

@ -1,83 +0,0 @@
use nom::{
bytes::complete::take_while,
combinator::{map, verify},
sequence::tuple,
IResult,
};
use super::{
combinator::{l_angle3_token, node, r_angle3_token, GreenElement},
input::Input,
object::minimal_object_nodes,
SyntaxKind::*,
};
/// Recognizes a radio target: `<<<CONTENTS>>>`.
///
/// CONTENTS is a non-empty run without `<`, `>` or newline that neither
/// starts nor ends with a space; it is parsed with `minimal_object_nodes`.
pub fn radio_target_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let contents = verify(
        take_while(|c: char| !matches!(c, '<' | '>' | '\n')),
        |text: &Input| {
            let text = text.as_str();
            !text.is_empty() && !text.starts_with(' ') && !text.ends_with(' ')
        },
    );

    let mut parser = map(
        tuple((l_angle3_token, contents, r_angle3_token)),
        |(open, inner, close)| {
            let children: Vec<GreenElement> = std::iter::once(open)
                .chain(minimal_object_nodes(inner))
                .chain(std::iter::once(close))
                .collect();
            node(RADIO_TARGET, children)
        },
    );

    crate::lossless_parser!(parser, input)
}
// Snapshot tests for `radio_target_node`, followed by inputs that must be
// rejected (leading/trailing space, `<`, `>`, newline, unbalanced brackets).
#[test]
fn parse() {
use crate::{ast::RadioTarget, tests::to_ast, ParseConfig};
let to_radio_target = to_ast::<RadioTarget>(radio_target_node);
insta::assert_debug_snapshot!(
to_radio_target("<<<target>>>").syntax,
@r###"
RADIO_TARGET@0..12
L_ANGLE3@0..3 "<<<"
TEXT@3..9 "target"
R_ANGLE3@9..12 ">>>"
"###
);
insta::assert_debug_snapshot!(
to_radio_target("<<<tar get>>>").syntax,
@r###"
RADIO_TARGET@0..13
L_ANGLE3@0..3 "<<<"
TEXT@3..10 "tar get"
R_ANGLE3@10..13 ">>>"
"###
);
// contents are parsed as minimal objects, so an entity is recognized
insta::assert_debug_snapshot!(
to_radio_target("<<<\\alpha>>>").syntax,
@r###"
RADIO_TARGET@0..12
L_ANGLE3@0..3 "<<<"
ENTITY@3..9
BACKSLASH@3..4 "\\"
TEXT@4..9 "alpha"
R_ANGLE3@9..12 ">>>"
"###
);
let config = &ParseConfig::default();
assert!(radio_target_node(("<<<target >>>", config).into()).is_err());
assert!(radio_target_node(("<<< target>>>", config).into()).is_err());
assert!(radio_target_node(("<<<ta<get>>>", config).into()).is_err());
assert!(radio_target_node(("<<<ta>get>>>", config).into()).is_err());
assert!(radio_target_node(("<<<ta\nget>>>", config).into()).is_err());
assert!(radio_target_node(("<<<target>>", config).into()).is_err());
}

View file

@ -1,88 +0,0 @@
use nom::{
bytes::complete::take_while_m_n, character::complete::space0, combinator::map, sequence::tuple,
IResult,
};
use super::{
combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder},
input::Input,
SyntaxKind::*,
};
/// Recognizes a horizontal rule: optional indentation, at least five dashes,
/// optional trailing spaces, an end of line (or end of input), and any blank
/// lines that follow.
pub fn rule_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let dashes = take_while_m_n(5, usize::MAX, |c: char| c == '-');
    let line = tuple((space0, dashes, space0, eol_or_eof, blank_lines));

    let mut parser = map(line, |(indent, rule, trailing, newline, blanks)| {
        let mut builder = NodeBuilder::new();
        builder.ws(indent);
        builder.text(rule);
        builder.ws(trailing);
        builder.nl(newline);
        builder.children.extend(blanks);
        builder.finish(RULE)
    });

    crate::lossless_parser!(parser, input)
}
// Snapshot tests for `rule_node`, followed by inputs that must be rejected
// (fewer than five dashes, or any text before/after the dashes).
#[test]
fn parse() {
use crate::{ast::Rule, tests::to_ast, ParseConfig};
let to_rule = to_ast::<Rule>(rule_node);
insta::assert_debug_snapshot!(
to_rule("-----").syntax,
@r###"
RULE@0..5
TEXT@0..5 "-----"
"###
);
insta::assert_debug_snapshot!(
to_rule("--------").syntax,
@r###"
RULE@0..8
TEXT@0..8 "--------"
"###
);
// trailing blank lines belong to the rule node
insta::assert_debug_snapshot!(
to_rule("-----\n\n\n").syntax,
@r###"
RULE@0..8
TEXT@0..5 "-----"
NEW_LINE@5..6 "\n"
BLANK_LINE@6..7 "\n"
BLANK_LINE@7..8 "\n"
"###
);
insta::assert_debug_snapshot!(
to_rule("----- \n").syntax,
@r###"
RULE@0..8
TEXT@0..5 "-----"
WHITESPACE@5..7 " "
NEW_LINE@7..8 "\n"
"###
);
let config = &ParseConfig::default();
assert!(rule_node(("", config).into()).is_err());
assert!(rule_node(("----", config).into()).is_err());
assert!(rule_node(("None----", config).into()).is_err());
assert!(rule_node(("None ----", config).into()).is_err());
assert!(rule_node(("None------", config).into()).is_err());
assert!(rule_node(("----None----", config).into()).is_err());
assert!(rule_node(("\t\t----", config).into()).is_err());
assert!(rule_node(("------None", config).into()).is_err());
assert!(rule_node(("----- None", config).into()).is_err());
}

View file

@ -1,92 +0,0 @@
use nom::{
bytes::complete::{take_until, take_while1},
combinator::map,
sequence::tuple,
IResult,
};
use super::{
combinator::{at2_token, colon_token, node, GreenElement},
input::Input,
SyntaxKind::*,
};
/// Recognizes an export snippet: `@@NAME:VALUE@@`.
///
/// NAME is one or more ASCII alphanumerics or `-`; VALUE is everything up to
/// the next `@@` and may be empty.
pub fn snippet_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let label = take_while1(|c: char| c == '-' || c.is_ascii_alphanumeric());
    let body = take_until("@@");

    let mut parser = map(
        tuple((at2_token, label, colon_token, body, at2_token)),
        |(open, label, colon, body, close)| {
            let children = [open, label.text_token(), colon, body.text_token(), close];
            node(SNIPPET, children)
        },
    );

    crate::lossless_parser!(parser, input)
}
// Snapshot tests for `snippet_node`, followed by inputs that must be rejected
// (missing closing `@@`, missing colon, empty name).
#[test]
fn parse() {
use crate::{ast::Snippet, tests::to_ast, ParseConfig};
let to_snippet = to_ast::<Snippet>(snippet_node);
insta::assert_debug_snapshot!(
to_snippet("@@html:<b>@@").syntax,
@r###"
SNIPPET@0..12
AT2@0..2 "@@"
TEXT@2..6 "html"
COLON@6..7 ":"
TEXT@7..10 "<b>"
AT2@10..12 "@@"
"###
);
insta::assert_debug_snapshot!(
to_snippet("@@latex:any arbitrary LaTeX code@@").syntax,
@r###"
SNIPPET@0..34
AT2@0..2 "@@"
TEXT@2..7 "latex"
COLON@7..8 ":"
TEXT@8..32 "any arbitrary LaTeX code"
AT2@32..34 "@@"
"###
);
// an empty value still produces an (empty) TEXT token
insta::assert_debug_snapshot!(
to_snippet("@@html:@@").syntax,
@r###"
SNIPPET@0..9
AT2@0..2 "@@"
TEXT@2..6 "html"
COLON@6..7 ":"
TEXT@7..7 ""
AT2@7..9 "@@"
"###
);
// a single `@` inside the value does not end the snippet
insta::assert_debug_snapshot!(
to_snippet("@@html:<p>@</p>@@").syntax,
@r###"
SNIPPET@0..17
AT2@0..2 "@@"
TEXT@2..6 "html"
COLON@6..7 ":"
TEXT@7..15 "<p>@</p>"
AT2@15..17 "@@"
"###
);
let config = &ParseConfig::default();
assert!(snippet_node(("@@html:<b>@", config).into()).is_err());
assert!(snippet_node(("@@html<b>@@", config).into()).is_err());
assert!(snippet_node(("@@:<b>@@", config).into()).is_err());
}

View file

@ -1,161 +0,0 @@
use memchr::memchr2_iter;
use nom::{
branch::alt,
bytes::complete::{tag, take_while1},
combinator::opt,
IResult, InputTake,
};
use crate::{
syntax::{
combinator::{caret_token, underscore_token},
object::standard_object_nodes,
},
SyntaxKind,
};
use super::{
combinator::{l_curly_token, node, r_curly_token, GreenElement},
input::Input,
};
pub fn superscript_node(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, caret) = caret_token(input)?;
let mut children = vec![caret];
if let Ok((input, star)) = tag::<&str, Input, ()>("*")(input) {
children.push(star.text_token());
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
} else if let Ok((input, (l, contents, r))) = template1(input) {
children.push(l);
children.extend(standard_object_nodes(contents));
children.push(r);
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
} else if let Ok((input, (sign, contents))) = template2(input) {
if let Some(s) = sign {
children.push(s)
}
children.push(contents);
Ok((input, node(SyntaxKind::SUPERSCRIPT, children)))
} else {
Err(nom::Err::Error(()))
}
}
pub fn subscript_node(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, underscore) = underscore_token(input)?;
let mut children = vec![underscore];
if let Ok((input, star)) = tag::<&str, Input, ()>("*")(input) {
children.push(star.text_token());
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
} else if let Ok((input, (l, contents, r))) = template1(input) {
children.push(l);
children.extend(standard_object_nodes(contents));
children.push(r);
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
} else if let Ok((input, (sign, contents))) = template2(input) {
if let Some(s) = sign {
children.push(s)
}
children.push(contents);
Ok((input, node(SyntaxKind::SUBSCRIPT, children)))
} else {
Err(nom::Err::Error(()))
}
}
/// Parses `{CONTENTS}` where the braces inside CONTENTS are balanced.
///
/// Returns the opening brace token, the raw contents, and the closing brace
/// token.
fn template1(input: Input) -> IResult<Input, (GreenElement, Input, GreenElement), ()> {
    let (rest, open) = l_curly_token(input)?;
    let (rest, inner) = balanced_brackets(rest)?;
    r_curly_token(rest).map(|(rest, close)| (rest, (open, inner, close)))
}
/// Parses the brace-less script form: an optional `+` / `-` sign followed by
/// CHARS FINAL, where CHARS is any run of alphanumerics, commas, backslashes
/// and dots, and FINAL is an alphanumeric character.
///
/// The run is taken greedily and then shortened so that it ends on its last
/// alphanumeric character; trailing `,`, `\` and `.` are left in the
/// remaining input (so `2.` yields `2` and leaves `.`). Fails when the run
/// contains no alphanumeric character at all.
///
/// Returns the optional sign token and the contents token.
fn template2(input: Input) -> IResult<Input, (Option<GreenElement>, GreenElement), ()> {
    let (input, sign) = opt(alt((tag("+"), tag("-"))))(input)?;
    let (_, run) =
        take_while1(|c: char| c.is_alphanumeric() || c == ',' || c == '\\' || c == '.')(input)?;

    // byte length of the run up to and including its last alphanumeric char
    let len = run
        .s
        .trim_end_matches(|c: char| !c.is_alphanumeric())
        .len();
    if len == 0 {
        return Err(nom::Err::Error(()));
    }

    // `run` is a prefix of `input`, so `len` is a valid split point in `input`
    let (input, contents) = input.take_split(len);
    Ok((input, (sign.map(|x| x.text_token()), contents.text_token())))
}
/// Finds the `}` that closes a group whose opening `{` was already consumed.
///
/// Splits `input` at that `}` (via `take_split`) and returns the result, or
/// an error when the group is never closed.
fn balanced_brackets(input: Input) -> IResult<Input, Input, ()> {
    let bytes = input.as_bytes();
    // number of nested `{` seen so far that still wait for their `}`
    let mut depth = 0usize;

    for at in memchr2_iter(b'{', b'}', bytes) {
        match bytes[at] {
            b'{' => depth += 1,
            _ if depth > 0 => depth -= 1,
            // a `}` at depth zero closes the outer group
            _ => return Ok(input.take_split(at)),
        }
    }

    Err(nom::Err::Error(()))
}
/// Checks the text preceding a `^` / `_` marker.
///
/// Returns `true` only when `s` is non-empty and its last byte is neither a
/// space nor a tab.
pub fn verify_pre(s: &str) -> bool {
    match s.as_bytes().last() {
        None => false,
        Some(&last) => !matches!(last, b' ' | b'\t'),
    }
}
// Snapshot tests for `subscript_node`: the `*` form, the `{...}` form with
// nested markup, the signed form and the plain form.
#[test]
fn parse() {
use crate::ast::Subscript;
use crate::tests::to_ast;
let to_subscript = to_ast::<Subscript>(subscript_node);
insta::assert_debug_snapshot!(
to_subscript("_*").syntax,
@r###"
SUBSCRIPT@0..2
UNDERSCORE@0..1 "_"
TEXT@1..2 "*"
"###
);
// contents inside braces are parsed as standard objects
insta::assert_debug_snapshot!(
to_subscript("_{*bo\nld*}").syntax,
@r###"
SUBSCRIPT@0..10
UNDERSCORE@0..1 "_"
L_CURLY@1..2 "{"
BOLD@2..9
STAR@2..3 "*"
TEXT@3..8 "bo\nld"
STAR@8..9 "*"
R_CURLY@9..10 "}"
"###
);
insta::assert_debug_snapshot!(
to_subscript("_+123").syntax,
@r###"
SUBSCRIPT@0..5
UNDERSCORE@0..1 "_"
TEXT@1..2 "+"
TEXT@2..5 "123"
"###
);
insta::assert_debug_snapshot!(
to_subscript("_abc").syntax,
@r###"
SUBSCRIPT@0..4
UNDERSCORE@0..1 "_"
TEXT@1..4 "abc"
"###
);
}

View file

@ -1,259 +0,0 @@
use nom::{
bytes::complete::take_while,
character::complete::{multispace0, space0},
combinator::iterator,
sequence::tuple,
Err, IResult, InputTake, Slice,
};
use super::{
combinator::{blank_lines, line_ends_iter, node, pipe_token, GreenElement, NodeBuilder},
input::Input,
keyword::tblfm_keyword_nodes,
object::standard_object_nodes,
SyntaxKind::*,
};
/// Parses an Org table: consecutive lines whose first non-blank character is
/// `|`, followed by optional `TBLFM` keywords and trailing blank lines.
///
/// Lines starting with `|-` become `ORG_TABLE_RULE_ROW` nodes; every other
/// table line is parsed by `table_standard_row_node`. Fails when the first
/// line is not a table line.
fn org_table_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut rows = vec![];
    // byte offset just past the last table line consumed so far
    let mut consumed = 0;

    for line_end in line_ends_iter(input.as_str()) {
        let line = input.slice(consumed..line_end);
        let body = line.as_str().trim_start_matches([' ', '\t']);

        // Org tables end at the first line not starting with a vertical bar.
        if !body.starts_with('|') {
            break;
        }

        let row = if body.starts_with("|-") {
            node(ORG_TABLE_RULE_ROW, [line.text_token()])
        } else {
            table_standard_row_node(line)?
        };
        rows.push(row);
        consumed = line_end;
    }

    if consumed == 0 {
        return Err(nom::Err::Error(()));
    }

    let (input, tblfm) = tblfm_keyword_nodes(input.slice(consumed..))?;
    let (input, post_blank) = blank_lines(input)?;
    rows.extend(tblfm);
    rows.extend(post_blank);

    Ok((input, node(ORG_TABLE, rows)))
}
/// Parses one standard table row (a full line, including its line ending).
///
/// The row is read as optional indentation followed by repeated
/// `| <whitespace> <cell text>` groups. Non-empty cell text is parsed with
/// `standard_object_nodes`; whitespace after the last non-blank byte of a
/// cell is emitted as a separate whitespace token.
fn table_standard_row_node(input: Input) -> Result<GreenElement, nom::Err<()>> {
    let mut b = NodeBuilder::new();

    let (input, indent) = space0(input)?;
    b.ws(indent);

    let mut groups = iterator(
        input,
        tuple((pipe_token, multispace0, take_while(|c: char| c != '|'))),
    );

    for (pipe, padding, cell) in &mut groups {
        b.push(pipe);
        b.ws(padding);

        if cell.is_empty() {
            continue;
        }

        let last_visible = cell
            .as_bytes()
            .iter()
            .rposition(|byte| !byte.is_ascii_whitespace());

        match last_visible {
            Some(idx) => {
                // split the trailing whitespace off the cell contents
                let (trailing, contents) = cell.take_split(idx + 1);
                b.push(node(ORG_TABLE_CELL, standard_object_nodes(contents)));
                b.ws(trailing);
            }
            None => {
                b.push(node(ORG_TABLE_CELL, standard_object_nodes(cell)));
            }
        }
    }

    let (input, _) = groups.finish()?;
    debug_assert!(input.is_empty());

    Ok(b.finish(ORG_TABLE_STANDARD_ROW))
}
/// Parses a table.el table: a first line made only of `+` and `-` that starts
/// with `+-`, then every following line that starts with `|` or `+`, then any
/// trailing blank lines.
///
/// The table body is kept as a single text token inside a `TABLE_EL` node.
fn table_el_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    // byte offset just past the last table line accepted so far
    let mut end = 0;

    for line_end in line_ends_iter(input.as_str()) {
        let trimmed = input.s[end..line_end].trim();

        // Table.el tables start at lines beginning with "+-" string and followed by plus or minus signs
        let is_first_line = end == 0;
        let is_border =
            trimmed.starts_with("+-") && trimmed.bytes().all(|c| c == b'+' || c == b'-');
        if is_first_line && !is_border {
            return Err(Err::Error(()));
        }

        // Table.el tables end at the first line not starting with either a vertical line or a plus sign.
        if !(trimmed.starts_with('|') || trimmed.starts_with('+')) {
            break;
        }

        end = line_end;
    }

    let (input, contents) = input.take_split(end);
    let (input, post_blank) = blank_lines(input)?;

    let mut children = vec![contents.text_token()];
    children.extend(post_blank);

    Ok((input, node(TABLE_EL, children)))
}
/// Recognizes one Org table (lines starting with `|`).
///
/// Runs `org_table_node_base` through `crate::lossless_parser!`; the call is
/// traced at debug level with the input text recorded as a field.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn org_table_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(org_table_node_base, input)
}
/// Recognizes one table.el table (first line starting with `+-`).
///
/// Runs `table_el_node_base` through `crate::lossless_parser!`; the call is
/// traced at debug level with the input text recorded as a field.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn table_el_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(table_el_node_base, input)
}
// Snapshot tests for `org_table_node`: a lone pipe, mixed rule/standard rows,
// and tables followed by one or two TBLFM keywords.
#[test]
fn parse_org_table() {
use crate::{ast::OrgTable, tests::to_ast};
let to_org_table = to_ast::<OrgTable>(org_table_node);
insta::assert_debug_snapshot!(
to_org_table("|").syntax,
@r###"
ORG_TABLE@0..1
ORG_TABLE_STANDARD_ROW@0..1
PIPE@0..1 "|"
"###
);
insta::assert_debug_snapshot!(
to_org_table(
r#"|
|-
|a
|-
| a |
"#
).syntax,
@r###"
ORG_TABLE@0..20
ORG_TABLE_STANDARD_ROW@0..2
PIPE@0..1 "|"
WHITESPACE@1..2 "\n"
ORG_TABLE_RULE_ROW@2..5
TEXT@2..5 "|-\n"
ORG_TABLE_STANDARD_ROW@5..8
PIPE@5..6 "|"
ORG_TABLE_CELL@6..7
TEXT@6..7 "a"
WHITESPACE@7..8 "\n"
ORG_TABLE_RULE_ROW@8..11
TEXT@8..11 "|-\n"
ORG_TABLE_STANDARD_ROW@11..20
PIPE@11..12 "|"
WHITESPACE@12..15 " "
ORG_TABLE_CELL@15..16
TEXT@15..16 "a"
WHITESPACE@16..18 " "
PIPE@18..19 "|"
WHITESPACE@19..20 "\n"
"###
);
// a lowercase `#+tblfm:` keyword is attached to the table
insta::assert_debug_snapshot!(
to_org_table("| a |\n#+tblfm: test").syntax,
@r###"
ORG_TABLE@0..19
ORG_TABLE_STANDARD_ROW@0..6
PIPE@0..1 "|"
WHITESPACE@1..2 " "
ORG_TABLE_CELL@2..3
TEXT@2..3 "a"
WHITESPACE@3..4 " "
PIPE@4..5 "|"
WHITESPACE@5..6 "\n"
KEYWORD@6..19
HASH_PLUS@6..8 "#+"
TEXT@8..13 "tblfm"
COLON@13..14 ":"
TEXT@14..19 " test"
"###
);
// several TBLFM keywords may follow the table
insta::assert_debug_snapshot!(
to_org_table("| a |\n#+TBLFM: test1\n#+TBLFM: test2").syntax,
@r###"
ORG_TABLE@0..35
ORG_TABLE_STANDARD_ROW@0..6
PIPE@0..1 "|"
WHITESPACE@1..2 " "
ORG_TABLE_CELL@2..3
TEXT@2..3 "a"
WHITESPACE@3..4 " "
PIPE@4..5 "|"
WHITESPACE@5..6 "\n"
KEYWORD@6..21
HASH_PLUS@6..8 "#+"
TEXT@8..13 "TBLFM"
COLON@13..14 ":"
TEXT@14..20 " test1"
NEW_LINE@20..21 "\n"
KEYWORD@21..35
HASH_PLUS@21..23 "#+"
TEXT@23..28 "TBLFM"
COLON@28..29 ":"
TEXT@29..35 " test2"
"###
);
}
// Snapshot test for `table_el_node`, plus two inputs that must be rejected
// (empty input, and a first line containing a character other than `+`/`-`).
#[test]
fn parse_table_el() {
use crate::{ast::TableEl, tests::to_ast, ParseConfig};
let to_table_el = to_ast::<TableEl>(table_el_node);
insta::assert_debug_snapshot!(
to_table_el(
r#" +---+
| |
+---+
"#
).syntax,
@r###"
TABLE_EL@0..37
TEXT@0..32 " +---+\n | |\n ..."
BLANK_LINE@32..33 "\n"
BLANK_LINE@33..37 " "
"###
);
let config = &ParseConfig::default();
assert!(table_el_node(("", config).into()).is_err());
assert!(table_el_node(("+----|---", config).into()).is_err());
}

View file

@ -1,66 +0,0 @@
use nom::{
bytes::complete::take_while,
combinator::{map, verify},
sequence::tuple,
IResult,
};
use super::{
combinator::{l_angle2_token, node, r_angle2_token, GreenElement},
input::Input,
SyntaxKind::*,
};
/// Recognizes a target: `<<TARGET>>`.
///
/// TARGET is a non-empty run without `<`, `>` or newline that neither starts
/// nor ends with a space; it is kept as a single text token.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn target_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let name = verify(
        take_while(|c: char| !matches!(c, '<' | '>' | '\n')),
        |text: &Input| {
            let text = text.as_str();
            !text.is_empty() && !text.starts_with(' ') && !text.ends_with(' ')
        },
    );

    let mut parser = map(
        tuple((l_angle2_token, name, r_angle2_token)),
        |(open, name, close)| {
            let children = [open, name.text_token(), close];
            node(TARGET, children)
        },
    );

    crate::lossless_parser!(parser, input)
}
// Snapshot tests for `target_node`, followed by inputs that must be rejected
// (leading/trailing space, `<`, `>`, newline, unbalanced brackets).
#[test]
fn parse() {
use crate::{ast::Target, tests::to_ast, ParseConfig};
let to_target = to_ast::<Target>(target_node);
insta::assert_debug_snapshot!(
to_target("<<target>>").syntax,
@r###"
TARGET@0..10
L_ANGLE2@0..2 "<<"
TEXT@2..8 "target"
R_ANGLE2@8..10 ">>"
"###
);
insta::assert_debug_snapshot!(
to_target("<<tar get>>").syntax,
@r###"
TARGET@0..11
L_ANGLE2@0..2 "<<"
TEXT@2..9 "tar get"
R_ANGLE2@9..11 ">>"
"###
);
let config = &ParseConfig::default();
assert!(target_node(("<<target >>", config).into()).is_err());
assert!(target_node(("<< target>>", config).into()).is_err());
assert!(target_node(("<<ta<get>>", config).into()).is_err());
assert!(target_node(("<<ta>get>>", config).into()).is_err());
assert!(target_node(("<<ta\nget>>", config).into()).is_err());
assert!(target_node(("<<target>", config).into()).is_err());
}

View file

@ -1,348 +0,0 @@
use nom::{
branch::alt,
bytes::complete::{tag, take_till, take_while1, take_while_m_n},
character::complete::{digit1, space0, space1},
combinator::{iterator, map, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{
colon_token, l_angle_token, l_bracket_token, l_parens_token, minus2_token, minus_token,
node, percent2_token, r_angle_token, r_bracket_token, r_parens_token, GreenElement,
NodeBuilder,
},
input::Input,
SyntaxKind::*,
};
/// Recognizes a diary timestamp: `<%%(VALUE)>`.
///
/// VALUE is everything up to the first `)`, `>` or newline and is kept as a
/// single text token.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn timestamp_diary_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let value = take_till(|c: char| matches!(c, ')' | '>' | '\n'));

    let mut parser = map(
        tuple((
            l_angle_token,
            percent2_token,
            l_parens_token,
            value,
            r_parens_token,
            r_angle_token,
        )),
        |(open, marker, l_paren, sexp, r_paren, close)| {
            let children = [open, marker, l_paren, sexp.text_token(), r_paren, close];
            node(TIMESTAMP_DIARY, children)
        },
    );

    crate::lossless_parser!(parser, input)
}
fn date(i: Input) -> IResult<Input, [GreenElement; 5], ()> {
map(
tuple((
take_while_m_n(4, 4, |c: char| c.is_ascii_digit()),
minus_token,
take_while_m_n(2, 2, |c: char| c.is_ascii_digit()),
minus_token,
take_while_m_n(2, 2, |c: char| c.is_ascii_digit()),
)),
|(year, minus, month, minus_, day)| {
[
year.token(TIMESTAMP_YEAR),
minus,
month.token(TIMESTAMP_MONTH),
minus_,
day.token(TIMESTAMP_DAY),
]
},
)(i)
}
/// Parses a day name: one or more characters that are not ASCII whitespace,
/// not ASCII digits, and none of `+`, `-`, `]`, `>`, `.`.
fn dayname(i: Input) -> IResult<Input, GreenElement, ()> {
    let is_dayname_char = |c: char| {
        !(c.is_ascii_whitespace()
            || c.is_ascii_digit()
            || matches!(c, '+' | '-' | ']' | '>' | '.'))
    };

    let (rest, name) = take_while1(is_dayname_char)(i)?;
    Ok((rest, name.token(TIMESTAMP_DAYNAME)))
}
fn time(i: Input) -> IResult<Input, [GreenElement; 3], ()> {
map(
tuple((
take_while_m_n(2, 2, |c: char| c.is_ascii_digit()),
colon_token,
take_while_m_n(2, 2, |c: char| c.is_ascii_digit()),
)),
|(hour, colon, minute)| {
[
hour.token(TIMESTAMP_HOUR),
colon,
minute.token(TIMESTAMP_MINUTE),
]
},
)(i)
}
/// Parses a repeater or delay: MARK VALUE UNIT.
///
/// MARK is `++`, `+` or `.+` (repeater) or `--` or `-` (delay); VALUE is one
/// or more digits; UNIT is one of `h`, `d`, `w`, `m`, `y`.
///
/// Returns the mark, value and unit tokens.
fn repeater_or_delay(
    input: Input,
) -> IResult<Input, (GreenElement, GreenElement, GreenElement), ()> {
    let repeater_mark = map(alt((tag("++"), tag("+"), tag(".+"))), |m: Input| {
        m.token(TIMESTAMP_REPEATER_MARK)
    });
    let delay_mark = map(alt((tag("--"), tag("-"))), |m: Input| {
        m.token(TIMESTAMP_DELAY_MARK)
    });

    let (rest, mark) = alt((repeater_mark, delay_mark))(input)?;
    let (rest, amount) = digit1(rest)?;
    let (rest, unit) = alt((tag("h"), tag("d"), tag("w"), tag("m"), tag("y")))(rest)?;

    let tokens = (
        mark,
        amount.token(TIMESTAMP_VALUE),
        unit.token(TIMESTAMP_UNIT),
    );
    Ok((rest, tokens))
}
/// Shared body of the active and inactive timestamp parsers.
///
/// `l_parser` / `r_parser` recognize the opening and closing delimiter.
/// Three shapes are accepted:
///
/// 1. time range: `L DATE [DAYNAME] HH:MM-HH:MM [REPEATER-OR-DELAY]... R`
/// 2. single:     `L DATE [DAYNAME] [HH:MM] [REPEATER-OR-DELAY]... R`
/// 3. date range: shape 2, followed by `--` and a second shape-2 timestamp
///
/// Returns the flat list of child tokens; the caller wraps them in a node.
fn timestamp_node_base(
input: Input,
l_parser: impl Fn(Input) -> IResult<Input, GreenElement, ()>,
r_parser: impl Fn(Input) -> IResult<Input, GreenElement, ()>,
) -> IResult<Input, Vec<GreenElement>, ()> {
let (input, l_angle) = l_parser(input)?;
let (input, start_date) = date(input)?;
let (input, start_dayname) = opt(tuple((space1, dayname)))(input)?;
let (input, start_time) = opt(tuple((space1, time)))(input)?;
let mut b = NodeBuilder::new();
b.push(l_angle);
b.children.extend(start_date);
if let Some((ws, dayname)) = start_dayname {
b.push(ws.ws_token());
b.push(dayname);
}
// shape 1: a `-` right after the start time introduces an end time
if input.as_str().starts_with('-') {
// a time range needs a start time
let (ws, start_time) = match start_time {
Some(start_time) => start_time,
None => return Err(nom::Err::Error(())),
};
let (input, minus) = minus_token(input)?;
let (input, end_time) = time(input)?;
b.ws(ws);
b.children.extend(start_time);
b.push(minus);
b.children.extend(end_time);
// zero or more repeaters/delays, each preceded by at least one space
let mut iter = iterator(input, tuple((space1, repeater_or_delay)));
for (ws, (mark, value, unit)) in &mut iter {
b.children.extend([ws.ws_token(), mark, value, unit]);
}
let (input, _) = iter.finish()?;
let (input, space) = space0(input)?;
let (input, r_angle) = r_parser(input)?;
b.ws(space);
b.push(r_angle);
return Ok((input, b.children));
}
// shape 2: optional start time, then repeaters/delays and the closing delimiter
if let Some((ws, start_time)) = start_time {
b.ws(ws);
b.children.extend(start_time);
}
let mut iter = iterator(input, tuple((space1, repeater_or_delay)));
for (ws, (mark, value, unit)) in &mut iter {
b.children.extend([ws.ws_token(), mark, value, unit]);
}
let (input, _) = iter.finish()?;
let (input, space) = space0(input)?;
let (input, r_angle) = r_parser(input)?;
b.ws(space);
b.push(r_angle);
// shape 3: `--` directly after the closing delimiter starts the end timestamp
if input.as_str().starts_with("--") {
let (input, minus2) = minus2_token(input)?;
let (input, l_angle) = l_parser(input)?;
let (input, end_date) = date(input)?;
let (input, end_dayname) = opt(tuple((space1, dayname)))(input)?;
let (input, end_time) = opt(tuple((space1, time)))(input)?;
b.children.extend([minus2, l_angle]);
b.children.extend(end_date);
if let Some((ws, dayname)) = end_dayname {
b.push(ws.ws_token());
b.push(dayname);
}
if let Some((ws, end_time)) = end_time {
b.ws(ws);
b.children.extend(end_time);
}
let mut iter = iterator(input, tuple((space1, repeater_or_delay)));
for (ws, (mark, value, unit)) in &mut iter {
b.children.extend([ws.ws_token(), mark, value, unit]);
}
let (input, _) = iter.finish()?;
let (input, space_) = space0(input)?;
let (input, r_angle) = r_parser(input)?;
b.ws(space_);
b.push(r_angle);
Ok((input, b.children))
} else {
Ok((input, b.children))
}
}
/// Recognizes an active timestamp, delimited by the tokens that
/// `l_angle_token` / `r_angle_token` match, and wraps its tokens in a
/// `TIMESTAMP_ACTIVE` node.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn timestamp_active_node(input: Input) -> IResult<Input, GreenElement, ()> {
    fn parser(input: Input) -> IResult<Input, GreenElement, ()> {
        timestamp_node_base(input, l_angle_token, r_angle_token)
            .map(|(rest, children)| (rest, node(TIMESTAMP_ACTIVE, children)))
    }

    crate::lossless_parser!(parser, input)
}
/// Recognizes an inactive timestamp, delimited by the tokens that
/// `l_bracket_token` / `r_bracket_token` match, and wraps its tokens in a
/// `TIMESTAMP_INACTIVE` node.
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
pub fn timestamp_inactive_node(input: Input) -> IResult<Input, GreenElement, ()> {
    fn parser(input: Input) -> IResult<Input, GreenElement, ()> {
        timestamp_node_base(input, l_bracket_token, r_bracket_token)
            .map(|(rest, children)| (rest, node(TIMESTAMP_INACTIVE, children)))
    }

    crate::lossless_parser!(parser, input)
}
// Exercises `timestamp_inactive_node` on a range of bracketed timestamps and pins
// the exact syntax tree of three representative inputs with inline snapshots.
#[test]
fn parse() {
use crate::{ast::Timestamp, tests::to_ast};
// `to_ast` returns a closure that unwraps the parser result, the node conversion and
// the AST cast, so every call below panics (failing the test) on any of those errors.
let to_timestamp = to_ast::<Timestamp>(timestamp_inactive_node);
// Smoke checks: these inputs only have to parse; their trees are not inspected.
// Single timestamp: date, with optional time and/or day name.
to_timestamp("[2003-09-16]");
to_timestamp("[2003-09-16 09:09]");
to_timestamp("[2003-09-16 Tue]");
to_timestamp("[2003-09-16 Tue 09:09]");
// Two timestamps joined by `--`.
to_timestamp("[2003-09-16]--[2003-09-16]");
to_timestamp("[2003-09-16 09:09]--[2003-09-16 09:09]");
to_timestamp("[2003-09-16]--[2003-09-16 09:09]");
to_timestamp("[2003-09-16 Tue]--[2003-09-16 Tue]");
to_timestamp("[2003-09-16 Tue 09:09]--[2003-09-16 Tue 09:09]");
// `HH:MM-HH:MM` time range inside one timestamp, including trailing whitespace
// before the closing bracket.
to_timestamp("[2003-09-16 Tue 09:09-09:09]");
to_timestamp("[2003-09-16 09:09-09:09 ]");
// Trailing `+1w` / `.+1d` / `--1d` cookies, on either side of a `--` range.
to_timestamp("[2003-09-16 09:09 +1w .+1d]");
to_timestamp("[2003-09-16 09:09]--[2003-09-16 +1w .+1d --1d ]");
to_timestamp("[2003-09-16 Tue 09:09 +1w]--[2003-09-16 .+1d --1d ]");
to_timestamp("[2003-09-16 09:09-10:19 +1w --1d]");
// Snapshot 1: a single date with day name and a `+1w` cookie is not a range.
let ts = to_timestamp("[2003-09-16 Tue +1w]");
assert!(!ts.is_range());
insta::assert_debug_snapshot!(
ts.syntax,
@r###"
TIMESTAMP_INACTIVE@0..20
L_BRACKET@0..1 "["
TIMESTAMP_YEAR@1..5 "2003"
MINUS@5..6 "-"
TIMESTAMP_MONTH@6..8 "09"
MINUS@8..9 "-"
TIMESTAMP_DAY@9..11 "16"
WHITESPACE@11..12 " "
TIMESTAMP_DAYNAME@12..15 "Tue"
WHITESPACE@15..16 " "
TIMESTAMP_REPEATER_MARK@16..17 "+"
TIMESTAMP_VALUE@17..18 "1"
TIMESTAMP_UNIT@18..19 "w"
R_BRACKET@19..20 "]"
"###
);
// Snapshot 2: two bracketed timestamps joined by `--` form a range; both halves
// and the `MINUS2` separator end up as children of one TIMESTAMP_INACTIVE node.
let ts = to_timestamp("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]");
assert!(ts.is_range());
insta::assert_debug_snapshot!(
ts.syntax,
@r###"
TIMESTAMP_INACTIVE@0..46
L_BRACKET@0..1 "["
TIMESTAMP_YEAR@1..5 "2003"
MINUS@5..6 "-"
TIMESTAMP_MONTH@6..8 "09"
MINUS@8..9 "-"
TIMESTAMP_DAY@9..11 "16"
WHITESPACE@11..12 " "
TIMESTAMP_DAYNAME@12..15 "Tue"
WHITESPACE@15..16 " "
TIMESTAMP_HOUR@16..18 "09"
COLON@18..19 ":"
TIMESTAMP_MINUTE@19..21 "39"
R_BRACKET@21..22 "]"
MINUS2@22..24 "--"
L_BRACKET@24..25 "["
TIMESTAMP_YEAR@25..29 "2003"
MINUS@29..30 "-"
TIMESTAMP_MONTH@30..32 "09"
MINUS@32..33 "-"
TIMESTAMP_DAY@33..35 "16"
WHITESPACE@35..36 " "
TIMESTAMP_DAYNAME@36..39 "Tue"
WHITESPACE@39..40 " "
TIMESTAMP_HOUR@40..42 "10"
COLON@42..43 ":"
TIMESTAMP_MINUTE@43..45 "39"
R_BRACKET@45..46 "]"
"###
);
// Snapshot 3: a `HH:MM-HH:MM` time range inside a single pair of brackets is
// also reported as a range.
let ts = to_timestamp("[2003-09-16 Tue 09:39-10:39]");
assert!(ts.is_range());
insta::assert_debug_snapshot!(
ts.syntax,
@r###"
TIMESTAMP_INACTIVE@0..28
L_BRACKET@0..1 "["
TIMESTAMP_YEAR@1..5 "2003"
MINUS@5..6 "-"
TIMESTAMP_MONTH@6..8 "09"
MINUS@8..9 "-"
TIMESTAMP_DAY@9..11 "16"
WHITESPACE@11..12 " "
TIMESTAMP_DAYNAME@12..15 "Tue"
WHITESPACE@15..16 " "
TIMESTAMP_HOUR@16..18 "09"
COLON@18..19 ":"
TIMESTAMP_MINUTE@19..21 "39"
MINUS@21..22 "-"
TIMESTAMP_HOUR@22..24 "10"
COLON@24..25 ":"
TIMESTAMP_MINUTE@25..27 "39"
R_BRACKET@27..28 "]"
"###
);
}

View file

@ -1,24 +0,0 @@
//! Test utilities: helpers for running a parser on a string and casting the result to an AST node.
use nom::IResult;
use rowan::{ast::AstNode, SyntaxNode};
use crate::{
syntax::{combinator::GreenElement, input::Input},
ParseConfig,
};
/// Adapts a green-element parser into a `&str -> N` closure for tests.
///
/// The returned closure runs `parser` on the given string with a default
/// [`ParseConfig`], turns the resulting green element into a root
/// [`SyntaxNode`], and casts that node to `N`.
///
/// # Panics
///
/// The closure panics if the parser returns an error, if the parsed element
/// is not a node, or if the node cannot be cast to `N`.
pub fn to_ast<N: AstNode>(
    parser: impl Fn(Input) -> IResult<Input, GreenElement, ()>,
) -> impl Fn(&str) -> N {
    move |s: &str| {
        let config = ParseConfig::default();
        // Any unconsumed remainder is ignored; only the parsed element matters.
        let (_, element) = parser(Input { s, c: &config }).unwrap();
        let green = element.into_node().unwrap();
        let root = SyntaxNode::<N::Language>::new_root(green);
        N::cast(root).unwrap()
    }
}