diff --git a/.cargo/config.toml b/.cargo/config.toml deleted file mode 100644 index 70f9eae..0000000 --- a/.cargo/config.toml +++ /dev/null @@ -1,2 +0,0 @@ -[registries.crates-io] -protocol = "sparse" diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 94540b8..33c1965 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,8 +1,16 @@ version: 2 updates: - - package-ecosystem: cargo - directory: "/" - schedule: - interval: weekly - time: "09:00" - open-pull-requests-limit: 10 +- package-ecosystem: cargo + directory: "/" + schedule: + interval: weekly + time: "09:00" + open-pull-requests-limit: 10 + ignore: + - dependency-name: pretty_assertions + versions: + - 0.7.1 + - dependency-name: nom + versions: + - 6.1.0 + - 6.1.1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 5746014..0000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: CI - -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@master - - - name: Setup rust - uses: dtolnay/rust-toolchain@stable - with: - components: clippy, rustfmt - - - run: | - cargo fmt -- --check - cargo test --all-features - cargo clippy - - gh-pages: - if: github.ref == 'refs/heads/v0.10' - - permissions: - contents: read - pages: write - id-token: write - - runs-on: ubuntu-latest - - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - - steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Setup Pages - uses: actions/configure-pages@v3 - - - name: Install - run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - - - name: Build - run: wasm-pack build -t web -d ./dist --out-name orgize ./wasm/ - - - name: Upload artifact - uses: actions/upload-pages-artifact@v2 - with: - path: "./wasm" - - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v2 diff --git 
a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..7679d25 --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,39 @@ +name: Rust + +on: + pull_request: + push: + branches: + - master + +jobs: + format: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + + - name: Run rustfmt + run: cargo fmt -- --check + + test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + + - name: Install Rust toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + override: true + + - name: Cache target/ + uses: actions/cache@v1 + with: + path: target + key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }} + + - name: Run Test + run: cargo test --all-features diff --git a/.gitignore b/.gitignore index 081fe7a..4ca2515 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,6 @@ **/*.rs.bk Cargo.lock -.vscode +benches/*.org .gdb_history perf.data* diff --git a/Cargo.toml b/Cargo.toml index 3fdaa8f..a50f744 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,53 +1,39 @@ -[workspace] -resolver = "2" -members = [".", "./wasm"] - [package] name = "orgize" -version = "0.10.0-alpha.10" +version = "0.8.4" authors = ["PoiScript "] +description = "A Rust library for parsing orgmode files." repository = "https://github.com/PoiScript/orgize" -edition = "2021" -license = "MIT" -description = "A Rust library for parsing org-mode files." 
readme = "README.md" -keywords = ["orgmode", "org-mode", "emacs", "parser"] +edition = "2018" +license = "MIT" +keywords = ["orgmode", "emacs", "parser"] [package.metadata.docs.rs] all-features = true +[badges] +travis-ci = { repository = "PoiScript/orgize" } + [features] -default = [] -indexmap = ["dep:indexmap"] -chrono = ["dep:chrono"] -tracing = ["dep:tracing"] -syntax-org-fc = [] +default = ["ser"] +ser = ["serde", "serde_indextree", "indexmap/serde-1"] [dependencies] -bytecount = "0.6" -cfg-if = "1.0.0" -chrono = { version = "0.4", optional = true } -indexmap = { version = "2.1", optional = true } -jetscii = "0.5" -memchr = "2.5" -nom = { version = "7.1", default-features = false, features = ["std"] } -rowan = "0.15" -tracing = { version = "0.1", optional = true } +bytecount = "0.6.0" +chrono = { version = "0.4.11", optional = true } +indextree = "4.0.0" +jetscii = "0.4.4" +lazy_static = "1.4.0" +memchr = "2.3.3" +# we don't need to parse any float number, so lexical crate is redundant +nom = { version = "5.1.1", default-features = false, features = ["std"] } +serde = { version = "1.0.106", optional = true, features = ["derive"] } +serde_indextree = { version = "0.2.0", optional = true } +syntect = { version = "4.1.0", optional = true } +indexmap = { version = "1.3.2", features = ["serde-1"], optional = true} [dev-dependencies] -criterion = "0.5" -insta = "1.29" -slugify = "0.1" -tracing-subscriber = { version = "0.3", features = ["fmt"] } - -[[bench]] -name = "parse" -harness = false - -[[example]] -name = "parse" -required-features = ["tracing"] - -[profile.dev.package] -insta.opt-level = 3 -similar.opt-level = 3 +pretty_assertions = "0.6.1" +serde_json = "1.0.51" +slugify = "0.1.0" diff --git a/LICENSE b/LICENSE index f7cb351..0d477d3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019-2023 Alex Lin (poi) +Copyright (c) 2019-2020 Alex Lin (poi) Permission is hereby granted, free of charge, to any person obtaining a copy of 
this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index bf7a1d9..8074a44 100644 --- a/README.md +++ b/README.md @@ -1,119 +1,210 @@ # Orgize +[![Build Status](https://travis-ci.org/PoiScript/orgize.svg?branch=master)](https://travis-ci.org/PoiScript/orgize) [![Crates.io](https://img.shields.io/crates/v/orgize.svg)](https://crates.io/crates/orgize) -[![Documentation](https://docs.rs/orgize/badge.svg)](https://docs.rs/orgize) -[![Build status](https://img.shields.io/github/actions/workflow/status/PoiScript/orgize/ci.yml)](https://github.com/PoiScript/orgize/actions/workflows/ci.yml) -![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg) +[![Document](https://docs.rs/orgize/badge.svg)](https://docs.rs/orgize) -A Rust library for parsing org-mode files. +A Rust library for parsing orgmode files. -Live Demo: +[Live demo](https://orgize.herokuapp.com/) ## Parse -To parse a org-mode string, simply invoking the `Org::parse` function: - -```rust -use orgize::{Org, rowan::ast::AstNode}; - -let org = Org::parse("* DONE Title :tag:"); -assert_eq!( - format!("{:#?}", org.document().syntax()), - r#"DOCUMENT@0..18 - HEADLINE@0..18 - HEADLINE_STARS@0..1 "*" - WHITESPACE@1..2 " " - HEADLINE_KEYWORD_DONE@2..6 "DONE" - WHITESPACE@6..7 " " - HEADLINE_TITLE@7..13 - TEXT@7..13 "Title " - HEADLINE_TAGS@13..18 - COLON@13..14 ":" - TEXT@14..17 "tag" - COLON@17..18 ":" -"#); -``` - -use `ParseConfig::parse` to specific a custom parse config - -```rust -use orgize::{Org, ParseConfig, ast::Headline}; - -let config = ParseConfig { - // custom todo keywords - todo_keywords: (vec!["TASK".to_string()], vec![]), - ..Default::default() -}; -let org = config.parse("* TASK Title 1"); -let hdl = org.first_node::().unwrap(); -assert_eq!(hdl.todo_keyword().unwrap(), "TASK"); -``` - -## Traverse - -Use `org.traverse(&mut traversal)` to walk through the syntax tree. 
- -```rust -use orgize::{ - export::{from_fn, Container, Event}, - Org, -}; - -let mut hdl_count = 0; -let mut handler = from_fn(|event| { - if matches!(event, Event::Enter(Container::Headline(_))) { - hdl_count += 1; - } -}); -Org::parse("* 1\n** 2\n*** 3\n****4").traverse(&mut handler); -assert_eq!(hdl_count, 3); -``` - -## Modify - -Use `org.replace_range(TextRange::new(start, end), "new_text")` to modify the syntax tree: - -```rust -use orgize::{Org, ParseConfig, ast::Headline, TextRange}; - -let mut org = Org::parse("hello\n* world"); - -let hdl = org.first_node::().unwrap(); -org.replace_range(hdl.text_range(), "** WORLD!"); - -let hdl = org.first_node::().unwrap(); -assert_eq!(hdl.level(), 2); - -org.replace_range(TextRange::up_to(hdl.start()), ""); -assert_eq!(org.to_org(), "** WORLD!"); -``` - -## Render to html - -Call the `Org::to_html` function to export org element tree to html: +To parse a orgmode string, simply invoking the `Org::parse` function: ```rust use orgize::Org; +Org::parse("* DONE Title :tag:"); +``` + +or `Org::parse_custom`: + +``` rust +use orgize::{Org, ParseConfig}; + +Org::parse_custom( + "* TASK Title 1", + &ParseConfig { + // custom todo keywords + todo_keywords: (vec!["TASK".to_string()], vec![]), + ..Default::default() + }, +); +``` + +## Iter + +`Org::iter` function will returns an iterator of `Event`s, which is +a simple wrapper of `Element`. + +```rust +use orgize::Org; + +for event in Org::parse("* DONE Title :tag:").iter() { + // handling the event +} +``` + +**Note**: whether an element is container or not, it will appears twice in one loop. +One as `Event::Start(element)`, one as `Event::End(element)`. 
+ +## Render html + +You can call the `Org::write_html` function to generate html directly, which +uses the `DefaultHtmlHandler` internally: + +```rust +use orgize::Org; + +let mut writer = Vec::new(); +Org::parse("* title\n*section*").write_html(&mut writer).unwrap(); + assert_eq!( - Org::parse("* title\n*section*").to_html(), + String::from_utf8(writer).unwrap(), "

title

section

" ); ``` -Checkout `examples/html-slugify.rs` on how to customizing html export process. +## Render html with custom `HtmlHandler` + +To customize html rendering, simply implementing `HtmlHandler` trait and passing +it to the `Org::wirte_html_custom` function. + +The following code demonstrates how to add a id for every headline and return +own error type while rendering. + +```rust +use std::convert::From; +use std::io::{Error as IOError, Write}; +use std::string::FromUtf8Error; + +use orgize::export::{DefaultHtmlHandler, HtmlHandler}; +use orgize::{Element, Org}; +use slugify::slugify; + +#[derive(Debug)] +enum MyError { + IO(IOError), + Heading, + Utf8(FromUtf8Error), +} + +// From trait is required for custom error type +impl From for MyError { + fn from(err: IOError) -> Self { + MyError::IO(err) + } +} + +impl From for MyError { + fn from(err: FromUtf8Error) -> Self { + MyError::Utf8(err) + } +} + +#[derive(Default)] +struct MyHtmlHandler(DefaultHtmlHandler); + +impl HtmlHandler for MyHtmlHandler { + fn start(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { + if let Element::Title(title) = element { + if title.level > 6 { + return Err(MyError::Heading); + } else { + write!( + w, + "", + title.level, + slugify!(&title.raw), + )?; + } + } else { + // fallthrough to default handler + self.0.start(w, element)?; + } + Ok(()) + } + + fn end(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { + if let Element::Title(title) = element { + write!(w, "", title.level)?; + } else { + self.0.end(w, element)?; + } + Ok(()) + } +} + +fn main() -> Result<(), MyError> { + let mut writer = Vec::new(); + let mut handler = MyHtmlHandler::default(); + Org::parse("* title\n*section*").wirte_html_custom(&mut writer, &mut handler)?; + + assert_eq!( + String::from_utf8(writer)?, + "

title

\ +

section

" + ); + + Ok(()) +} +``` + +**Note**: as I mentioned above, each element will appears two times while iterating. +And handler will silently ignores all end events from non-container elements. + +So if you want to change how a non-container element renders, just redefine the `start` +function and leave the `end` function unchanged. + +## Serde + +`Org` struct have already implemented serde's `Serialize` trait. It means you can +serialize it into any format supported by serde, such as json: + +```rust +use orgize::Org; +use serde_json::{json, to_string}; + +let org = Org::parse("I 'm *bold*."); +println!("{}", to_string(&org).unwrap()); + +// { +// "type": "document", +// "children": [{ +// "type": "section", +// "children": [{ +// "type": "paragraph", +// "children":[{ +// "type": "text", +// "value":"I 'm " +// }, { +// "type": "bold", +// "children":[{ +// "type": "text", +// "value": "bold" +// }] +// }, { +// "type":"text", +// "value":"." +// }] +// }] +// }] +// } +``` ## Features -- **`chrono`**: adds the ability to convert `Timestamp` into `chrono::NaiveDateTime`, disabled by default. +By now, orgize provides four features: -- **`indexmap`**: adds the ability to convert `PropertyDrawer` properties into `IndexMap`, disabled by default. ++ `ser`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default. -## API compatibility ++ `chrono`: adds the ability to convert `Datetime` into `chrono` structs, disabled by default. -`element.syntax()` exposes access to the internal syntax tree, along with some rowan low-level APIs. -This can be useful for intricate tasks. ++ `syntect`: provides `SyntectHtmlHandler` for highlighting code block, disabled by default. -However, the structure of the internal syntax tree can change between different versions of the library. -Because of this, the result of `element.syntax()` doesn't follow semantic versioning, -which means updates might break your code if it relies on this method. 
++ `indexmap`: Uses `IndexMap` instead of `HashMap` for properties to preserve their order, disabled by default. + +## License + +MIT diff --git a/benches/.gitignore b/benches/.gitignore deleted file mode 100644 index 448d1fb..0000000 --- a/benches/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.org \ No newline at end of file diff --git a/benches/parse.rs b/benches/parse.rs index 4549d95..0666be2 100644 --- a/benches/parse.rs +++ b/benches/parse.rs @@ -1,39 +1,30 @@ -use criterion::{criterion_group, criterion_main, Criterion, Throughput}; +#![feature(test)] + +extern crate test; use orgize::Org; +use test::Bencher; -const INPUT: &[(&str, &str)] = &[ - ("doc.org", include_str!("./doc.org")), - ("org-faq.org", include_str!("./org-faq.org")), - ("org-hacks.org", include_str!("./org-hacks.org")), - ( - "org-release-notes.org", - include_str!("./org-release-notes.org"), - ), - ("org-syntax.org", include_str!("./org-syntax.org")), -]; - -pub fn bench_parse(c: &mut Criterion) { - let mut group = c.benchmark_group("Org::parse"); - - for (id, org) in INPUT { - group.throughput(Throughput::Bytes(org.len() as u64)); - group.bench_with_input(*id, org, |b, i| b.iter(|| Org::parse(i))); - } - - group.finish(); +#[bench] +fn org_syntax(b: &mut Bencher) { + // wget https://orgmode.org/worg/sources/dev/org-syntax.org + b.iter(|| { + Org::parse(include_str!("org-syntax.org")); + }) } -pub fn bench_to_html(c: &mut Criterion) { - let mut group = c.benchmark_group("Org::to_html"); - - for (id, org) in INPUT { - group.throughput(Throughput::Bytes(org.len() as u64)); - group.bench_with_input(*id, &Org::parse(org), |b, i| b.iter(|| i.to_html())); - } - - group.finish(); +#[bench] +fn doc(b: &mut Bencher) { + // wget https://orgmode.org/worg/sources/doc.org + b.iter(|| { + Org::parse(include_str!("doc.org")); + }) } -criterion_group!(benches, bench_parse, bench_to_html); -criterion_main!(benches); +#[bench] +fn org_faq(b: &mut Bencher) { + // wget https://orgmode.org/worg/sources/org-faq.org + 
b.iter(|| { + Org::parse(include_str!("org-faq.org")); + }) +} diff --git a/development.md b/development.md deleted file mode 100644 index bcd343c..0000000 --- a/development.md +++ /dev/null @@ -1,44 +0,0 @@ -## Format, test, lint - -```shell -cargo fmt -- --check -cargo test --all-features -cargo clippy --allow-dirty --allow-staged -``` - -## Update snapshot testing - -```shell -cargo install cargo-insta -cargo insta test --all-features -cargo insta review -``` - -## Fuzz testing - -```shell -cargo install cargo-fuzz -rustup default nightly -cargo fuzz run fuzz_target_1 -``` - -## Benchmark - -```shell -curl -q https://orgmode.org/worg/doc.org --output ./benches/doc.org -curl -q https://orgmode.org/worg/org-faq.org --output ./benches/org-faq.org -curl -q https://orgmode.org/worg/org-hacks.org --output ./benches/org-hacks.org -curl -q https://orgmode.org/worg/org-release-notes.org --output ./benches/org-release-notes.org -curl -q https://orgmode.org/worg/org-syntax.org --output ./benches/org-syntax.org -curl -q https://raw.githubusercontent.com/bzg/org-mode/main/doc/org-manual.org --output ./benches/org-manual.org - -cargo bench --bench parse -``` - -## Benchmark w/ flamegraph - -```shell -cargo install flamegraph -cargo flamegraph --bench parse -o baseline.svg -- --bench -# then open baseline.svg with your browser -``` diff --git a/docs/STATUS.md b/docs/STATUS.md index d0318b8..6788561 100644 --- a/docs/STATUS.md +++ b/docs/STATUS.md @@ -4,7 +4,7 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information. - [x] Headline - [X] Objects insides headline title -- [x] Affiliated Keywords +- [ ] Affiliated Keywords ## Greater Elements - [x] Greater Blocks @@ -15,10 +15,10 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information. 
- [ ] Objects insides inlinetask title - [x] Plain Lists and Items - [x] Nested List - - [x] Nested List Indentation - - [x] Tag - - [x] Counter - - [x] Counter set + - [ ] Nested List Indentation + - [ ] Tag + - [ ] Counter + - [ ] Counter set - [X] Property Drawers - [X] Tables @@ -26,25 +26,25 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information. - [x] Babel Call - [x] Blocks - - [x] Escape characters (`#`,`*`, etc) + - [ ] Escape characters (`#`,`*`, etc) - [ ] Line numbers - [X] Clock, Diary Sexp and Planning - [x] Comments - [x] Fixed Width Areas - [x] Horizontal Rules - [x] Keywords -- [x] LaTeX Environments +- [ ] LaTeX Environments - [X] Node Properties - [x] Paragraphs - [X] Table Rows ## Objects -- [x] Entities and LaTeX Fragments +- [ ] Entities and LaTeX Fragments - [x] Export Snippets - [x] Footnote References - [x] Inline Babel Calls and Source Blocks -- [x] Line Breaks +- [ ] Line Breaks - [x] Links - [x] Regular link - [ ] Plain link @@ -53,22 +53,17 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information. - [x] Macros - [x] Targets and Radio Targets - [x] Statistics Cookies -- [x] Subscript and Superscript +- [ ] Subscript and Superscript - [X] Table Cells - [x] Timestamps - [x] Text Markup - - [x] bold - - [x] italic - - [x] underline - - [x] verbatim - - [x] code - - [x] strike-through ## Export -- [x] HTML `Org::to_html` -- [X] Org `Org::to_org` +- [x] HTML +- [X] Org - [ ] LaTeX +- [X] JSON, (via Serde) ## Extra diff --git a/docs/SYNTAX.md b/docs/SYNTAX.md new file mode 100644 index 0000000..a2ac5b2 --- /dev/null +++ b/docs/SYNTAX.md @@ -0,0 +1,942 @@ +# Table of Contents + +1. [Headlines and Sections](#Headlines_and_Sections) +2. [Affiliated Keywords](#Affiliated_keywords) +3. [Greater Elements](#Greater_Elements) + 1. [Greater Blocks](#Greater_Blocks) + 2. [Drawers and Property Drawers](#Drawers) + 3. [Dynamic Blocks](#Dynamic_Blocks) + 4. [Footnote Definitions](#Footnote_Definitions) + 5. 
[Inlinetasks](#Inlinetasks) + 6. [Plain Lists and Items](#Plain_Lists_and_Items) + 7. [Property Drawers](#Property_Drawers) + 8. [Tables](#Tables) +4. [Elements](#Elements) + 1. [Babel Call](#Babel_Call) + 2. [Blocks](#Blocks) + 3. [Clock, Diary Sexp and Planning](#Clock,_Diary_Sexp_and_Planning) + 4. [Comments](#Comments) + 5. [Fixed Width Areas](#Fixed_Width_Areas) + 6. [Horizontal Rules](#Horizontal_Rules) + 7. [Keywords](#Keywords) + 8. [LaTeX Environments](#LaTeX_Environments) + 9. [Node Properties](#Node_Properties) + 10. [Paragraphs](#Paragraphs) + 11. [Table Rows](#Table_Rows) +5. [Objects](#Objects) + 1. [Entities and LaTeX Fragments](#Entities_and_LaTeX_Fragments) + 2. [Export Snippets](#Export_Snippets) + 3. [Footnote References](#Footnote_References) + 4. [Inline Babel Calls and Source + Blocks](#Inline_Babel_Calls_and_Source_Blocks) + 5. [Line Breaks](#Line_Breaks) + 6. [Links](#Links) + 7. [Macros](#Macros) + 8. [Targets and Radio Targets](#Targets_and_Radio_Targets) + 9. [Statistics Cookies](#Statistics_Cookies) + 10. [Subscript and Superscript](#Subscript_and_Superscript) + 11. [Table Cells](#Table_Cells) + 12. [Timestamps](#Timestamp) + 13. [Text Markup](#Emphasis_Markers) + +This document describes and comments Org syntax as it is currently read by its +parser (Org Elements) and, therefore, by the export framework. It also includes +a few comments on that syntax. + +A core concept in this syntax is that only headlines, sections, planning lines +and property drawers are context-free1, 2. Every other syntactical part only exists within +specific environments. + +Three categories are used to classify these environments: **Greater elements**, +**elements**, and **objects**, from the broadest scope to the narrowest. The +word **element** is used for both Greater and non-Greater elements, the context +should make that clear. + +The paragraph is the unit of measurement. An element defines syntactical parts +that are at the same level as a paragraph, i.e. 
which cannot contain or be +included in a paragraph. An object is a part that could be included in an +element. Greater elements are all parts that can contain an element. + +Empty lines belong to the largest element ending before them. For example, in a +list, empty lines between items are part of the item before them, but +empty lines at the end of a list belong to the plain list element. + +Unless specified otherwise, case is not significant. + + + +# Headlines and Sections + +A headline is defined as: + + STARS KEYWORD PRIORITY TITLE TAGS + +STARS is a string starting at column 0, containing at least one asterisk (and up +to `org-inlinetask-min-level` if `org-inlinetask` library is loaded) and ended +by a space character. The number of asterisks is used to define the level of the +headline. It's the sole compulsory part of a headline. + +KEYWORD is a TODO keyword, which has to belong to the list defined in +`org-todo-keywords-1`. Case is significant. + +PRIORITY is a priority cookie, i.e. a single letter preceded by a hash sign # +and enclosed within square brackets. + +TITLE can be made of any character but a new line. Though, it will match after +every other part has been matched. + +TAGS is made of words containing any alpha-numeric character, underscore, at +sign, hash sign or percent sign, and separated with colons. + +Examples of valid headlines include: + + * + + ** DONE + + *** Some e-mail + + **** TODO [#A] COMMENT Title :tag:a2%: + +If the first word appearing in the title is `COMMENT`, the headline will be +considered as **commented**. Case is significant. + +If its title is `org-footnote-section`, it will be considered as a **footnote +section**. Case is significant. + +If `ARCHIVE` is one of its tags, it will be considered as **archived**. Case is +significant. + +A headline contains directly one section (optionally), followed by any number of +deeper level headlines. + +A section contains directly any greater element or element.
Only a headline can +contain a section. As an exception, text before the first headline in the +document also belongs to a section. + +As an example, consider the following document: + + An introduction. + + * A Headline + + Some text. + + ** Sub-Topic 1 + + ** Sub-Topic 2 + + *** Additional entry + +Its internal structure could be summarized as: + + (document + (section) + (headline + (section) + (headline) + (headline + (headline)))) + + + +# Affiliated Keywords + +With the exception of [inlinetasks](#Inlinetasks), +[items](#Plain_Lists_and_Items), [planning](#Clock,_Diary_Sexp_and_Planning), +[clocks](#Clock,_Diary_Sexp_and_Planning), [node properties](#Node_Properties) +and [table rows](#Table_Rows), every other element type can be assigned +attributes. + +This is done by adding specific keywords, named **affiliated keywords**, just +above the element considered, no blank line allowed. + +Affiliated keywords are built upon one of the following patterns: `#+KEY: VALUE`, +`#+KEY[OPTIONAL]: VALUE` or `#+ATTR_BACKEND: VALUE`. + +KEY is either `CAPTION`, `HEADER`, `NAME`, `PLOT` or `RESULTS` string. + +BACKEND is a string constituted of alpha-numeric characters, hyphens or +underscores. + +OPTIONAL and VALUE can contain any character but a new line. Only `CAPTION` and +`RESULTS` keywords can have an optional value. + +An affiliated keyword can appear more than once if KEY is either `CAPTION` or +`HEADER` or if its pattern is `#+ATTR_BACKEND: VALUE`. + +`CAPTION`, `AUTHOR`, `DATE` and `TITLE` keywords can contain objects in their +value and their optional value, if applicable. + + + +# Greater Elements + +Unless specified otherwise, greater elements can contain directly any other +element or greater element excepted: + +- elements of their own type, +- [node properties](#Node_Properties), which can only be found in [property + drawers](#Property_Drawers), +- [items](#Plain_Lists_and_Items), which can only be found in [plain + lists](#Plain_Lists_and_Items). 
+ + + +## Greater Blocks + +Greater blocks consist in the following pattern: + + #+BEGIN_NAME PARAMETERS + CONTENTS + #+END_NAME + +NAME can contain any non-whitespace character. + +PARAMETERS can contain any character other than new line, and can be omitted. + +If NAME is `CENTER`, it will be a **center block**. If it is `QUOTE`, it will be +a **quote block**. + +If the block is neither a center block, a quote block or a [block +element](#Blocks), it will be a **special block**. + +CONTENTS can contain any element, except : a line `#+END_NAME` on its own. Also +lines beginning with STARS must be quoted by a comma. + + + +## Drawers and Property Drawers + +Pattern for drawers is: + + :NAME: + CONTENTS + :END: + +NAME can contain word-constituent characters, hyphens and underscores. + +CONTENTS can contain any element but another drawer. + + + +## Dynamic Blocks + +Pattern for dynamic blocks is: + + #+BEGIN: NAME PARAMETERS + CONTENTS + #+END: + +NAME cannot contain any whitespace character. + +PARAMETERS can contain any character and can be omitted. + + + +## Footnote Definitions + +Pattern for footnote definitions is: + + [fn:LABEL] CONTENTS + +It must start at column 0. + +LABEL is either a number or follows the pattern `fn:WORD`, where word can +contain any word-constituent character, hyphens and underscore characters. + +CONTENTS can contain any element excepted another footnote definition. It ends +at the next footnote definition, the next headline, two consecutive empty lines +or the end of buffer. + + + +## Inlinetasks + +Inlinetasks are defined by `org-inlinetask-min-level` contiguous asterisk +characters starting at column 0, followed by a whitespace character. + +Optionally, inlinetasks can be ended with a string constituted of +`org-inlinetask-min-level` contiguous asterisk characters starting at column 0, +followed by a space and the `END` string. + +Inlinetasks are recognized only after `org-inlinetask` library is loaded. 
+ + + +## Plain Lists and Items + +Items are defined by a line starting with the following pattern: +`BULLET COUNTER-SET CHECK-BOX TAG`, in which only BULLET is mandatory. + +BULLET is either an asterisk, a hyphen, a plus sign character or follows either +the pattern `COUNTER.` or `COUNTER)`. In any case, BULLET is followed by a +whitespace character or line ending. + +COUNTER can be a number or a single letter. + +COUNTER-SET follows the pattern [@COUNTER]. + +CHECK-BOX is either a single whitespace character, a `X` character or a hyphen, +enclosed within square brackets. + +TAG follows `TAG-TEXT ::` pattern, where TAG-TEXT can contain any character but +a new line. + +An item ends before the next item, the first line less or equally indented than +its starting line, or two consecutive empty lines. Indentation of lines within +other greater elements do not count, neither do inlinetasks boundaries. + +A plain list is a set of consecutive items of the same indentation. It can only +directly contain items. + +If first item in a plain list has a counter in its bullet, the plain list will +be an **ordered plain-list**. If it contains a tag, it will be a **descriptive +list**. Otherwise, it will be an **unordered list**. List types are mutually +exclusive. + +For example, consider the following excerpt of an Org document: + + 1. item 1 + 2. [X] item 2 + - some tag :: item 2.1 + +Its internal structure is as follows: + + (ordered-plain-list + (item) + (item + (descriptive-plain-list + (item)))) + + + +## Property Drawers + +Property drawers are a special type of drawer containing properties attached to +a headline. They are located right after a [headline](#Headlines_and_Sections) +and its [planning](#Clock,_Diary_Sexp_and_Planning) information. + + HEADLINE + PROPERTYDRAWER + + HEADLINE + PLANNING + PROPERTYDRAWER + +PROPERTYDRAWER follows the pattern + + :PROPERTIES: + CONTENTS + :END: + +where CONTENTS consists of zero or more [node properties](#Node_Properties).
+ + + +## Tables + +Tables start at lines beginning with either a vertical bar or the `+-` string +followed by plus or minus signs only, assuming they are not preceded with lines +of the same type. These lines can be indented. + +A table starting with a vertical bar has `org` type. Otherwise it has `table.el` +type. + +Org tables end at the first line not starting with a vertical bar. Table.el +tables end at the first line not starting with either a vertical line or a plus +sign. Such lines can be indented. + +An org table can only contain table rows. A table.el table does not contain +anything. + +One or more `#+TBLFM: FORMULAS` lines, where `FORMULAS` can contain any +character, can follow an org table. + + + +# Elements + +Elements cannot contain any other element. + +Only [keywords](#Keywords) whose name belongs to +`org-element-document-properties`, [verse blocks](#Blocks), +[paragraphs](#Paragraphs) and [table rows](#Table_Rows) can contain objects. + + + +## Babel Call + +Pattern for babel calls is: + + #+CALL: VALUE + +VALUE is optional. It can contain any character but a new line. + + + +## Blocks + +Like [greater blocks](#Greater_Blocks), pattern for blocks is: + + #+BEGIN_NAME DATA + CONTENTS + #+END_NAME + +NAME cannot contain any whitespace character. + +1. If NAME is `COMMENT`, it will be a **comment block**. +2. If it is `EXAMPLE`, it will be an **example block**. +3. If it is `EXPORT`, it will be an **export block**. +4. If it is `SRC`, it will be a **source block**. +5. If it is `VERSE`, it will be a **verse block**. + +DATA can contain any character but a new line. It can be omitted, unless the +block is either a **source block** or an **export block**. + +In the latter case, it should be constituted of a single word. + +In the former case, it must follow the pattern `LANGUAGE SWITCHES ARGUMENTS`, +where SWITCHES and ARGUMENTS are optional. + +LANGUAGE cannot contain any whitespace character.
+ +SWITCHES is made of any number of `SWITCH` patterns, separated by blank lines. + +A SWITCH pattern is either `-l FORMAT`, where FORMAT can contain any character +but a double quote and a new line, `-S` or `+S`, where S stands for a single +letter. + +ARGUMENTS can contain any character but a new line. + +CONTENTS can contain any character, including new lines. Though it will only +contain Org objects if the block is a verse block. Otherwise, CONTENTS will not +be parsed. + + + +## Clock, Diary Sexp and Planning + +A clock follows either of the patterns below: + + CLOCK: INACTIVE-TIMESTAMP + CLOCK: INACTIVE-TIMESTAMP-RANGE DURATION + +INACTIVE-TIMESTAMP, resp. INACTIVE-TIMESTAMP-RANGE, is an inactive, resp. +inactive range, [timestamp](#Timestamp) object. + +DURATION follows the pattern: + + => HH:MM + +HH is a number containing any number of digits. MM is a two-digit number. + +A diary sexp is a line starting at column 0 with `%%(` string. It can then +contain any character besides a new line. + +A planning is an element with the following pattern: + + HEADLINE + PLANNING + +where HEADLINE is a [headline](#Headlines_and_Sections) element and PLANNING is +a line filled with INFO parts, where each of them follows the pattern: + + KEYWORD: TIMESTAMP + +KEYWORD is either `DEADLINE`, `SCHEDULED` or `CLOSED`. TIMESTAMP is a +[timestamp](#Timestamp) object. + +In particular, no blank line is allowed between PLANNING and HEADLINE. + + + +## Comments + +A **comment line** starts with a hash sign and a whitespace character or an end +of line. + +Comments can contain any number of consecutive comment lines. + + + +## Fixed Width Areas + +A **fixed-width line** starts with a colon character and a whitespace or an end +of line. + +Fixed width areas can contain any number of consecutive fixed-width lines. + + + +## Horizontal Rules + +A horizontal rule is a line made of at least 5 consecutive hyphens. It can be +indented.
+ + + +## Keywords + +Keywords follow the syntax: + + #+KEY: VALUE + +KEY can contain any non-whitespace character, but it cannot be equal to `CALL` +or any affiliated keyword. + +VALUE can contain any character except a new line. + +If KEY belongs to `org-element-document-properties`, VALUE can contain objects. + + + +## LaTeX Environments + +Pattern for LaTeX environments is: + + \begin{NAME} CONTENTS \end{NAME} + +NAME is constituted of alpha-numeric or asterisk characters. + +CONTENTS can contain anything but the `\end{NAME}` string. + + + +## Node Properties + +Node properties can only exist in [property drawers](#Property_Drawers). Their +pattern is any of the following: + + :NAME: VALUE + + :NAME+: VALUE + + :NAME: + + :NAME+: + +NAME can contain any non-whitespace character but cannot end with a plus sign. +It cannot be the empty string. + +VALUE can contain anything but a newline character. + + + +## Paragraphs + +Paragraphs are the default element, which means that any unrecognized context is +a paragraph. + +Empty lines and other elements end paragraphs. + +Paragraphs can contain every type of object. + + + +## Table Rows + +A table row is either constituted of a vertical bar and any number of [table +cells](#Table_Cells) or a vertical bar followed by a hyphen. + +In the first case the table row has the **standard** type. In the second case, +it has the **rule** type. + +Table rows can only exist in [tables](#Tables). + + + +# Objects + +Objects can only be found in the following locations: + +- [affiliated keywords](#Affiliated_keywords) defined in + `org-element-parsed-keywords`, +- [document properties](#Keywords), +- [headline](#Headlines_and_Sections) titles, +- [inlinetask](#Inlinetasks) titles, +- [item](#Plain_Lists_and_Items) tags, +- [paragraphs](#Paragraphs), +- [table cells](#Table_Cells), +- [table rows](#Table_Rows), which can only contain table cell objects, +- [verse blocks](#Blocks). + +Most objects cannot contain objects.
Those which can will be specified. + + + +## Entities and LaTeX Fragments + +An entity follows the pattern: + + \NAME POST + +where NAME has a valid association in either `org-entities` or +`org-entities-user`. + +POST is the end of line, `{}` string, or a non-alphabetical character. It isn't +separated from NAME by a whitespace character. + +A LaTeX fragment can follow multiple patterns: + + \NAME BRACKETS + \(CONTENTS\) + \[CONTENTS\] + $$CONTENTS$$ + PRE$CHAR$POST + PRE$BORDER1 BODY BORDER2$POST + +NAME contains alphabetical characters only and must not have an association in +either **org-entities** or **org-entities-user**. + +BRACKETS is optional, and is not separated from NAME with white spaces. It may +contain any number of the following patterns: + + [CONTENTS1] + {CONTENTS2} + +where CONTENTS1 can contain any character except `{`, `}`, `[`, `]` and newline +and CONTENTS2 can contain any character except `{`, `}` and newline. + +CONTENTS can contain any character but cannot contain `\)` in the second +template or `\]` in the third one. + +PRE is either the beginning of line or a character different from `$`. + +CHAR is a non-whitespace character different from `.`, `,`, `?`, `;`, `'` or a +double quote. + +POST is any punctuation (including parentheses and quotes) or space character, +or the end of line. + +BORDER1 is a non-whitespace character different from `.`, `,`, `;` and `$`. + +BODY can contain any character except `$`, and may not span over more than 3 +lines. + +BORDER2 is any non-whitespace character different from `,`, `.` and `$`. + +--- + +> It would introduce incompatibilities with previous Org versions, but support +> for `$...$` (and for symmetry, `$$...$$`) constructs ought to be removed. +> +> They are slow to parse, fragile, redundant and imply false positives. — +> ngz + + + +## Export Snippets + +Pattern for export snippets is: + + @@NAME:VALUE@@ + +NAME can contain any alpha-numeric character and hyphens.
+ +VALUE can contain anything but `@@` string. + + + +## Footnote References + +There are three patterns for footnote references: + + [fn:LABEL] + [fn:LABEL:DEFINITION] + [fn::DEFINITION] + +LABEL can contain any word constituent character, hyphens and underscores. + +DEFINITION can contain any character. Though opening and closing square brackets +must be balanced in it. It can contain any object encountered in a paragraph, +even other footnote references. + +If the reference follows the second pattern, it is called an **inline +footnote**. If it follows the third one, i.e. if LABEL is omitted, it is an +**anonymous footnote**. + + + +## Inline Babel Calls and Source Blocks + +Inline Babel calls follow any of the following patterns: + + call_NAME(ARGUMENTS) + call_NAME[HEADER](ARGUMENTS)[HEADER] + +NAME can contain any character besides `(`, `)` and `\n`. + +HEADER can contain any character besides `]` and `\n`. + +ARGUMENTS can contain any character besides `)` and `\n`. + +Inline source blocks follow any of the following patterns: + + src_LANG{BODY} + src_LANG[OPTIONS]{BODY} + +LANG can contain any non-whitespace character. + +OPTIONS and BODY can contain any character but `\n`. + + + +## Line Breaks + +A line break consists in `\\SPACE` pattern at the end of an otherwise non-empty +line. + +SPACE can contain any number of tabs and spaces, including 0. + + + +## Links + +There are 4 major types of links: + + PRE1 RADIO POST1 ("radio" link) + <PROTOCOL:PATH> ("angle" link) + PRE2 PROTOCOL:PATH2 POST2 ("plain" link) + [[PATH3]DESCRIPTION] ("regular" link) + +PRE1 and POST1, when they exist, are non alphanumeric characters. + +RADIO is a string matched by some [radio target](#Targets_and_Radio_Targets). It +may contain [entities](#Entities_and_LaTeX_Fragments), [latex +fragments](#Entities_and_LaTeX_Fragments), +[subscript](#Subscript_and_Superscript) and +[superscript](#Subscript_and_Superscript). + +PROTOCOL is a string among `org-link-types`.
+ +PATH can contain any character but `]`, `<`, `>` and `\n`. + +PRE2 and POST2, when they exist, are non word constituent characters. + +PATH2 can contain any non-whitespace character except `(`, `)`, `<` and `>`. +It must end with a word-constituent character, or any non-whitespace +non-punctuation character followed by `/`. + +DESCRIPTION must be enclosed within square brackets. It can contain any +character but square brackets. It can contain any object found in a paragraph +except a [footnote reference](#Footnote_References), a [radio +target](#Targets_and_Radio_Targets) and a [line break](#Line_Breaks). It cannot +contain another link either, unless it is a plain or angular link. + +DESCRIPTION is optional. + +PATH3 is built according to the following patterns: + + FILENAME ("file" type) + PROTOCOL:PATH4 ("PROTOCOL" type) + PROTOCOL://PATH4 ("PROTOCOL" type) + id:ID ("id" type) + #CUSTOM-ID ("custom-id" type) + (CODEREF) ("coderef" type) + FUZZY ("fuzzy" type) + +FILENAME is a file name, either absolute or relative. + +PATH4 can contain any character besides square brackets. + +ID is constituted of hexadecimal numbers separated with hyphens. + +PATH4, CUSTOM-ID, CODEREF and FUZZY can contain any character besides square +brackets. + + + +## Macros + +Macros follow the pattern: + + {{{NAME(ARGUMENTS)}}} + +NAME must start with a letter and can be followed by any number of alpha-numeric +characters, hyphens and underscores. + +ARGUMENTS can contain anything but `}}}` string. Values within ARGUMENTS are +separated by commas. Non-separating commas have to be escaped with a backslash +character. + + + +## Targets and Radio Targets + +Radio targets follow the pattern: + + <<<CONTENTS>>> + +CONTENTS can be any character besides `<`, `>` and `\n`. It cannot start or end +with a whitespace character.
As far as objects go, it can contain [text +markup](#Emphasis_Markers), [entities](#Entities_and_LaTeX_Fragments), [latex +fragments](#Entities_and_LaTeX_Fragments), +[subscript](#Subscript_and_Superscript) and +[superscript](#Subscript_and_Superscript) only. + +Targets follow the pattern: + + <<TARGET>> + +TARGET can contain any character besides `<`, `>` and `\n`. It cannot start or +end with a whitespace character. It cannot contain any object. + + + +## Statistics Cookies + +Statistics cookies follow either pattern: + + [PERCENT%] + [NUM1/NUM2] + +PERCENT, NUM1 and NUM2 are numbers or the empty string. + + + +## Subscript and Superscript + +Pattern for subscript is: + + CHAR_SCRIPT + +Pattern for superscript is: + + CHAR^SCRIPT + +CHAR is any non-whitespace character. + +SCRIPT can be `*` or an expression enclosed in parentheses (respectively curly +brackets), possibly containing balanced parentheses (respectively curly +brackets). + +SCRIPT can also follow the pattern: + + SIGN CHARS FINAL + +SIGN is either a plus sign, a minus sign, or an empty string. + +CHARS is any number of alpha-numeric characters, commas, backslashes and dots, +or an empty string. + +FINAL is an alpha-numeric character. + +There is no white space between SIGN, CHARS and FINAL. + + + +## Table Cells + +Table cells follow the pattern: + + CONTENTS SPACES| + +CONTENTS can contain any character except a vertical bar. + +SPACES contains any number of space characters, including zero. It can be used +to properly align the table. + +The final bar may be replaced with a newline character for the last cell in row. + + + +## Timestamps + +There are seven possible patterns for timestamps: + + <%%(SEXP)> (diary) + <DATE TIME REPEATER-OR-DELAY> (active) + [DATE TIME REPEATER-OR-DELAY] (inactive) + <DATE TIME REPEATER-OR-DELAY>--<DATE TIME REPEATER-OR-DELAY> (active range) + <DATE TIME-TIME REPEATER-OR-DELAY> (active range) + [DATE TIME REPEATER-OR-DELAY]--[DATE TIME REPEATER-OR-DELAY] (inactive range) + [DATE TIME-TIME REPEATER-OR-DELAY] (inactive range) + +SEXP can contain any character except `>` and `\n`.
+ +DATE follows the pattern: + + YYYY-MM-DD DAYNAME + +`Y`, `M` and `D` are digits. DAYNAME can contain any non-whitespace character +besides `+`, `-`, `]`, `>`, a digit or `\n`. + +TIME follows the pattern `H:MM`. `H` can be one or two digits long and can start +with 0. + +REPEATER-OR-DELAY follows the pattern: + + MARK VALUE UNIT + +MARK is `+` (cumulate type), `++` (catch-up type) or `.+` (restart type) for a +repeater, and `-` (all type) or `--` (first type) for warning delays. + +VALUE is a number. + +UNIT is a character among `h` (hour), `d` (day), `w` (week), `m` (month), `y` +(year). + +MARK, VALUE and UNIT are not separated by whitespace characters. + +There can be two REPEATER-OR-DELAY in the timestamp: one as a repeater and one +as a warning delay. + + + +## Text Markup + +Text markup follows the pattern: + + PRE MARKER CONTENTS MARKER POST + +PRE is a whitespace character, `(`, `{`, `'` or a double quote. It can also be a +beginning of line. + +MARKER is a character among `*` (bold), `=` (verbatim), `/` (italic), `+` +(strike-through), `_` (underline), `~` (code). + +CONTENTS is a string following the pattern: + + BORDER BODY BORDER + +BORDER can be any non-whitespace character except `,`, `'` or a double quote. + +BODY can contain any character but may not span over more than 3 lines. + +BORDER and BODY are not separated by whitespaces. + +CONTENTS can contain any object encountered in a paragraph when markup is +**bold**, **italic**, **strike-through** or **underline**. + +POST is a whitespace character, `-`, `.`, `,`, `:`, `!`, `?`, `'`, `)`, `}` or a +double quote. It can also be an end of line. + +PRE, MARKER, CONTENTS, MARKER and POST are not separated by whitespace +characters. + +--- + +> All of this is wrong if `org-emphasis-regexp-components` or +> `org-emphasis-alist` are modified. +> +> This should really be simplified. +> +> Also, CONTENTS should be anything within code and verbatim emphasis, by +> definition.
— ngz + +# Footnotes + +1 In particular, the parser requires +stars at column 0 to be quoted by a comma when they do not define a headline. + +2 It also means that only headlines +and sections can be recognized just by looking at the beginning of the line. +Planning lines and property drawers can be recognized by looking at one or two +lines above. + +As a consequence, using `org-element-at-point` or `org-element-context` will +move up to the parent headline, and parse top-down from there until context +around original location is found. diff --git a/examples/custom.rs b/examples/custom.rs new file mode 100644 index 0000000..3f650ff --- /dev/null +++ b/examples/custom.rs @@ -0,0 +1,81 @@ +use std::convert::From; +use std::env::args; +use std::fs; +use std::io::{Error as IOError, Write}; +use std::result::Result; +use std::string::FromUtf8Error; + +use orgize::export::{DefaultHtmlHandler, HtmlHandler}; +use orgize::{Element, Org}; +use slugify::slugify; + +#[derive(Debug)] +enum MyError { + IO(IOError), + Heading, + Utf8(FromUtf8Error), +} + +// From trait is required for custom error type +impl From for MyError { + fn from(err: IOError) -> Self { + MyError::IO(err) + } +} + +impl From for MyError { + fn from(err: FromUtf8Error) -> Self { + MyError::Utf8(err) + } +} + +#[derive(Default)] +struct MyHtmlHandler(DefaultHtmlHandler); + +impl HtmlHandler for MyHtmlHandler { + fn start(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { + if let Element::Title(title) = element { + if title.level > 6 { + return Err(MyError::Heading); + } else { + write!( + w, + "", + title.level, + slugify!(&title.raw), + )?; + } + } else { + // fallthrough to default handler + self.0.start(w, element)?; + } + Ok(()) + } + + fn end(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { + if let Element::Title(title) = element { + write!(w, "", title.level)?; + } else { + self.0.end(w, element)?; + } + Ok(()) + } +} + +fn main() -> Result<(), MyError> { + let args: 
Vec<_> = args().collect(); + + if args.len() < 2 { + eprintln!("Usage: {} ", args[0]); + } else { + let contents = String::from_utf8(fs::read(&args[1])?)?; + + let mut writer = Vec::new(); + let mut handler = MyHtmlHandler::default(); + Org::parse(&contents).write_html_custom(&mut writer, &mut handler)?; + + println!("{}", String::from_utf8(writer)?); + } + + Ok(()) +} diff --git a/examples/html-slugify.rs b/examples/html-slugify.rs deleted file mode 100644 index 57972dd..0000000 --- a/examples/html-slugify.rs +++ /dev/null @@ -1,44 +0,0 @@ -//! ```bash -//! cargo run --example html-slugify '* hello world!' -//! ``` - -use orgize::{ - export::HtmlExport, - export::{from_fn_with_ctx, Container, Event, Traverser}, - Org, -}; -use slugify::slugify; -use std::cmp::min; -use std::env::args; - -fn main() { - let args: Vec<_> = args().collect(); - - if args.len() < 2 { - eprintln!("Usage: {} ", args[0]); - } else { - let mut html_export = HtmlExport::default(); - - let mut handler = from_fn_with_ctx(|event, ctx| { - if let Event::Enter(Container::Headline(headline)) = event { - let level = min(headline.level(), 6); - let title = headline.title().map(|e| e.to_string()).collect::(); - html_export.push_str(format!( - "", - slugify!(&title) - )); - for elem in headline.title() { - html_export.element(elem, ctx); - } - html_export.push_str(format!("")); - } else { - // forward to default html export - html_export.event(event, ctx); - } - }); - - Org::parse(&args[1]).traverse(&mut handler); - - println!("{}", html_export.finish()); - } -} diff --git a/examples/iter.rs b/examples/iter.rs new file mode 100644 index 0000000..1f95f67 --- /dev/null +++ b/examples/iter.rs @@ -0,0 +1,19 @@ +use orgize::Org; +use std::env::args; +use std::fs; +use std::io::Result; + +fn main() -> Result<()> { + let args: Vec<_> = args().collect(); + + if args.len() < 2 { + eprintln!("Usage: {} ", args[0]); + } else { + let contents = String::from_utf8(fs::read(&args[1])?).unwrap(); + + for event in 
Org::parse(&contents).iter() { + println!("{:?}", event); + } + } + Ok(()) +} diff --git a/examples/json.rs b/examples/json.rs new file mode 100644 index 0000000..e2ac5cf --- /dev/null +++ b/examples/json.rs @@ -0,0 +1,17 @@ +use orgize::Org; +use serde_json::to_string; +use std::env::args; +use std::fs; +use std::io::Result; + +fn main() -> Result<()> { + let args: Vec<_> = args().collect(); + + if args.len() < 2 { + eprintln!("Usage: {} ", args[0]); + } else { + let contents = String::from_utf8(fs::read(&args[1])?).unwrap(); + println!("{}", to_string(&Org::parse(&contents)).unwrap()); + } + Ok(()) +} diff --git a/examples/markdown.rs b/examples/markdown.rs deleted file mode 100644 index 137c1db..0000000 --- a/examples/markdown.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! ```bash -//! cargo run --example markdown test.org -//! ``` - -use orgize::{export::MarkdownExport, Org}; -use std::{env::args, fs}; - -fn main() { - let args: Vec<_> = args().collect(); - - if args.len() < 2 { - panic!("Usage: {} ", args[0]); - } - - let content = fs::read_to_string(&args[1]).unwrap(); - - let mut export = MarkdownExport::default(); - Org::parse(content).traverse(&mut export); - - fs::write(format!("{}.md", &args[1]), export.finish()).unwrap(); - - println!("Wrote to {}.md", &args[1]); -} diff --git a/examples/parse.rs b/examples/parse.rs deleted file mode 100644 index 978371d..0000000 --- a/examples/parse.rs +++ /dev/null @@ -1,30 +0,0 @@ -//! ```bash -//! cargo run --example parse '* hello\n** /world/!' -//! 
``` - -use orgize::Org; -use rowan::ast::AstNode; -use std::env::args; -use tracing_subscriber::fmt::format::FmtSpan; - -fn main() { - let args: Vec<_> = args().collect(); - - tracing_subscriber::fmt() - .without_time() - .with_file(true) - .with_span_events(FmtSpan::NEW) - .with_line_number(true) - .with_max_level(tracing::Level::TRACE) - .with_file(false) - .with_line_number(false) - .init(); - - if args.len() < 2 { - eprintln!("Usage: {} ", args[0]); - } else { - let s = &args[1].replace(r"\n", "\n").replace(r"\r", "\r"); - let org = Org::parse(s); - println!("{:#?}", org.document().syntax()); - } -} diff --git a/fuzz/.gitignore b/fuzz/.gitignore index 1a45eee..a092511 100644 --- a/fuzz/.gitignore +++ b/fuzz/.gitignore @@ -1,4 +1,3 @@ target corpus artifacts -coverage diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index eeb3de4..9162691 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -1,27 +1,20 @@ [package] name = "orgize-fuzz" -version = "0.0.0" +version = "0.0.1" +authors = ["Automatically generated"] publish = false -edition = "2018" [package.metadata] cargo-fuzz = true [dependencies] -libfuzzer-sys = "0.4" - -[dependencies.orgize] -path = ".." +libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" } +orgize = { path = ".." 
} # Prevent this from interfering with workspaces [workspace] members = ["."] -[profile.release] -debug = 1 - [[bin]] name = "fuzz_target_1" path = "fuzz_targets/fuzz_target_1.rs" -test = false -doc = false diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs index 4511ba0..bee8bcb 100644 --- a/fuzz/fuzz_targets/fuzz_target_1.rs +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -1,7 +1,14 @@ #![no_main] +#[macro_use] +extern crate libfuzzer_sys; +extern crate orgize; + +use orgize::Org; + +#[cfg_attr(rustfmt, rustfmt_skip)] libfuzzer_sys::fuzz_target!(|data: &[u8]| { - if let Ok(utf8) = std::str::from_utf8(data) { - let _ = orgize::Org::parse(utf8); + if let Ok(s) = std::str::from_utf8(data) { + let _ = Org::parse(s); } }); diff --git a/src/ast/affiliated_keyword.rs b/src/ast/affiliated_keyword.rs deleted file mode 100644 index 556c002..0000000 --- a/src/ast/affiliated_keyword.rs +++ /dev/null @@ -1,52 +0,0 @@ -use crate::syntax::SyntaxKind; - -use super::{filter_token, AffiliatedKeyword, Token}; - -impl AffiliatedKeyword { - /// - /// ```rust - /// use orgize::{Org, ast::AffiliatedKeyword}; - /// - /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::().unwrap(); - /// assert_eq!(keyword.key(), "CAPTION"); - /// ``` - pub fn key(&self) -> Token { - self.syntax - .children_with_tokens() - .find_map(filter_token(SyntaxKind::TEXT)) - .expect("keyword must contains TEXT") - } - - /// - /// ```rust - /// use orgize::{Org, ast::AffiliatedKeyword}; - /// - /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::().unwrap(); - /// assert!(keyword.optional().is_none()); - /// let keyword = Org::parse("#+CAPTION[OPTIONAL]: VALUE\nabc").first_node::().unwrap(); - /// assert_eq!(keyword.optional().unwrap(), "OPTIONAL"); - /// ``` - pub fn optional(&self) -> Option { - self.syntax - .children_with_tokens() - .skip_while(|it| it.kind() != SyntaxKind::L_BRACKET) - .nth(1) - .and_then(filter_token(SyntaxKind::TEXT)) - } - - /// - 
/// ```rust - /// use orgize::{Org, ast::AffiliatedKeyword}; - /// - /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::().unwrap(); - /// assert_eq!(keyword.value().unwrap(), " VALUE"); - /// let keyword = Org::parse("#+CAPTION[OPTIONAL]:VALUE\nabc").first_node::().unwrap(); - /// assert_eq!(keyword.value().unwrap(), "VALUE"); - /// ``` - pub fn value(&self) -> Option { - self.syntax - .children_with_tokens() - .filter_map(filter_token(SyntaxKind::TEXT)) - .last() - } -} diff --git a/src/ast/block.rs b/src/ast/block.rs deleted file mode 100644 index 8e7fad5..0000000 --- a/src/ast/block.rs +++ /dev/null @@ -1,194 +0,0 @@ -use super::{ - filter_token, CenterBlock, CommentBlock, DynBlock, ExampleBlock, ExportBlock, QuoteBlock, - SourceBlock, SpecialBlock, SyntaxKind, Token, VerseBlock, -}; -use rowan::TextSize; - -impl SourceBlock { - /// ```rust - /// use orgize::{Org, ast::SourceBlock}; - /// - /// let block = Org::parse("#+begin_src c\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.language().unwrap(), "c"); - /// let block = Org::parse("#+begin_src javascript \n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.language().unwrap(), "javascript"); - /// - /// let block = Org::parse("#+begin_src\n#+end_src").first_node::().unwrap(); - /// assert!(block.language().is_none()); - /// ```` - pub fn language(&self) -> Option { - self.syntax - .children() - .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) - .into_iter() - .flat_map(|n| n.children_with_tokens()) - .find_map(filter_token(SyntaxKind::SRC_BLOCK_LANGUAGE)) - } - - /// ```rust - /// use orgize::{Org, ast::SourceBlock}; - /// - /// let block = Org::parse("#+begin_src emacs-lisp -n 20\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.switches().unwrap(), "-n 20"); - /// let block = Org::parse("#+begin_src emacs-lisp -n 20 -r :tangle yes \n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.switches().unwrap(), "-n 20 -r"); - /// - /// let block = 
Org::parse("#+begin_src emacs-lisp\n#+end_src").first_node::().unwrap(); - /// assert!(block.switches().is_none()); - /// let block = Org::parse("#+begin_src\n#+end_src").first_node::().unwrap(); - /// assert!(block.switches().is_none()); - /// let block = Org::parse("#+begin_src :tangle yes\n#+end_src").first_node::().unwrap(); - /// assert!(block.switches().is_none()); - /// ```` - pub fn switches(&self) -> Option { - self.syntax - .children() - .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) - .into_iter() - .flat_map(|n| n.children_with_tokens()) - .find_map(filter_token(SyntaxKind::SRC_BLOCK_SWITCHES)) - } - - /// ```rust - /// use orgize::{Org, ast::SourceBlock}; - /// - /// let block = Org::parse("#+begin_src c :tangle yes\n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.parameters().unwrap(), ":tangle yes"); - /// let block = Org::parse("#+begin_src c :tangle \n#+end_src").first_node::().unwrap(); - /// assert_eq!(block.parameters().unwrap(), ":tangle"); - /// - /// let block = Org::parse("#+begin_src c\n#+end_src").first_node::().unwrap(); - /// assert!(block.parameters().is_none()); - /// ```` - pub fn parameters(&self) -> Option { - self.syntax - .children() - .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) - .into_iter() - .flat_map(|n| n.children_with_tokens()) - .find_map(filter_token(SyntaxKind::SRC_BLOCK_PARAMETERS)) - } - - /// Return unescaped source code string - /// - /// ```rust - /// use orgize::{Org, ast::SourceBlock}; - /// - /// let block = Org::parse(r#" - /// #+begin_src - /// #+end_src - /// "#).first_node::().unwrap(); - /// assert_eq!(block.value(), ""); - /// - /// let block = Org::parse(r#" - /// #+begin_src - /// ,* foo - /// ,#+ bar - /// #+end_src - /// "#).first_node::().unwrap(); - /// assert_eq!(block.value(), "* foo\n#+ bar\n"); - /// ```` - pub fn value(&self) -> String { - self.syntax - .children() - .find(|e| e.kind() == SyntaxKind::BLOCK_CONTENT) - .into_iter() - .flat_map(|n| n.children_with_tokens()) - 
.filter_map(filter_token(SyntaxKind::TEXT)) - .fold(String::new(), |acc, value| acc + &value) - } -} - -impl ExportBlock { - /// ```rust - /// use orgize::{Org, ast::ExportBlock}; - /// - /// let block = Org::parse("#+begin_export html\n#+end_export").first_node::().unwrap(); - /// assert_eq!(block.ty().unwrap(), "html"); - /// - /// let block = Org::parse("#+begin_export\n#+end_export").first_node::().unwrap(); - /// assert!(block.ty().is_none()); - /// ```` - pub fn ty(&self) -> Option { - self.syntax - .children() - .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) - .into_iter() - .flat_map(|n| n.children_with_tokens()) - .find_map(filter_token(SyntaxKind::EXPORT_BLOCK_TYPE)) - } - - /// Returns export block contents - /// - /// ```rust - /// use orgize::{Org, ast::ExportBlock}; - /// - /// let block = Org::parse(r#" - /// #+begin_export html - /// - /// #+end_export - /// "#).first_node::().unwrap(); - /// assert_eq!(block.value(), "\n"); - /// - /// let block = Org::parse(r#" - /// #+BEGIN_EXPORT org - /// ,#+BEGIN_EXPORT html - /// - /// ,#+END_EXPORT - /// #+END_EXPORT - /// "#).first_node::().unwrap(); - /// assert_eq!(block.value(), r#"#+BEGIN_EXPORT html - /// - /// #+END_EXPORT - /// "#); - /// ``` - pub fn value(&self) -> String { - self.syntax - .children() - .find(|e| e.kind() == SyntaxKind::BLOCK_CONTENT) - .into_iter() - .flat_map(|n| n.children_with_tokens()) - .filter_map(filter_token(SyntaxKind::TEXT)) - .fold(String::new(), |acc, value| acc + &value) - } -} - -macro_rules! 
impl_content_border { - ($block:ident) => { - impl $block { - /// Beginning position of block content - pub fn content_start(&self) -> TextSize { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::BLOCK_BEGIN) - .map(|n| n.text_range().end()) - .unwrap_or_else(|| { - debug_assert!(false, "block must contains BLOCK_BEGIN"); - TextSize::default() - }) - } - - /// Ending position of block content - pub fn content_end(&self) -> TextSize { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::BLOCK_END) - .map(|n| n.text_range().start()) - .unwrap_or_else(|| { - debug_assert!(false, "block must contains BLOCK_END"); - TextSize::default() - }) - } - } - }; -} - -impl_content_border!(SourceBlock); -impl_content_border!(ExportBlock); -impl_content_border!(CenterBlock); -impl_content_border!(CommentBlock); -impl_content_border!(ExampleBlock); -impl_content_border!(QuoteBlock); -impl_content_border!(SpecialBlock); -impl_content_border!(VerseBlock); -impl_content_border!(DynBlock); diff --git a/src/ast/clock.rs b/src/ast/clock.rs deleted file mode 100644 index e8df645..0000000 --- a/src/ast/clock.rs +++ /dev/null @@ -1,58 +0,0 @@ -use rowan::ast::support; - -use crate::{ast::Token, SyntaxKind}; - -use super::{Clock, Timestamp}; - -impl Clock { - pub fn value(&self) -> Option { - support::child(&self.syntax) - } - - /// ```rust - /// use orgize::{Org, ast::Clock}; - /// - /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::().unwrap(); - /// assert!(clock.duration().is_none()); - /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::().unwrap(); - /// assert_eq!(clock.duration().unwrap(), "12:00"); - /// - /// ``` - pub fn duration(&self) -> Option { - self.syntax - .children_with_tokens() - .skip_while(|t| t.kind() != SyntaxKind::DOUBLE_ARROW) - .skip(1) - .find(|t| t.kind() != SyntaxKind::WHITESPACE) - .and_then(|e| { - debug_assert_eq!(e.kind(), SyntaxKind::TEXT); - Some(Token(e.into_token()?)) - }) - } - 
- /// ```rust - /// use orgize::{Org, ast::Clock}; - /// - /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::().unwrap(); - /// assert!(!clock.is_closed()); - /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::().unwrap(); - /// assert!(clock.is_closed()); - /// ``` - pub fn is_closed(&self) -> bool { - self.syntax - .children_with_tokens() - .any(|t| t.kind() == SyntaxKind::DOUBLE_ARROW) - } - - /// ```rust - /// use orgize::{Org, ast::Clock}; - /// - /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::().unwrap(); - /// assert!(clock.is_running()); - /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::().unwrap(); - /// assert!(!clock.is_running()); - /// ``` - pub fn is_running(&self) -> bool { - !self.is_closed() - } -} diff --git a/src/ast/cloze.rs b/src/ast/cloze.rs deleted file mode 100644 index f85484b..0000000 --- a/src/ast/cloze.rs +++ /dev/null @@ -1,111 +0,0 @@ -use crate::{syntax::OrgLanguage, SyntaxElement, SyntaxKind, SyntaxNode}; -use rowan::{ast::AstNode, TextRange, TextSize}; - -use super::Token; - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Cloze { - pub(crate) syntax: SyntaxNode, -} - -impl AstNode for Cloze { - type Language = OrgLanguage; - - fn can_cast(kind: SyntaxKind) -> bool { - kind == SyntaxKind::CLOZE - } - - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Cloze { syntax: node }) - } - - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} - -impl Cloze { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - - pub fn text(&self) 
-> impl Iterator { - self.syntax - .children_with_tokens() - .skip(1) - .take_while(|n| n.kind() != SyntaxKind::R_CURLY) - } - - /// ```rust - /// use orgize::{Org, ast::Cloze}; - /// - /// let cloze = Org::parse("{{text}}").first_node::().unwrap(); - /// assert_eq!(cloze.text_raw(), "text"); - /// let cloze = Org::parse("{{$\\frac{1}{2}$}{}@id}").first_node::().unwrap(); - /// assert_eq!(cloze.text_raw(), "$\\frac{1}{2}$"); - /// let cloze = Org::parse("{{ [[file:my_image.png]] }{hint}}").first_node::().unwrap(); - /// assert_eq!(cloze.text_raw(), " [[file:my_image.png]] "); - /// ``` - pub fn text_raw(&self) -> String { - self.text() - .fold(String::new(), |acc, e| acc + &e.to_string()) - } - - /// ```rust - /// use orgize::{Org, ast::Cloze}; - /// - /// let cloze = Org::parse("{{text}}").first_node::().unwrap(); - /// assert!(cloze.hint().is_none()); - /// let cloze = Org::parse("{{text}{}@id}").first_node::().unwrap(); - /// assert_eq!(cloze.hint().unwrap(), ""); - /// let cloze = Org::parse("{{text}{hint}}").first_node::().unwrap(); - /// assert_eq!(cloze.hint().unwrap(), "hint"); - /// ``` - pub fn hint(&self) -> Option { - self.syntax - .children_with_tokens() - .skip_while(|n| n.kind() != SyntaxKind::L_CURLY) - .nth(1) - .and_then(|e| { - debug_assert_eq!(e.kind(), SyntaxKind::TEXT); - Some(Token(e.into_token()?)) - }) - } - - /// ```rust - /// use orgize::{Org, ast::Cloze}; - /// - /// let cloze = Org::parse("{{text}}").first_node::().unwrap(); - /// assert!(cloze.id().is_none()); - /// let cloze = Org::parse("{{text}@}").first_node::().unwrap(); - /// assert_eq!(cloze.id().unwrap(), ""); - /// let cloze = Org::parse("{{text}@id}").first_node::().unwrap(); - /// assert_eq!(cloze.id().unwrap(), "id"); - /// ``` - pub fn id(&self) -> Option { - self.syntax - .children_with_tokens() - .skip_while(|n| n.kind() != SyntaxKind::AT) - .nth(1) - .and_then(|e| { - debug_assert_eq!(e.kind(), SyntaxKind::TEXT); - Some(Token(e.into_token()?)) - }) - } -} diff --git 
a/src/ast/comment.rs b/src/ast/comment.rs deleted file mode 100644 index a08bc39..0000000 --- a/src/ast/comment.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::SyntaxKind; - -use super::{filter_token, Comment}; - -impl Comment { - /// Contents without pound signs - /// - /// ```rust - /// use orgize::{ast::Comment, Org}; - /// - /// let fixed = Org::parse("# A\n#\n# B\n# C").first_node::().unwrap(); - /// assert_eq!(fixed.value(), "A\n\nB\nC"); - /// ``` - pub fn value(&self) -> String { - self.syntax - .children_with_tokens() - .filter_map(filter_token(SyntaxKind::TEXT)) - .fold(String::new(), |acc, text| acc + &text) - } -} diff --git a/src/ast/document.rs b/src/ast/document.rs deleted file mode 100644 index f248769..0000000 --- a/src/ast/document.rs +++ /dev/null @@ -1,88 +0,0 @@ -use rowan::ast::AstNode; - -use crate::Org; - -use super::{Document, Keyword, PropertyDrawer}; - -impl Document { - /// Returns an iterator of keywords in zeroth section - /// - /// ```rust - /// use orgize::{Org, ast::Document}; - /// - /// let org = Org::parse(r#" - /// #+TITLE: hello - /// #+TITLE: world - /// #+DATE: today - /// #+AUTHOR: poi - /// * headline - /// #+SOMETHING:"#); - /// let doc = org.first_node::().unwrap(); - /// assert_eq!(doc.keywords().count(), 4); - /// ``` - pub fn keywords(&self) -> impl Iterator { - self.section() - .into_iter() - .flat_map(|section| section.syntax.children().filter_map(Keyword::cast)) - } - - /// Returns the value in top-level `#+TITLE` - /// - /// Multiple `#+TITLE` are joined with spaces. 
- /// - /// Returns `None` if file doesn't contain `#+TITLE` - /// - /// ```rust - /// use orgize::{Org, ast::Document}; - /// - /// let org = Org::parse("#+TITLE: hello\n#+TITLE: world"); - /// let doc = org.first_node::().unwrap(); - /// assert_eq!(doc.title().unwrap(), "hello world"); - /// - /// let org = Org::parse(""); - /// let doc = org.first_node::().unwrap(); - /// assert!(doc.title().is_none()); - /// ``` - pub fn title(&self) -> Option { - self.keywords() - .filter(|kw| kw.key().eq_ignore_ascii_case("TITLE")) - .fold(Option::::None, |acc, cur| { - let mut s = acc.unwrap_or_default(); - if !s.is_empty() { - s.push(' '); - } - s.push_str(cur.value().trim()); - Some(s) - }) - } - - /// Returns top-level properties drawer - /// - /// ```rust - /// use orgize::{Org, ast::Document}; - /// - /// let org = Org::parse(r#":PROPERTIES: - /// :ID: 20220718T085035.042592 - /// :END: - /// #+TITLE: Complete Computing"#); - /// - /// let properties = org.document().properties().unwrap(); - /// assert_eq!(properties.to_hash_map().len(), 1); - /// assert_eq!(properties.get("ID").unwrap(), "20220718T085035.042592"); - /// ``` - pub fn properties(&self) -> Option { - rowan::ast::support::child(&self.syntax) - } -} - -impl Org { - /// Equals to `self.document().title()`, see [Document::title] - pub fn title(&self) -> Option { - self.document().title() - } - - /// Equals to `self.document().keywords()`, see [Document::keywords] - pub fn keywords(&self) -> impl Iterator { - self.document().keywords() - } -} diff --git a/src/ast/drawer.rs b/src/ast/drawer.rs deleted file mode 100644 index 3111ee6..0000000 --- a/src/ast/drawer.rs +++ /dev/null @@ -1,140 +0,0 @@ -use rowan::TextSize; -use std::collections::HashMap; - -use super::{filter_token, Drawer, PropertyDrawer, SyntaxKind, Token}; - -impl PropertyDrawer { - /// ```rust - /// use orgize::{Org, ast::PropertyDrawer}; - /// - /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:"); - /// let 
drawer = org.first_node::().unwrap(); - /// assert_eq!(drawer.iter().count(), 2); - /// ``` - pub fn iter(&self) -> impl Iterator { - self.node_properties().filter_map(|property| { - let mut texts = property - .syntax - .children_with_tokens() - .filter_map(filter_token(SyntaxKind::TEXT)); - - Some((texts.next()?, texts.next()?)) - }) - } - - /// ```rust - /// use orgize::{Org, ast::PropertyDrawer}; - /// - /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:"); - /// let drawer = org.first_node::().unwrap(); - /// assert_eq!(drawer.get("CUSTOM_ID").unwrap(), "someid"); - /// assert_eq!(drawer.get("ID").unwrap(), "id"); - /// ``` - pub fn get(&self, key: &str) -> Option { - self.iter().find_map(|(k, v)| (k == key).then_some(v)) - } - - /// ```rust - /// use orgize::{Org, ast::PropertyDrawer}; - /// - /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:CUSTOM_ID: id\n:END:"); - /// let drawer = org.first_node::().unwrap(); - /// let map = drawer.to_hash_map(); - /// assert_eq!(map.len(), 1); - /// assert_eq!(map.get("CUSTOM_ID").unwrap(), "id"); - /// ``` - pub fn to_hash_map(&self) -> HashMap { - self.iter().collect() - } - - #[cfg(feature = "indexmap")] - /// ```rust - /// use orgize::{Org, ast::PropertyDrawer}; - /// - /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:"); - /// let drawer = org.first_node::().unwrap(); - /// let map = drawer.to_index_map(); - /// let item1 = map.get_index(1).unwrap(); - /// assert_eq!(item1.0, "ID"); - /// assert_eq!(item1.1, "id"); - /// ``` - pub fn to_index_map(&self) -> indexmap::IndexMap { - self.iter().collect() - } - - /// Beginning position of drawer content - pub fn content_start(&self) -> TextSize { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::DRAWER_BEGIN) - .map(|n| n.text_range().end()) - .unwrap_or_else(|| { - debug_assert!(false, "property drawer must contains DRAWER_BEGIN"); - TextSize::default() - }) - } - - 
/// Ending position of drawer content - pub fn content_end(&self) -> TextSize { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::DRAWER_END) - .map(|n| n.text_range().start()) - .unwrap_or_else(|| { - debug_assert!(false, "property drawer must contains DRAWER_END"); - TextSize::default() - }) - } -} - -impl Drawer { - /// ```rust - /// use orgize::{Org, ast::Drawer}; - /// - /// let org = Org::parse("* Heading\n:LOGBOOK:\n:END:"); - /// let drawer = org.first_node::().unwrap(); - /// assert_eq!(drawer.name(), "LOGBOOK"); - /// ``` - pub fn name(&self) -> Token { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::DRAWER_BEGIN) - .expect("drawer must contains DRAWER_BEGIN") - .children_with_tokens() - .find_map(filter_token(SyntaxKind::TEXT)) - .expect("drawer begin must contains TEXT") - } - - /// Beginning position of drawer content - pub fn content_start(&self) -> TextSize { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::DRAWER_BEGIN) - .map(|n| n.text_range().end()) - .unwrap_or_else(|| { - debug_assert!(false, "drawer must contains DRAWER_BEGIN"); - TextSize::default() - }) - } - - /// Ending position of drawer content - pub fn content_end(&self) -> TextSize { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::DRAWER_END) - .map(|n| n.text_range().start()) - .unwrap_or_else(|| { - debug_assert!(false, "drawer must contains DRAWER_END"); - TextSize::default() - }) - } - - /// Raw text of drawer content - pub fn content_raw(&self) -> String { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::DRAWER_CONTENT) - .map(|n| n.to_string()) - .unwrap_or_default() - } -} diff --git a/src/ast/entity.rs b/src/ast/entity.rs deleted file mode 100644 index 49cd19c..0000000 --- a/src/ast/entity.rs +++ /dev/null @@ -1,168 +0,0 @@ -use crate::{entities::ENTITIES, SyntaxKind}; - -use super::{filter_token, Entity}; - -impl Entity { - fn entity(&self) -> Option<&(&str, &str, bool, &str, &str, &str, &str)> { - let 
token = self - .syntax - .children_with_tokens() - .find_map(filter_token(SyntaxKind::TEXT))?; - - ENTITIES.iter().find(|i| i.0 == token.as_ref()) - } - - /// Entity name - /// - /// ```rust - /// use orgize::{ast::Entity, Org}; - /// - /// let e = Org::parse("\\alpha{}").first_node::().unwrap(); - /// assert_eq!(e.name(), "alpha"); - /// let e = Org::parse("\\_ ").first_node::().unwrap(); - /// assert_eq!(e.name(), " "); - /// ``` - pub fn name(&self) -> &str { - self.entity().map_or_else( - || { - debug_assert!(false); - "" - }, - |e| e.0, - ) - } - - /// Entity LaTeX representation - /// - /// ```rust - /// use orgize::{ast::Entity, Org}; - /// - /// let e = Org::parse("\\middot").first_node::().unwrap(); - /// assert_eq!(e.latex(), "\\textperiodcentered{}"); - /// ``` - pub fn latex(&self) -> &str { - self.entity().map_or_else( - || { - debug_assert!(false); - "" - }, - |e| e.1, - ) - } - - /// Whether entity needs to be in math mode - /// - /// ```rust - /// use orgize::{ast::Entity, Org}; - /// - /// let e = Org::parse("\\middot").first_node::().unwrap(); - /// assert!(!e.is_latex_math()); - /// let e = Org::parse("\\alefsym").first_node::().unwrap(); - /// assert!(e.is_latex_math()); - /// ``` - pub fn is_latex_math(&self) -> bool { - self.entity().map_or_else( - || { - debug_assert!(false); - false - }, - |e| e.2, - ) - } - - /// Entity HTML representation - /// - /// ```rust - /// use orgize::{ast::Entity, Org}; - /// - /// let e = Org::parse("\\S").first_node::().unwrap(); - /// assert_eq!(e.html(), "§"); - /// ``` - pub fn html(&self) -> &str { - self.entity().map_or_else( - || { - debug_assert!(false); - "" - }, - |e| e.3, - ) - } - - /// Entity ASCII representation - /// - /// ```rust - /// use orgize::{ast::Entity, Org}; - /// - /// let e = Org::parse("\\S").first_node::().unwrap(); - /// assert_eq!(e.ascii(), "section"); - /// ``` - pub fn ascii(&self) -> &str { - self.entity().map_or_else( - || { - debug_assert!(false); - "" - }, - |e| e.4, - ) - } 
- - /// Entity Latin1 encoding representation - /// - /// ```rust - /// use orgize::{ast::Entity, Org}; - /// - /// let e = Org::parse("\\S").first_node::().unwrap(); - /// assert_eq!(e.latin1(), "§"); - /// let e = Org::parse("\\rsaquo").first_node::().unwrap(); - /// assert_eq!(e.latin1(), ">"); - /// ``` - pub fn latin1(&self) -> &str { - self.entity().map_or_else( - || { - debug_assert!(false); - "" - }, - |e| e.5, - ) - } - - /// Entity UTF-8 encoding representation - /// - /// ```rust - /// use orgize::{ast::Entity, Org}; - /// - /// let e = Org::parse("\\S").first_node::().unwrap(); - /// assert_eq!(e.utf8(), "§"); - /// let e = Org::parse("\\rsaquo").first_node::().unwrap(); - /// assert_eq!(e.utf8(), "›"); - /// ``` - pub fn utf8(&self) -> &str { - self.entity().map_or_else( - || { - debug_assert!(false); - "" - }, - |e| e.6, - ) - } - - /// Entity contains optional brackets - /// - /// ```rust - /// use orgize::{ast::Entity, Org}; - /// - /// let e = Org::parse("\\beta").first_node::().unwrap(); - /// assert!(!e.is_use_brackets()); - /// let e = Org::parse("\\S{}").first_node::().unwrap(); - /// assert!(e.is_use_brackets()); - /// let e = Org::parse("\\_ ").first_node::().unwrap(); - /// assert!(!e.is_use_brackets()); - /// ``` - pub fn is_use_brackets(&self) -> bool { - self.syntax - .children_with_tokens() - .filter(|n| n.kind() == SyntaxKind::TEXT) - .nth(1) - .is_some() - } -} diff --git a/src/ast/fixed_width.rs b/src/ast/fixed_width.rs deleted file mode 100644 index b601738..0000000 --- a/src/ast/fixed_width.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::SyntaxKind; - -use super::{filter_token, FixedWidth}; - -impl FixedWidth { - /// Contents without colons prefix - /// - /// ```rust - /// use orgize::{ast::FixedWidth, Org}; - /// - /// let fixed = Org::parse(": A\n:\n: B\n: C").first_node::().unwrap(); - /// assert_eq!(fixed.value(), "A\n\nB\nC"); - /// ``` - pub fn value(&self) -> String { - self.syntax - .children_with_tokens() - 
.filter_map(filter_token(SyntaxKind::TEXT)) - .fold(String::new(), |acc, text| acc + &text) - } -} diff --git a/src/ast/generate.js b/src/ast/generate.js deleted file mode 100644 index 81bbc4f..0000000 --- a/src/ast/generate.js +++ /dev/null @@ -1,357 +0,0 @@ -const nodes = [ - { - struct: "Document", - kind: ["DOCUMENT"], - pre_blank: true, - first_child: [ - ["section", "Section"], - ["first_headline", "Headline"], - ], - last_child: [["last_headline", "Headline"]], - children: [["headlines", "Headline"]], - }, - { - struct: "Section", - kind: ["SECTION"], - post_blank: true, - }, - { - struct: "Paragraph", - kind: ["PARAGRAPH"], - post_blank: true, - affiliated_keywords: true, - }, - { - struct: "Headline", - kind: ["HEADLINE"], - first_child: [ - ["section", "Section"], - ["planning", "Planning"], - ["properties", "PropertyDrawer"], - ], - children: [["headlines", "Headline"]], - post_blank: true, - }, - { - struct: "PropertyDrawer", - kind: ["PROPERTY_DRAWER"], - children: [["node_properties", "NodeProperty"]], - }, - { - struct: "NodeProperty", - kind: ["NODE_PROPERTY"], - }, - { - struct: "Planning", - kind: ["PLANNING"], - }, - { - struct: "OrgTable", - kind: ["ORG_TABLE"], - post_blank: true, - affiliated_keywords: true, - }, - { - struct: "OrgTableRow", - kind: ["ORG_TABLE_RULE_ROW", "ORG_TABLE_STANDARD_ROW"], - }, - { - struct: "OrgTableCell", - kind: ["ORG_TABLE_CELL"], - }, - { - struct: "List", - kind: ["LIST"], - children: [["items", "ListItem"]], - affiliated_keywords: true, - }, - { - struct: "ListItem", - kind: ["LIST_ITEM"], - }, - { - struct: "Drawer", - kind: ["DRAWER"], - }, - { - struct: "DynBlock", - kind: ["DYN_BLOCK"], - affiliated_keywords: true, - }, - { - struct: "Keyword", - kind: ["KEYWORD"], - }, - { - struct: "BabelCall", - kind: ["BABEL_CALL"], - }, - { - struct: "AffiliatedKeyword", - kind: ["AFFILIATED_KEYWORD"], - }, - { - struct: "TableEl", - kind: ["TABLE_EL"], - post_blank: true, - }, - { - struct: "Clock", - kind: ["CLOCK"], 
- post_blank: true, - }, - { - struct: "FnDef", - kind: ["FN_DEF"], - post_blank: true, - affiliated_keywords: true, - }, - { - struct: "Comment", - kind: ["COMMENT"], - post_blank: true, - token: [["text", "TEXT"]], - affiliated_keywords: true, - }, - { - struct: "Rule", - kind: ["RULE"], - post_blank: true, - }, - { - struct: "FixedWidth", - kind: ["FIXED_WIDTH"], - post_blank: true, - token: [["text", "TEXT"]], - affiliated_keywords: true, - }, - { - struct: "SpecialBlock", - kind: ["SPECIAL_BLOCK"], - affiliated_keywords: true, - }, - { - struct: "QuoteBlock", - kind: ["QUOTE_BLOCK"], - affiliated_keywords: true, - }, - { - struct: "CenterBlock", - kind: ["CENTER_BLOCK"], - affiliated_keywords: true, - }, - { - struct: "VerseBlock", - kind: ["VERSE_BLOCK"], - affiliated_keywords: true, - }, - { - struct: "CommentBlock", - kind: ["COMMENT_BLOCK"], - affiliated_keywords: true, - }, - { - struct: "ExampleBlock", - kind: ["EXAMPLE_BLOCK"], - affiliated_keywords: true, - }, - { - struct: "ExportBlock", - kind: ["EXPORT_BLOCK"], - affiliated_keywords: true, - }, - { - struct: "SourceBlock", - kind: ["SOURCE_BLOCK"], - affiliated_keywords: true, - }, - { - struct: "InlineCall", - kind: ["INLINE_CALL"], - }, - { - struct: "InlineSrc", - kind: ["INLINE_SRC"], - }, - { - struct: "Link", - kind: ["LINK"], - }, - { - struct: "Cookie", - kind: ["COOKIE"], - }, - { - struct: "RadioTarget", - kind: ["RADIO_TARGET"], - }, - { - struct: "FnRef", - kind: ["FN_REF"], - }, - { - struct: "Macros", - kind: ["MACROS"], - }, - { - struct: "Snippet", - kind: ["SNIPPET"], - }, - { - struct: "Target", - kind: ["TARGET"], - }, - { - struct: "Bold", - kind: ["BOLD"], - }, - { - struct: "Strike", - kind: ["STRIKE"], - }, - { - struct: "Italic", - kind: ["ITALIC"], - }, - { - struct: "Underline", - kind: ["UNDERLINE"], - }, - { - struct: "Verbatim", - kind: ["VERBATIM"], - }, - { - struct: "Code", - kind: ["CODE"], - token: [["text", "TEXT"]], - }, - { - struct: "Timestamp", - kind: 
["TIMESTAMP_ACTIVE", "TIMESTAMP_INACTIVE", "TIMESTAMP_DIARY"], - token: [ - ["year_start", "TIMESTAMP_YEAR"], - ["month_start", "TIMESTAMP_MONTH"], - ["day_start", "TIMESTAMP_DAY"], - ["hour_start", "TIMESTAMP_HOUR"], - ["minute_start", "TIMESTAMP_MINUTE"], - ], - last_token: [ - ["year_end", "TIMESTAMP_YEAR"], - ["month_end", "TIMESTAMP_MONTH"], - ["day_end", "TIMESTAMP_DAY"], - ["hour_end", "TIMESTAMP_HOUR"], - ["minute_end", "TIMESTAMP_MINUTE"], - ], - }, - { - struct: "LatexEnvironment", - kind: ["LATEX_ENVIRONMENT"], - }, - { - struct: "LatexFragment", - kind: ["LATEX_FRAGMENT"], - }, - { - struct: "Entity", - kind: ["ENTITY"], - }, - { - struct: "LineBreak", - kind: ["LINE_BREAK"], - }, - { - struct: "Superscript", - kind: ["SUPERSCRIPT"], - }, - { - struct: "Subscript", - kind: ["SUBSCRIPT"], - }, -]; - -let content = `//! generated file, do not modify it directly -#![allow(clippy::all)] -#![allow(unused)] - -use rowan::{ast::{support, AstChildren, AstNode}, TextSize, TextRange}; -use crate::syntax::{OrgLanguage, SyntaxKind, SyntaxKind::*, SyntaxNode, SyntaxToken}; - -fn affiliated_keyword(node: &SyntaxNode, filter: impl Fn(&str) -> bool) -> Option { - node.children() - .take_while(|n| n.kind() == SyntaxKind::AFFILIATED_KEYWORD) - .filter_map(AffiliatedKeyword::cast) - .find(|k| filter(&k.key())) -} -`; - -for (const node of nodes) { - content += ` -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ${node.struct} { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for ${node.struct} { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { ${node.kind - .map((k) => `kind == ${k}`) - .join(" || ")} } - fn cast(node: SyntaxNode) -> Option<${ - node.struct - }> { Self::can_cast(node.kind()).then(|| ${node.struct} { syntax: node }) } - fn syntax(&self) -> &SyntaxNode { &self.syntax } -} -impl ${node.struct} { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending 
position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -`; - for (const [method, kind] of node.token || []) { - content += ` pub fn ${method}(&self) -> Option { super::token(&self.syntax, ${kind}) }\n`; - } - for (const [method, kind] of node.last_token || []) { - content += ` pub fn ${method}(&self) -> Option { super::last_token(&self.syntax, ${kind}) }\n`; - } - for (const [method, kind] of node.parent || []) { - content += ` pub fn ${method}(&self) -> Option<${kind}> { self.syntax.parent().and_then(${kind}::cast) }\n`; - } - for (const [method, kind] of node.first_child || []) { - content += ` pub fn ${method}(&self) -> Option<${kind}> { support::child(&self.syntax) }\n`; - } - for (const [method, kind] of node.last_child || []) { - content += ` pub fn ${method}(&self) -> Option<${kind}> { super::last_child(&self.syntax) }\n`; - } - for (const [method, kind] of node.children || []) { - content += ` pub fn ${method}(&self) -> AstChildren<${kind}> { support::children(&self.syntax) }\n`; - } - if (node.post_blank) { - content += ` pub fn post_blank(&self) -> usize { super::blank_lines(&self.syntax) }\n`; - } - if (node.pre_blank) { - content += ` pub fn pre_blank(&self) -> usize { super::blank_lines(&self.syntax) }\n`; - } - if (node.affiliated_keywords) { - content += ` pub fn caption(&self) -> Option { affiliated_keyword(&self.syntax, |k| k == "CAPTION") }\n`; - content += ` pub fn header(&self) -> Option { affiliated_keyword(&self.syntax, |k| k == "HEADER") }\n`; - content += ` pub fn name(&self) -> Option { affiliated_keyword(&self.syntax, |k| k == "NAME") }\n`; - content += ` pub fn plot(&self) -> Option { affiliated_keyword(&self.syntax, |k| k == "PLOT") }\n`; - content += ` pub fn results(&self) -> Option { 
affiliated_keyword(&self.syntax, |k| k == "RESULTS") }\n`; - content += ` pub fn attr(&self, backend: &str) -> Option { affiliated_keyword(&self.syntax, |k| k.starts_with("ATTR_") && &k[5..] == backend) }\n`; - } - content += `}\n`; -} - -require("fs").writeFileSync(__dirname + "/generated.rs", content); diff --git a/src/ast/generated.rs b/src/ast/generated.rs deleted file mode 100644 index 1bfd489..0000000 --- a/src/ast/generated.rs +++ /dev/null @@ -1,2276 +0,0 @@ -//! generated file, do not modify it directly -#![allow(clippy::all)] -#![allow(unused)] - -use crate::syntax::{OrgLanguage, SyntaxKind, SyntaxKind::*, SyntaxNode, SyntaxToken}; -use rowan::{ - ast::{support, AstChildren, AstNode}, - TextRange, TextSize, -}; - -fn affiliated_keyword( - node: &SyntaxNode, - filter: impl Fn(&str) -> bool, -) -> Option { - node.children() - .take_while(|n| n.kind() == SyntaxKind::AFFILIATED_KEYWORD) - .filter_map(AffiliatedKeyword::cast) - .find(|k| filter(&k.key())) -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Document { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Document { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == DOCUMENT - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Document { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Document { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn section(&self) -> Option
{ - support::child(&self.syntax) - } - pub fn first_headline(&self) -> Option { - support::child(&self.syntax) - } - pub fn last_headline(&self) -> Option { - super::last_child(&self.syntax) - } - pub fn headlines(&self) -> AstChildren { - support::children(&self.syntax) - } - pub fn pre_blank(&self) -> usize { - super::blank_lines(&self.syntax) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Section { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Section { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == SECTION - } - fn cast(node: SyntaxNode) -> Option
{ - Self::can_cast(node.kind()).then(|| Section { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Section { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn post_blank(&self) -> usize { - super::blank_lines(&self.syntax) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Paragraph { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Paragraph { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == PARAGRAPH - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Paragraph { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Paragraph { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn post_blank(&self) -> usize { - super::blank_lines(&self.syntax) - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, 
|k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] == backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Headline { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Headline { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == HEADLINE - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Headline { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Headline { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn section(&self) -> Option
{ - support::child(&self.syntax) - } - pub fn planning(&self) -> Option { - support::child(&self.syntax) - } - pub fn properties(&self) -> Option { - support::child(&self.syntax) - } - pub fn headlines(&self) -> AstChildren { - support::children(&self.syntax) - } - pub fn post_blank(&self) -> usize { - super::blank_lines(&self.syntax) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct PropertyDrawer { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for PropertyDrawer { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == PROPERTY_DRAWER - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| PropertyDrawer { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl PropertyDrawer { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn node_properties(&self) -> AstChildren { - support::children(&self.syntax) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct NodeProperty { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for NodeProperty { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == NODE_PROPERTY - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| NodeProperty { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl NodeProperty { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn 
text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Planning { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Planning { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == PLANNING - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Planning { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Planning { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct OrgTable { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for OrgTable { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == ORG_TABLE - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| OrgTable { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl OrgTable { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn post_blank(&self) -> usize { - super::blank_lines(&self.syntax) - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == 
"CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] == backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct OrgTableRow { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for OrgTableRow { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == ORG_TABLE_RULE_ROW || kind == ORG_TABLE_STANDARD_ROW - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| OrgTableRow { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl OrgTableRow { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct OrgTableCell { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for OrgTableCell { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == ORG_TABLE_CELL - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| OrgTableCell { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl OrgTableCell { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position 
of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct List { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for List { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == LIST - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| List { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl List { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn items(&self) -> AstChildren { - support::children(&self.syntax) - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ListItem { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for ListItem { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == LIST_ITEM - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| ListItem { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl ListItem { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Drawer { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Drawer { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == DRAWER - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Drawer { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Drawer { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DynBlock { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for DynBlock { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == DYN_BLOCK - } - fn cast(node: SyntaxNode) -> Option { - 
Self::can_cast(node.kind()).then(|| DynBlock { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl DynBlock { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Keyword { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Keyword { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == KEYWORD - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Keyword { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Keyword { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct BabelCall { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for BabelCall { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == BABEL_CALL - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| BabelCall { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl BabelCall { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct AffiliatedKeyword { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for AffiliatedKeyword { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == AFFILIATED_KEYWORD - } - fn cast(node: SyntaxNode) -> Option { 
- Self::can_cast(node.kind()).then(|| AffiliatedKeyword { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl AffiliatedKeyword { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct TableEl { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for TableEl { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == TABLE_EL - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| TableEl { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl TableEl { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn post_blank(&self) -> usize { - super::blank_lines(&self.syntax) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Clock { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Clock { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == CLOCK - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Clock { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Clock { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - 
self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn post_blank(&self) -> usize { - super::blank_lines(&self.syntax) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct FnDef { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for FnDef { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == FN_DEF - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| FnDef { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl FnDef { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn post_blank(&self) -> usize { - super::blank_lines(&self.syntax) - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Comment { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Comment { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == COMMENT - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Comment { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Comment { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn text(&self) -> Option { - super::token(&self.syntax, TEXT) - } - pub fn post_blank(&self) -> usize { - super::blank_lines(&self.syntax) - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Rule { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Rule { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == RULE - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Rule { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Rule { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn post_blank(&self) -> usize { - super::blank_lines(&self.syntax) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct FixedWidth { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for FixedWidth { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == FIXED_WIDTH - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| FixedWidth { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl FixedWidth { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn text(&self) -> Option { - super::token(&self.syntax, TEXT) - } - pub fn post_blank(&self) -> usize { - super::blank_lines(&self.syntax) - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == 
"CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] == backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct SpecialBlock { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for SpecialBlock { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == SPECIAL_BLOCK - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| SpecialBlock { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl SpecialBlock { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct QuoteBlock { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for QuoteBlock { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == QUOTE_BLOCK - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| QuoteBlock { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl QuoteBlock { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct CenterBlock { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for CenterBlock { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == CENTER_BLOCK - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| CenterBlock { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl CenterBlock { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct VerseBlock { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for VerseBlock { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == VERSE_BLOCK - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| VerseBlock { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl VerseBlock { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct CommentBlock { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for CommentBlock { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == COMMENT_BLOCK - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| CommentBlock { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl CommentBlock { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ExampleBlock { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for ExampleBlock { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == EXAMPLE_BLOCK - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| ExampleBlock { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl ExampleBlock { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ExportBlock { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for ExportBlock { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == EXPORT_BLOCK - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| ExportBlock { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl ExportBlock { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct SourceBlock { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for SourceBlock { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == SOURCE_BLOCK - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| SourceBlock { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl SourceBlock { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn caption(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "CAPTION") - } - pub fn header(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "HEADER") - } - pub fn name(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "NAME") - } - pub fn plot(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "PLOT") - } - pub fn results(&self) -> Option { - affiliated_keyword(&self.syntax, |k| k == "RESULTS") - } - pub fn attr(&self, backend: &str) -> Option { - affiliated_keyword(&self.syntax, |k| { - k.starts_with("ATTR_") && &k[5..] 
== backend - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct InlineCall { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for InlineCall { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == INLINE_CALL - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| InlineCall { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl InlineCall { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct InlineSrc { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for InlineSrc { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == INLINE_SRC - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| InlineSrc { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl InlineSrc { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Link { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Link { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == LINK - } - fn cast(node: SyntaxNode) -> Option { - 
Self::can_cast(node.kind()).then(|| Link { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Link { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Cookie { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Cookie { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == COOKIE - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Cookie { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Cookie { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct RadioTarget { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for RadioTarget { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == RADIO_TARGET - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| RadioTarget { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl RadioTarget { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> 
TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct FnRef { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for FnRef { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == FN_REF - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| FnRef { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl FnRef { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Macros { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Macros { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == MACROS - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Macros { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Macros { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Snippet { - pub(crate) syntax: SyntaxNode, 
-} -impl AstNode for Snippet { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == SNIPPET - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Snippet { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Snippet { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Target { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Target { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == TARGET - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Target { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Target { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Bold { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Bold { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == BOLD - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Bold { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Bold { - /// Beginning position of this element - 
pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Strike { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Strike { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == STRIKE - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Strike { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Strike { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Italic { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Italic { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == ITALIC - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Italic { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Italic { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String 
{ - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Underline { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Underline { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == UNDERLINE - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Underline { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Underline { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Verbatim { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Verbatim { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == VERBATIM - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Verbatim { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Verbatim { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Code { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Code { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == CODE - } - fn cast(node: SyntaxNode) -> Option { - 
Self::can_cast(node.kind()).then(|| Code { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Code { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn text(&self) -> Option { - super::token(&self.syntax, TEXT) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Timestamp { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Timestamp { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == TIMESTAMP_ACTIVE || kind == TIMESTAMP_INACTIVE || kind == TIMESTAMP_DIARY - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Timestamp { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Timestamp { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } - pub fn year_start(&self) -> Option { - super::token(&self.syntax, TIMESTAMP_YEAR) - } - pub fn month_start(&self) -> Option { - super::token(&self.syntax, TIMESTAMP_MONTH) - } - pub fn day_start(&self) -> Option { - super::token(&self.syntax, TIMESTAMP_DAY) - } - pub fn hour_start(&self) -> Option { - super::token(&self.syntax, TIMESTAMP_HOUR) - } - pub fn minute_start(&self) -> Option { - super::token(&self.syntax, TIMESTAMP_MINUTE) - } - pub fn 
year_end(&self) -> Option { - super::last_token(&self.syntax, TIMESTAMP_YEAR) - } - pub fn month_end(&self) -> Option { - super::last_token(&self.syntax, TIMESTAMP_MONTH) - } - pub fn day_end(&self) -> Option { - super::last_token(&self.syntax, TIMESTAMP_DAY) - } - pub fn hour_end(&self) -> Option { - super::last_token(&self.syntax, TIMESTAMP_HOUR) - } - pub fn minute_end(&self) -> Option { - super::last_token(&self.syntax, TIMESTAMP_MINUTE) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct LatexEnvironment { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for LatexEnvironment { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == LATEX_ENVIRONMENT - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| LatexEnvironment { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl LatexEnvironment { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct LatexFragment { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for LatexFragment { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == LATEX_FRAGMENT - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| LatexFragment { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl LatexFragment { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } 
- /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Entity { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Entity { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == ENTITY - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Entity { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Entity { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct LineBreak { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for LineBreak { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == LINE_BREAK - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| LineBreak { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl LineBreak { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Superscript { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for 
Superscript { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == SUPERSCRIPT - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Superscript { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Superscript { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Subscript { - pub(crate) syntax: SyntaxNode, -} -impl AstNode for Subscript { - type Language = OrgLanguage; - fn can_cast(kind: SyntaxKind) -> bool { - kind == SUBSCRIPT - } - fn cast(node: SyntaxNode) -> Option { - Self::can_cast(node.kind()).then(|| Subscript { syntax: node }) - } - fn syntax(&self) -> &SyntaxNode { - &self.syntax - } -} -impl Subscript { - /// Beginning position of this element - pub fn start(&self) -> TextSize { - self.syntax.text_range().start() - } - /// Ending position of this element - pub fn end(&self) -> TextSize { - self.syntax.text_range().end() - } - /// Range of this element - pub fn text_range(&self) -> TextRange { - self.syntax.text_range() - } - /// Raw text of this element - pub fn raw(&self) -> String { - self.syntax.to_string() - } -} diff --git a/src/ast/headline.rs b/src/ast/headline.rs deleted file mode 100644 index a465053..0000000 --- a/src/ast/headline.rs +++ /dev/null @@ -1,276 +0,0 @@ -use rowan::{ast::AstNode, NodeOrToken}; - -use crate::{syntax::SyntaxKind, SyntaxElement}; - -use super::{filter_token, Clock, Drawer, Headline, Section, Timestamp, Token}; - -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum TodoType { - Todo, - Done, -} - -impl 
Headline { - /// Return level of this headline - /// - /// ```rust - /// use orgize::{Org, ast::Headline}; - /// - /// let hdl = Org::parse("* ").first_node::().unwrap(); - /// assert_eq!(hdl.level(), 1); - /// let hdl = Org::parse("****** hello").first_node::().unwrap(); - /// assert_eq!(hdl.level(), 6); - /// ``` - pub fn level(&self) -> usize { - self.syntax - .children_with_tokens() - .find_map(filter_token(SyntaxKind::HEADLINE_STARS)) - .map_or_else( - || { - debug_assert!(false, "headline must contains HEADLINE_STARS"); - 0 - }, - |stars| stars.len(), - ) - } - - /// ```rust - /// use orgize::{Org, ast::Headline}; - /// - /// let hdl = Org::parse("* TODO a").first_node::().unwrap(); - /// assert_eq!(hdl.todo_keyword().unwrap(), "TODO"); - /// ``` - pub fn todo_keyword(&self) -> Option { - self.syntax - .children_with_tokens() - .find_map(|elem| match elem { - NodeOrToken::Token(tk) - if tk.kind() == SyntaxKind::HEADLINE_KEYWORD_TODO - || tk.kind() == SyntaxKind::HEADLINE_KEYWORD_DONE => - { - Some(Token(tk)) - } - _ => None, - }) - } - - /// ```rust - /// use orgize::{Org, ast::{Headline, TodoType}}; - /// - /// let hdl = Org::parse("* TODO a").first_node::().unwrap(); - /// assert_eq!(hdl.todo_type().unwrap(), TodoType::Todo); - /// let hdl = Org::parse("*** DONE a").first_node::().unwrap(); - /// assert_eq!(hdl.todo_type().unwrap(), TodoType::Done); - /// ``` - pub fn todo_type(&self) -> Option { - self.syntax - .children_with_tokens() - .find_map(|elem| match elem { - NodeOrToken::Token(tk) if tk.kind() == SyntaxKind::HEADLINE_KEYWORD_TODO => { - Some(TodoType::Todo) - } - NodeOrToken::Token(tk) if tk.kind() == SyntaxKind::HEADLINE_KEYWORD_DONE => { - Some(TodoType::Done) - } - _ => None, - }) - } - - /// ```rust - /// use orgize::{Org, ast::Headline}; - /// - /// let hdl = Org::parse("* TODO a").first_node::().unwrap(); - /// assert!(hdl.is_todo()); - /// let hdl = Org::parse("* a").first_node::().unwrap(); - /// assert!(!hdl.is_todo()); - /// ``` - pub 
fn is_todo(&self) -> bool { - matches!(self.todo_type(), Some(TodoType::Todo)) - } - - /// ```rust - /// use orgize::{Org, ast::Headline}; - /// - /// let hdl = Org::parse("* DONE a").first_node::().unwrap(); - /// assert!(hdl.is_done()); - /// let hdl = Org::parse("* a").first_node::().unwrap(); - /// assert!(!hdl.is_done()); - /// ``` - pub fn is_done(&self) -> bool { - matches!(self.todo_type(), Some(TodoType::Done)) - } - - /// Returns parsed title - /// - /// ```rust - /// use orgize::{Org, ast::Headline, SyntaxKind}; - /// - /// let hdl = Org::parse("*** abc *abc* /abc/ :tag:").first_node::().unwrap(); - /// let title = hdl.title().collect::>(); - /// assert_eq!(title[1].kind(), SyntaxKind::BOLD); - /// assert_eq!(title[1].to_string(), "*abc*"); - /// assert_eq!(title[3].kind(), SyntaxKind::ITALIC); - /// assert_eq!(title[3].to_string(), "/abc/"); - /// ``` - pub fn title(&self) -> impl Iterator { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::HEADLINE_TITLE) - .into_iter() - .flat_map(|n| n.children_with_tokens()) - } - - /// Returns title raw string - /// - /// ```rust - /// use orgize::{Org, ast::Headline}; - /// - /// let hdl = Org::parse("*** abc *abc* /abc/ :tag:").first_node::().unwrap(); - /// let title = hdl.title_raw(); - /// assert_eq!(title, "abc *abc* /abc/ "); - /// ``` - pub fn title_raw(&self) -> String { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::HEADLINE_TITLE) - .map(|n| n.to_string()) - .unwrap_or_default() - } - - /// Return `true` if this headline contains a COMMENT keyword - /// - /// ```rust - /// use orgize::{Org, ast::Headline}; - /// - /// let hdl = Org::parse("* COMMENT").first_node::().unwrap(); - /// assert!(hdl.is_commented()); - /// let hdl = Org::parse("* COMMENT hello").first_node::().unwrap(); - /// assert!(hdl.is_commented()); - /// let hdl = Org::parse("* hello").first_node::().unwrap(); - /// assert!(!hdl.is_commented()); - /// ``` - pub fn is_commented(&self) -> bool { - self.title() 
- .next() - .map(|first| { - if let Some(t) = first.as_token() { - let text = t.text(); - t.kind() == SyntaxKind::TEXT - && text.starts_with("COMMENT") - && (text.len() == 7 || text[7..].starts_with(char::is_whitespace)) - } else { - false - } - }) - .unwrap_or_default() - } - - /// Return `true` if this headline contains an archive tag - /// - /// ```rust - /// use orgize::{Org, ast::Headline}; - /// - /// let hdl = Org::parse("* hello :ARCHIVE:").first_node::().unwrap(); - /// assert!(hdl.is_archived()); - /// let hdl = Org::parse("* hello :ARCHIVED:").first_node::().unwrap(); - /// assert!(!hdl.is_archived()); - /// ``` - pub fn is_archived(&self) -> bool { - self.tags().any(|t| t == "ARCHIVE") - } - - /// Returns this headline's closed timestamp, or `None` if not set. - pub fn closed(&self) -> Option { - self.planning().and_then(|planning| planning.closed()) - } - - /// Returns this headline's scheduled timestamp, or `None` if not set. - pub fn scheduled(&self) -> Option { - self.planning().and_then(|planning| planning.scheduled()) - } - - /// Returns this headline's deadline timestamp, or `None` if not set. 
- pub fn deadline(&self) -> Option { - self.planning().and_then(|planning| planning.deadline()) - } - - /// Returns an iterator of text token in this tags - /// - /// ```rust - /// use orgize::{Org, ast::Headline}; - /// - /// let tags_vec = |input: &str| { - /// let hdl = Org::parse(input).first_node::().unwrap(); - /// let tags: Vec<_> = hdl.tags().map(|t| t.to_string()).collect(); - /// tags - /// }; - /// - /// assert_eq!(tags_vec("* :tag:"), vec!["tag".to_string()]); - /// assert_eq!(tags_vec("* [#A] :::::a2%:"), vec!["a2%".to_string()]); - /// assert_eq!(tags_vec("* TODO :tag: :a2%:"), vec!["tag".to_string(), "a2%".to_string()]); - /// assert_eq!(tags_vec("* title :tag:a2%:"), vec!["tag".to_string(), "a2%".to_string()]); - /// ``` - pub fn tags(&self) -> impl Iterator { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::HEADLINE_TAGS) - .into_iter() - .flat_map(|t| t.children_with_tokens()) - .filter_map(filter_token(SyntaxKind::TEXT)) - } - - /// Returns priority text - /// - /// ```rust - /// use orgize::{Org, ast::Headline}; - /// - /// let hdl = Org::parse("* [#A]").first_node::().unwrap(); - /// assert_eq!(hdl.priority().unwrap(), "A"); - /// let hdl = Org::parse("** DONE [#B]::").first_node::().unwrap(); - /// assert_eq!(hdl.priority().unwrap(), "B"); - /// let hdl = Org::parse("* [#破]").first_node::().unwrap(); - /// assert_eq!(hdl.priority().unwrap(), "破"); - /// ``` - pub fn priority(&self) -> Option { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::HEADLINE_PRIORITY) - .and_then(|n| { - n.children_with_tokens() - .find_map(filter_token(SyntaxKind::TEXT)) - }) - } - - /// Returns an iterator of clock element affiliated with this headline - /// - /// ```rust - /// use orgize::{Org, ast::Headline}; - /// - /// let org = Org::parse(r#"* TODO - /// foo - /// :LOGBOOK: - /// bar - /// CLOCK: - /// CLOCK: [2024-10-12] - /// baz - /// CLOCK: [2024-10-12] - /// [2024-10-12] - /// :END: - /// foo"#); - /// let hdl = 
org.first_node::().unwrap(); - /// assert_eq!(hdl.clocks().count(), 2); - /// ``` - pub fn clocks(&self) -> impl Iterator { - self.syntax - .children() - .flat_map(Section::cast) - .flat_map(|x| x.syntax.children().filter_map(Drawer::cast)) - .filter(|d| d.name().eq_ignore_ascii_case("LOGBOOK")) - .filter_map(|d| { - d.syntax - .children() - .find(|children| children.kind() == SyntaxKind::DRAWER_CONTENT) - }) - .flat_map(|x| x.children().filter_map(Clock::cast)) - } -} diff --git a/src/ast/inline_call.rs b/src/ast/inline_call.rs deleted file mode 100644 index 4dc2c04..0000000 --- a/src/ast/inline_call.rs +++ /dev/null @@ -1,80 +0,0 @@ -use crate::syntax::SyntaxKind; - -use super::{filter_token, InlineCall, Token}; - -impl InlineCall { - /// - /// ```rust - /// use orgize::{Org, ast::InlineCall}; - /// - /// let call = Org::parse("call_square(4)").first_node::().unwrap(); - /// assert_eq!(call.call(), "square"); - /// ``` - pub fn call(&self) -> Token { - self.syntax - .children_with_tokens() - .filter_map(filter_token(SyntaxKind::TEXT)) - .nth(1) - .expect("inline call must contains two TEXT") - } - - /// - /// ```rust - /// use orgize::{Org, ast::InlineCall}; - /// - /// let call = Org::parse("call_square[:results output](4)").first_node::().unwrap(); - /// assert_eq!(call.inside_header().unwrap(), ":results output"); - /// - /// let call = Org::parse("call_square(4)[:results html]").first_node::().unwrap(); - /// assert!(call.inside_header().is_none()); - /// ``` - pub fn inside_header(&self) -> Option { - self.syntax - .children_with_tokens() - .take_while(|e| e.kind() != SyntaxKind::L_PARENS) - .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET) - .nth(1) - .and_then(|e| { - debug_assert_eq!(e.kind(), SyntaxKind::TEXT); - Some(Token(e.into_token()?)) - }) - } - - /// - /// ```rust - /// use orgize::{Org, ast::InlineCall}; - /// - /// let call = Org::parse("call_square(4)").first_node::().unwrap(); - /// assert_eq!(call.arguments(), "4"); - /// ``` - pub fn 
arguments(&self) -> Token { - self.syntax - .children_with_tokens() - .skip_while(|e| e.kind() != SyntaxKind::L_PARENS) - .find_map(filter_token(SyntaxKind::TEXT)) - .expect("inline call must contains TEXT after L_PARENS") - } - - /// - /// ```rust - /// use orgize::{Org, ast::InlineCall}; - /// - /// let call = Org::parse("call_square[:results output](4)[:results html]").first_node::().unwrap(); - /// assert_eq!(call.end_header().unwrap(), ":results html"); - /// - /// let call = Org::parse("call_square[:results output](4)").first_node::().unwrap(); - /// assert!(call.end_header().is_none()); - /// ``` - pub fn end_header(&self) -> Option { - self.syntax - .children_with_tokens() - .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET) - .skip(1) - .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET) - .nth(1) - .and_then(|e| { - debug_assert_eq!(e.kind(), SyntaxKind::TEXT); - Some(Token(e.into_token()?)) - }) - } -} diff --git a/src/ast/inline_src.rs b/src/ast/inline_src.rs deleted file mode 100644 index 87aff35..0000000 --- a/src/ast/inline_src.rs +++ /dev/null @@ -1,62 +0,0 @@ -use crate::SyntaxKind; - -use super::{filter_token, InlineSrc, Token}; - -impl InlineSrc { - /// Language of the code - /// - /// ```rust - /// use orgize::{Org, ast::InlineSrc}; - /// - /// let s = Org::parse("src_C{int a = 0;}").first_node::().unwrap(); - /// assert_eq!(s.language(), "C"); - /// let s = Org::parse("src_xml[:exports code]{text}").first_node::().unwrap(); - /// assert_eq!(s.language(), "xml"); - /// ``` - pub fn language(&self) -> Token { - self.syntax - .children_with_tokens() - .nth(1) - .and_then(filter_token(SyntaxKind::TEXT)) - .expect("inline src must contains TEXT") - } - - /// Optional header arguments - /// - /// ```rust - /// use orgize::{Org, ast::InlineSrc}; - /// - /// let s = Org::parse("src_C{int a = 0;}").first_node::().unwrap(); - /// assert!(s.parameters().is_none()); - /// let s = Org::parse("src_xml[:exports code]{text}").first_node::().unwrap(); - /// 
assert_eq!(s.parameters().unwrap(), ":exports code"); - /// ``` - pub fn parameters(&self) -> Option { - self.syntax - .children_with_tokens() - .skip_while(|n| n.kind() != SyntaxKind::L_BRACKET) - .nth(1) - .and_then(|n| { - debug_assert_eq!(n.kind(), SyntaxKind::TEXT); - Some(Token(n.into_token()?)) - }) - } - - /// Source code - /// - /// ```rust - /// use orgize::{Org, ast::InlineSrc}; - /// - /// let s = Org::parse("src_C{int a = 0;}").first_node::().unwrap(); - /// assert_eq!(s.value(), "int a = 0;"); - /// let s = Org::parse("src_xml[:exports code]{text}").first_node::().unwrap(); - /// assert_eq!(s.value(), "text"); - /// ``` - pub fn value(&self) -> Token { - self.syntax - .children_with_tokens() - .filter_map(filter_token(SyntaxKind::TEXT)) - .last() - .expect("inline src must contains TEXT") - } -} diff --git a/src/ast/keyword.rs b/src/ast/keyword.rs deleted file mode 100644 index cf0a6f4..0000000 --- a/src/ast/keyword.rs +++ /dev/null @@ -1,36 +0,0 @@ -use crate::SyntaxKind; - -use super::{filter_token, Keyword, Token}; - -impl Keyword { - /// - /// ```rust - /// use orgize::{Org, ast::Keyword}; - /// - /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::().unwrap(); - /// assert_eq!(keyword.key(), "KEY"); - /// ``` - pub fn key(&self) -> Token { - self.syntax - .children_with_tokens() - .find_map(filter_token(SyntaxKind::TEXT)) - .expect("keyword must contains TEXT") - } - - /// - /// ```rust - /// use orgize::{Org, ast::Keyword}; - /// - /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::().unwrap(); - /// assert_eq!(keyword.value(), " VALUE"); - /// let keyword = Org::parse("#+KEY:").first_node::().unwrap(); - /// assert_eq!(keyword.value(), ""); - /// ``` - pub fn value(&self) -> Token { - self.syntax - .children_with_tokens() - .filter_map(filter_token(SyntaxKind::TEXT)) - .nth(1) - .expect("keyword must contains two TEXT") - } -} diff --git a/src/ast/link.rs b/src/ast/link.rs deleted file mode 100644 index daff761..0000000 --- 
a/src/ast/link.rs +++ /dev/null @@ -1,121 +0,0 @@ -use rowan::ast::AstNode; - -use super::{token, AffiliatedKeyword, Link, Paragraph, Token}; -use crate::{syntax::SyntaxKind, SyntaxElement}; - -impl Link { - /// Returns link destination - /// - /// ```rust - /// use orgize::{Org, ast::Link}; - /// - /// let link = Org::parse("[[#id]]").first_node::().unwrap(); - /// assert_eq!(link.path(), "#id"); - /// let link = Org::parse("[[https://google.com]]").first_node::().unwrap(); - /// assert_eq!(link.path(), "https://google.com"); - /// let link = Org::parse("[[https://google.com][Google]]").first_node::().unwrap(); - /// assert_eq!(link.path(), "https://google.com"); - /// ``` - pub fn path(&self) -> Token { - token(&self.syntax, SyntaxKind::LINK_PATH).expect("link must contains LINK_PATH") - } - - /// Returns `true` if link contains description - /// - /// ```rust - /// use orgize::{Org, ast::Link}; - /// - /// let link = Org::parse("[[https://google.com]]").first_node::().unwrap(); - /// assert!(!link.has_description()); - /// let link = Org::parse("[[https://google.com][Google]]").first_node::().unwrap(); - /// assert!(link.has_description()); - /// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::().unwrap(); - /// assert!(link.has_description()); - /// ``` - pub fn has_description(&self) -> bool { - self.syntax() - .children_with_tokens() - .any(|e| e.kind() == SyntaxKind::L_BRACKET) - } - - /// Returns parsed description - /// - /// Returns empty iterator if this link doesn't contain description - /// - /// ```rust - /// use orgize::{Org, ast::Link, SyntaxKind}; - /// - /// let link = Org::parse("[[https://google.com]]").first_node::().unwrap(); - /// assert_eq!(link.description().count(), 0); - /// - /// let link = Org::parse("[[https://google.com][Google]]").first_node::().unwrap(); - /// let description = link.description().collect::>(); - /// assert_eq!((description[0].kind(), description[0].to_string()), (SyntaxKind::TEXT, 
"Google".into())); - /// - /// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::().unwrap(); - /// let description = link.description().collect::>(); - /// assert_eq!((description[0].kind(), description[0].to_string()), (SyntaxKind::BOLD, "*abc*".into())); - /// assert_eq!((description[2].kind(), description[2].to_string()), (SyntaxKind::ITALIC, "/abc/".into())); - /// ``` - pub fn description(&self) -> impl Iterator { - self.syntax() - .children_with_tokens() - .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET) - .skip(1) - .take_while(|e| e.kind() != SyntaxKind::R_BRACKET2) - } - - /// Returns description raw string - /// - /// Returns empty string if this link doesn't contain description - /// - /// ```rust - /// use orgize::{Org, ast::Link}; - /// - /// let link = Org::parse("[[https://google.com]]").first_node::().unwrap(); - /// assert_eq!(link.description_raw(), ""); - /// let link = Org::parse("[[https://google.com][Google]]").first_node::().unwrap(); - /// assert_eq!(link.description_raw(), "Google"); - /// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::().unwrap(); - /// assert_eq!(link.description_raw(), "*abc* /abc/"); - /// ``` - pub fn description_raw(&self) -> String { - self.description() - .fold(String::new(), |acc, e| acc + &e.to_string()) - } - - /// Returns `true` if link is an image link - /// - /// ```rust - /// use orgize::{Org, ast::Link}; - /// - /// let link = Org::parse("[[https://google.com]]").first_node::().unwrap(); - /// assert!(!link.is_image()); - /// let link = Org::parse("[[file:/home/dominik/images/jupiter.jpg]]").first_node::().unwrap(); - /// assert!(link.is_image()); - /// ``` - pub fn is_image(&self) -> bool { - const IMAGE_SUFFIX: &[&str] = &[ - // https://github.com/bzg/org-mode/blob/7de1e818d5fbe6a05c6b1a007eed07dc27e7246b/lisp/ox.el#L253 - ".png", ".jpeg", ".jpg", ".gif", ".tiff", ".tif", ".xbm", ".xpm", ".pbm", ".pgm", - ".ppm", ".webp", ".avif", ".svg", - ]; - - let 
path = self.path(); - - IMAGE_SUFFIX.iter().any(|e| path.ends_with(e)) && !self.has_description() - } - - /// Returns caption keyword in this link - /// - /// ```rust - /// use orgize::{Org, ast::Link}; - /// - /// let link = Org::parse("#+CAPTION: image link\n[[file:/home/dominik/images/jupiter.jpg]]").first_node::().unwrap(); - /// assert_eq!(link.caption().unwrap().value().unwrap(), " image link"); - /// ``` - pub fn caption(&self) -> Option { - // TODO: support other element type - Paragraph::cast(self.syntax.parent()?.clone())?.caption() - } -} diff --git a/src/ast/list.rs b/src/ast/list.rs deleted file mode 100644 index d53bbc9..0000000 --- a/src/ast/list.rs +++ /dev/null @@ -1,139 +0,0 @@ -use super::{filter_token, List, ListItem, Token}; -use crate::{syntax::SyntaxKind, SyntaxElement}; - -impl List { - /// Returns `true` if this list is an ordered link - /// - /// ```rust - /// use orgize::{Org, ast::List}; - /// - /// let list = Org::parse("+ 1").first_node::().unwrap(); - /// assert!(!list.is_ordered()); - /// - /// let list = Org::parse("1. 1").first_node::().unwrap(); - /// assert!(list.is_ordered()); - /// - /// let list = Org::parse("1) 1\n- 2\n3. 3").first_node::().unwrap(); - /// assert!(list.is_ordered()); - /// ``` - pub fn is_ordered(&self) -> bool { - self.items().next().map_or_else( - || { - debug_assert!(false, "list muts contains LIST_ITEM"); - false - }, - |item| item.bullet().starts_with(|c: char| c.is_ascii_digit()), - ) - } - - /// Returns `true` if this list contains a TAG - /// - /// ```rust - /// use orgize::{Org, ast::List}; - /// - /// let list = Org::parse("- some tag :: item 2.1").first_node::().unwrap(); - /// assert!(list.is_descriptive()); - /// let list = Org::parse("2. 
[X] item 2").first_node::().unwrap(); - /// assert!(!list.is_descriptive()); - /// ``` - pub fn is_descriptive(&self) -> bool { - self.items().next().map_or_else( - || { - debug_assert!(false, "list must contains LIST_ITEM"); - false - }, - |item| { - item.syntax - .children() - .any(|it| it.kind() == SyntaxKind::LIST_ITEM_TAG) - }, - ) - } -} - -impl ListItem { - /// ```rust - /// use orgize::{Org, ast::ListItem}; - /// - /// let item = Org::parse("- 1").first_node::().unwrap(); - /// assert_eq!(item.indent(), 0); - /// let item = Org::parse(" \t * 2").first_node::().unwrap(); - /// assert_eq!(item.indent(), 3); - /// ``` - pub fn indent(&self) -> usize { - self.syntax - .children_with_tokens() - .find_map(filter_token(SyntaxKind::LIST_ITEM_INDENT)) - .map_or_else( - || { - debug_assert!(false, "list item must contains LIST_ITEM_INDENT"); - 0 - }, - |t| t.len(), - ) - } - - /// ```rust - /// use orgize::{Org, ast::ListItem}; - /// - /// let item = Org::parse("- some tag").first_node::().unwrap(); - /// assert_eq!(item.bullet(), "- "); - /// let item = Org::parse("2. [X] item 2").first_node::().unwrap(); - /// assert_eq!(item.bullet(), "2. "); - /// ``` - pub fn bullet(&self) -> Token { - self.syntax - .children_with_tokens() - .find_map(filter_token(SyntaxKind::LIST_ITEM_BULLET)) - .expect("list item must contains LIST_ITEM_BULLET") - } - - /// ```rust - /// use orgize::{Org, ast::ListItem}; - /// - /// let item = Org::parse("- [-] item 1").first_node::().unwrap(); - /// assert_eq!(item.checkbox().unwrap(), "-"); - /// let item = Org::parse("2. 
[X] item 2").first_node::().unwrap(); - /// assert_eq!(item.checkbox().unwrap(), "X"); - /// let item = Org::parse("3) [ ] item 3").first_node::().unwrap(); - /// assert_eq!(item.checkbox().unwrap(), " "); - /// ``` - pub fn checkbox(&self) -> Option { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::LIST_ITEM_CHECK_BOX) - .and_then(|n| { - n.children_with_tokens() - .find_map(filter_token(SyntaxKind::TEXT)) - }) - } - - pub fn counter(&self) -> Option { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::LIST_ITEM_COUNTER) - .and_then(|n| { - n.children_with_tokens() - .find_map(filter_token(SyntaxKind::TEXT)) - }) - } - - /// ```rust - /// use orgize::{Org, ast::ListItem}; - /// - /// let item = Org::parse("+ this is *TAG* :: item1").first_node::().unwrap(); - /// let tag = item.tag().map(|n| n.to_string()).collect::(); - /// assert_eq!(tag, "this is *TAG* "); - /// ``` - pub fn tag(&self) -> impl Iterator { - self.syntax - .children() - .find(|n| n.kind() == SyntaxKind::LIST_ITEM_TAG) - .into_iter() - .flat_map(|n| { - n.children_with_tokens().filter(|n| { - n.kind() != SyntaxKind::WHITESPACE && n.kind() != SyntaxKind::COLON2 - }) - }) - } -} diff --git a/src/ast/macros.rs b/src/ast/macros.rs deleted file mode 100644 index 91dfa57..0000000 --- a/src/ast/macros.rs +++ /dev/null @@ -1,35 +0,0 @@ -use crate::SyntaxKind; - -use super::{filter_token, Macros, Token}; - -impl Macros { - /// ```rust - /// use orgize::{Org, ast::Macros}; - /// - /// let m = Org::parse("{{{title}}}").first_node::().unwrap(); - /// assert_eq!(m.key(), "title"); - /// let m = Org::parse("{{{two_arg_macro(1, 2)}}}").first_node::().unwrap(); - /// assert_eq!(m.key(), "two_arg_macro"); - /// ``` - pub fn key(&self) -> Token { - self.syntax - .children_with_tokens() - .find_map(filter_token(SyntaxKind::TEXT)) - .expect("macros must contains TEXT") - } - - /// ```rust - /// use orgize::{Org, ast::Macros}; - /// - /// let m = 
Org::parse("{{{title}}}").first_node::().unwrap(); - /// assert!(m.args().is_none()); - /// let m = Org::parse("{{{two_arg_macro(1, 2)}}}").first_node::().unwrap(); - /// assert_eq!(m.args().unwrap(), "1, 2"); - /// ``` - pub fn args(&self) -> Option { - self.syntax - .children_with_tokens() - .filter_map(filter_token(SyntaxKind::TEXT)) - .nth(1) - } -} diff --git a/src/ast/mod.rs b/src/ast/mod.rs deleted file mode 100644 index 9cd7817..0000000 --- a/src/ast/mod.rs +++ /dev/null @@ -1,176 +0,0 @@ -mod generated; - -mod affiliated_keyword; -mod block; -mod clock; -#[cfg(feature = "syntax-org-fc")] -mod cloze; -mod comment; -mod document; -mod drawer; -mod entity; -mod fixed_width; -mod headline; -mod inline_call; -mod inline_src; -mod keyword; -mod link; -mod list; -mod macros; -mod planning; -mod snippet; -mod table; -mod timestamp; - -#[cfg(feature = "syntax-org-fc")] -pub use cloze::*; -pub use generated::*; -pub use headline::*; -pub use rowan::ast::support::*; -pub use timestamp::*; - -use crate::{ - syntax::{SyntaxKind, SyntaxNode}, - SyntaxToken, -}; -use rowan::{ast::AstNode, NodeOrToken, TextRange, TextSize}; -use std::{ - borrow::{Borrow, Cow}, - fmt, - hash::Hash, - ops::Deref, -}; - -pub fn blank_lines(parent: &SyntaxNode) -> usize { - parent - .children_with_tokens() - .filter(|n| n.kind() == SyntaxKind::BLANK_LINE) - .count() -} - -pub fn last_child(parent: &rowan::SyntaxNode) -> Option { - parent.children().filter_map(N::cast).last() -} - -pub fn last_token(parent: &SyntaxNode, kind: SyntaxKind) -> Option { - parent - .children_with_tokens() - .filter_map(filter_token(kind)) - .last() -} - -pub fn token(parent: &SyntaxNode, kind: SyntaxKind) -> Option { - rowan::ast::support::token(parent, kind).map(Token) -} - -pub fn filter_token( - kind: SyntaxKind, -) -> impl Fn(NodeOrToken) -> Option { - move |elem| match elem { - NodeOrToken::Token(tk) if tk.kind() == kind => Some(Token(tk)), - _ => None, - } -} - -/// A simple wrapper of `SyntaxToken` -/// -/// 
It implements the `AsRef` and `Display` trait, -/// allowing to directly use some `str` methods. -/// -/// Also it implements `Hash` and `Eq` traits, so can be -/// used as keys in `HashMap`. However, note that it only -/// compares the underlying text inside `SyntaxToken`, -/// meaning two `Token`s from different positions -/// might be considered equal. -#[derive(Eq, Clone)] -pub struct Token(pub(crate) SyntaxToken); - -impl Token { - pub fn syntax(&self) -> &SyntaxToken { - &self.0 - } - - /// Range of this token - pub fn text_range(&self) -> TextRange { - self.0.text_range() - } - - /// Beginning position of this token - pub fn start(&self) -> TextSize { - self.0.text_range().start() - } - - /// Ending position of this token - pub fn end(&self) -> TextSize { - self.0.text_range().end() - } -} - -impl AsRef for Token { - fn as_ref(&self) -> &str { - self.0.text() - } -} - -impl Borrow for Token { - fn borrow(&self) -> &str { - self.as_ref() - } -} - -impl<'a> PartialEq<&'a str> for Token { - fn eq(&self, other: &&'a str) -> bool { - self.as_ref() == *other - } -} - -impl PartialEq for Token { - fn eq(&self, other: &String) -> bool { - self.as_ref() == other - } -} - -impl PartialEq for Token { - fn eq(&self, other: &Token) -> bool { - self.as_ref() == other.as_ref() - } -} - -impl Hash for Token { - fn hash(&self, state: &mut H) { - self.as_ref().hash(state) - } -} - -impl<'a> PartialEq> for Token { - fn eq(&self, other: &Cow<'a, str>) -> bool { - self.as_ref() == other - } -} - -impl PartialEq for Token { - fn eq(&self, other: &str) -> bool { - self.as_ref() == other - } -} - -impl Deref for Token { - type Target = str; - - #[inline] - fn deref(&self) -> &str { - self.as_ref() - } -} - -impl fmt::Debug for Token { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(self.0.text(), f) - } -} - -impl fmt::Display for Token { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self.0.text(), f) - } -} diff 
--git a/src/ast/planning.rs b/src/ast/planning.rs deleted file mode 100644 index 6a8bba3..0000000 --- a/src/ast/planning.rs +++ /dev/null @@ -1,67 +0,0 @@ -use rowan::ast::AstNode; - -use super::{Planning, Timestamp}; -use crate::syntax::SyntaxKind; - -impl Planning { - /// Returns deadline timestamp - /// - /// - /// ```rust - /// use orgize::{ast::Planning, Org}; - /// - /// let s = Org::parse("* a\nDEADLINE: <2019-04-08 Mon>") - /// .first_node::() - /// .unwrap() - /// .deadline() - /// .unwrap(); - /// assert_eq!(s.day_start().unwrap(), "08"); - /// ``` - pub fn deadline(&self) -> Option { - self.syntax - .children() - .filter(|n| n.kind() == SyntaxKind::PLANNING_DEADLINE) - .last() - .and_then(|n| n.children().find_map(Timestamp::cast)) - } - - /// Returns scheduled timestamp - /// - /// ```rust - /// use orgize::{ast::Planning, Org}; - /// - /// let s = Org::parse("* a\nSCHEDULED: <2019-04-08 Mon>") - /// .first_node::() - /// .unwrap() - /// .scheduled() - /// .unwrap(); - /// assert_eq!(s.year_start().unwrap(), "2019"); - /// ``` - pub fn scheduled(&self) -> Option { - self.syntax - .children() - .filter(|n| n.kind() == SyntaxKind::PLANNING_SCHEDULED) - .last() - .and_then(|n| n.children().find_map(Timestamp::cast)) - } - - /// Returns closed timestamp - /// - /// ```rust - /// use orgize::{ast::Planning, Org}; - /// - /// let s = Org::parse("* a\nCLOSED: <2019-04-08 Mon>") - /// .first_node::() - /// .unwrap() - /// .closed() - /// .unwrap(); - /// assert_eq!(s.month_start().unwrap(), "04"); - /// ``` - pub fn closed(&self) -> Option { - self.syntax - .children() - .filter(|n| n.kind() == SyntaxKind::PLANNING_CLOSED) - .last() - .and_then(|n| n.children().find_map(Timestamp::cast)) - } -} diff --git a/src/ast/snippet.rs b/src/ast/snippet.rs deleted file mode 100644 index 393713b..0000000 --- a/src/ast/snippet.rs +++ /dev/null @@ -1,34 +0,0 @@ -use crate::syntax::SyntaxKind; - -use super::{filter_token, Snippet, Token}; - -impl Snippet { - /// ```rust - 
/// use orgize::{Org, ast::Snippet}; - /// - /// let snippet = Org::parse("@@BACKEND:VALUE@@").first_node::().unwrap(); - /// assert_eq!(snippet.backend(), "BACKEND"); - /// ``` - pub fn backend(&self) -> Token { - self.syntax - .children_with_tokens() - .find_map(filter_token(SyntaxKind::TEXT)) - .expect("snippet must contains TEXT") - } - - /// ```rust - /// use orgize::{Org, ast::Snippet}; - /// - /// let snippet = Org::parse("@@BACKEND:@@").first_node::().unwrap(); - /// assert_eq!(snippet.value(), ""); - /// let snippet = Org::parse("@@BACKEND:VALUE@@").first_node::().unwrap(); - /// assert_eq!(snippet.value(), "VALUE"); - /// ``` - pub fn value(&self) -> Token { - self.syntax - .children_with_tokens() - .filter_map(filter_token(SyntaxKind::TEXT)) - .nth(1) - .expect("snippet must contains two TEXT") - } -} diff --git a/src/ast/table.rs b/src/ast/table.rs deleted file mode 100644 index 39999ee..0000000 --- a/src/ast/table.rs +++ /dev/null @@ -1,110 +0,0 @@ -use rowan::ast::AstNode; - -use super::{filter_token, OrgTable, OrgTableRow, Token}; -use crate::syntax::SyntaxKind; - -impl OrgTable { - /// Returns `true` if this table has a header - /// - /// A table has a header when it contains at least two row groups. 
- /// - /// ```rust - /// use orgize::{Org, ast::OrgTable}; - /// - /// let org = Org::parse(r#" - /// | a | b | - /// |---+---| - /// | c | d |"#); - /// let table = org.first_node::().unwrap(); - /// assert!(table.has_header()); - /// - /// let org = Org::parse(r#" - /// | a | b | - /// | 0 | 1 | - /// |---+---| - /// | a | w |"#); - /// let table = org.first_node::().unwrap(); - /// assert!(table.has_header()); - /// - /// let org = Org::parse(r#" - /// | a | b | - /// | c | d |"#); - /// let table = org.first_node::().unwrap(); - /// assert!(!table.has_header()); - /// - /// let org = Org::parse(r#" - /// |---+---| - /// | a | b | - /// | c | d | - /// |---+---|"#); - /// let table = org.first_node::().unwrap(); - /// assert!(!table.has_header()); - /// ``` - pub fn has_header(&self) -> bool { - self.syntax - .children() - .filter_map(OrgTableRow::cast) - .skip_while(|row| row.is_rule()) - .skip_while(|row| row.is_standard()) - .any(|row| !row.is_rule()) - } - - /// Formulas associated to the table - /// - /// ```rust - /// use orgize::{Org, ast::OrgTable}; - /// - /// let table = Org::parse("| a |").first_node::().unwrap(); - /// assert_eq!(table.tblfm().count(), 0); - /// - /// let table = Org::parse("| a |\n#+tblfm: test").first_node::().unwrap(); - /// let tblfm = table.tblfm().collect::>(); - /// assert_eq!(tblfm.len(), 1); - /// assert_eq!(tblfm[0], " test"); - /// - /// let table = Org::parse("| a |\n#+TBLFM: test1\n#+TBLFM: test2").first_node::().unwrap(); - /// let tblfm = table.tblfm().collect::>(); - /// assert_eq!(tblfm.len(), 2); - /// assert_eq!(tblfm[0], " test1"); - /// assert_eq!(tblfm[1], " test2"); - /// ``` - pub fn tblfm(&self) -> impl Iterator { - self.syntax.children().filter_map(|n| { - if n.kind() == SyntaxKind::KEYWORD { - n.children_with_tokens() - .filter_map(filter_token(SyntaxKind::TEXT)) - .last() - } else { - None - } - }) - } -} - -impl OrgTableRow { - /// Returns `true` if this row is a rule - /// - /// ```rust - /// use 
orgize::{Org, ast::OrgTableRow}; - /// - /// let org = Org::parse("|----|----|\n|Foo |Bar |"); - /// let row = org.first_node::().unwrap(); - /// assert!(row.is_rule()); - /// ``` - pub fn is_rule(&self) -> bool { - self.syntax.kind() == SyntaxKind::ORG_TABLE_RULE_ROW - } - - /// Returns `true` if this row is a standard row - /// - /// ```rust - /// use orgize::{Org, ast::OrgTableRow}; - /// - /// let org = Org::parse("|Foo |Bar |\n|----|----|"); - /// let row = org.first_node::().unwrap(); - /// assert!(row.is_standard()); - /// ``` - pub fn is_standard(&self) -> bool { - self.syntax.kind() == SyntaxKind::ORG_TABLE_STANDARD_ROW - } -} diff --git a/src/ast/timestamp.rs b/src/ast/timestamp.rs deleted file mode 100644 index 6d8a1d6..0000000 --- a/src/ast/timestamp.rs +++ /dev/null @@ -1,301 +0,0 @@ -use super::{filter_token, Timestamp}; -use crate::syntax::SyntaxKind; - -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum TimeUnit { - Hour, - Day, - Week, - Month, - Year, -} - -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum RepeaterType { - Cumulate, - CatchUp, - Restart, -} - -#[derive(Clone, Copy, Debug, PartialEq)] -pub enum DelayType { - All, - First, -} - -impl Timestamp { - /// ```rust - /// use orgize::{Org, ast::Timestamp}; - /// - /// let ts = Org::parse("<2003-09-16 Tue 09:39-10:39>").first_node::().unwrap(); - /// assert!(ts.is_active()); - /// let ts = Org::parse("<2003-09-16 Tue 09:39>--<2003-09-16 Tue 10:39>").first_node::().unwrap(); - /// assert!(ts.is_active()); - /// let ts = Org::parse("<2003-09-16 Tue 09:39>").first_node::().unwrap(); - /// assert!(ts.is_active()); - /// ``` - pub fn is_active(&self) -> bool { - self.syntax.kind() == SyntaxKind::TIMESTAMP_ACTIVE - } - - /// ```rust - /// use orgize::{Org, ast::Timestamp}; - /// - /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::().unwrap(); - /// assert!(ts.is_inactive()); - /// let ts = Org::parse("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]").first_node::().unwrap(); - 
/// assert!(ts.is_inactive()); - /// let ts = Org::parse("[2003-09-16 Tue 09:39]").first_node::().unwrap(); - /// assert!(ts.is_inactive()); - /// ``` - pub fn is_inactive(&self) -> bool { - self.syntax.kind() == SyntaxKind::TIMESTAMP_INACTIVE - } - - /// ```rust - /// use orgize::{Org, ast::Timestamp}; - /// - /// let ts = Org::parse("<%%(org-calendar-holiday)>").first_node::().unwrap(); - /// assert!(ts.is_diary()); - /// ``` - pub fn is_diary(&self) -> bool { - self.syntax.kind() == SyntaxKind::TIMESTAMP_DIARY - } - - /// Returns `true` if this timestamp has a range - /// - /// ```rust - /// use orgize::{Org, ast::Timestamp}; - /// - /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::().unwrap(); - /// assert!(ts.is_range()); - /// let ts = Org::parse("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]").first_node::().unwrap(); - /// assert!(ts.is_range()); - /// let ts = Org::parse("[2003-09-16]--[2003-09-16]").first_node::().unwrap(); - /// assert!(ts.is_range()); - /// let ts = Org::parse("[2003-09-16 Tue 09:39]").first_node::().unwrap(); - /// assert!(!ts.is_range()); - /// ``` - pub fn is_range(&self) -> bool { - self.syntax - .children_with_tokens() - .filter_map(filter_token(SyntaxKind::MINUS)) - .count() - > 2 - } - - /// ```rust - /// use orgize::{Org, ast::{Timestamp, RepeaterType}}; - /// - /// let t = Org::parse("[2000-01-01 +1w]").first_node::().unwrap(); - /// assert_eq!(t.repeater_type(), Some(RepeaterType::Cumulate)); - /// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::().unwrap(); - /// assert_eq!(t.repeater_type(), Some(RepeaterType::Restart)); - /// let t = Org::parse("[2000-01-01 --1y]").first_node::().unwrap(); - /// assert_eq!(t.repeater_type(), None); - /// ``` - pub fn repeater_type(&self) -> Option { - self.nth_repeater(0).map(|i| i.0) - } - - /// ```rust - /// use orgize::{Org, ast::Timestamp}; - /// - /// let t = Org::parse("[2000-01-01 +1w]").first_node::().unwrap(); - /// assert_eq!(t.repeater_value(), 
Some(1)); - /// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::().unwrap(); - /// assert_eq!(t.repeater_value(), Some(10)); - /// let t = Org::parse("[2000-01-01 --1y]").first_node::().unwrap(); - /// assert_eq!(t.repeater_value(), None); - /// ``` - pub fn repeater_value(&self) -> Option { - self.nth_repeater(0).map(|i| i.1) - } - - /// ```rust - /// use orgize::{Org, ast::{Timestamp, TimeUnit}}; - /// - /// let t = Org::parse("[2000-01-01 +1w]").first_node::().unwrap(); - /// assert_eq!(t.repeater_unit(), Some(TimeUnit::Week)); - /// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::().unwrap(); - /// assert_eq!(t.repeater_unit(), Some(TimeUnit::Day)); - /// let t = Org::parse("[2000-01-01 --1y]").first_node::().unwrap(); - /// assert_eq!(t.repeater_unit(), None); - /// ``` - pub fn repeater_unit(&self) -> Option { - self.nth_repeater(0).map(|i| i.2) - } - - /// ```rust - /// use orgize::{Org, ast::{Timestamp, DelayType}}; - /// - /// let t = Org::parse("[2000-01-01 -3y]").first_node::().unwrap(); - /// assert_eq!(t.warning_type(), Some(DelayType::All)); - /// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::().unwrap(); - /// assert_eq!(t.warning_type(), Some(DelayType::All)); - /// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::().unwrap(); - /// assert_eq!(t.warning_type(), Some(DelayType::First)); - /// ``` - pub fn warning_type(&self) -> Option { - self.nth_delay(0).map(|i| i.0) - } - - /// ```rust - /// use orgize::{Org, ast::Timestamp}; - /// - /// let t = Org::parse("[2000-01-01 -3y]").first_node::().unwrap(); - /// assert_eq!(t.warning_value(), Some(3)); - /// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::().unwrap(); - /// assert_eq!(t.warning_value(), Some(5)); - /// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::().unwrap(); - /// assert_eq!(t.warning_value(), Some(10)); - /// ``` - pub fn warning_value(&self) -> Option { - self.nth_delay(0).map(|i| i.1) - } - - 
/// ```rust - /// use orgize::{Org, ast::{Timestamp, TimeUnit}}; - /// - /// let t = Org::parse("[2000-01-01 -3y]").first_node::().unwrap(); - /// assert_eq!(t.warning_unit(), Some(TimeUnit::Year)); - /// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::().unwrap(); - /// assert_eq!(t.warning_unit(), Some(TimeUnit::Week)); - /// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::().unwrap(); - /// assert_eq!(t.warning_unit(), Some(TimeUnit::Month)); - /// ``` - pub fn warning_unit(&self) -> Option { - self.nth_delay(0).map(|i| i.2) - } - - fn nth_repeater(&self, nth: usize) -> Option<(RepeaterType, u32, TimeUnit)> { - let mut i = nth + 1; - - let mut iter = self.syntax.children_with_tokens().skip_while(|n| { - if n.kind() == SyntaxKind::TIMESTAMP_REPEATER_MARK { - i -= 1; - i != 0 - } else { - true - } - }); - - let mark = iter.next().and_then(|n| match n.as_token()?.text() { - "++" => Some(RepeaterType::CatchUp), - "+" => Some(RepeaterType::Cumulate), - ".+" => Some(RepeaterType::Restart), - _ => None, - })?; - let value = iter - .next() - .and_then(|n| n.as_token()?.text().parse::().ok())?; - let unit = iter.next().and_then(|n| match n.as_token()?.text() { - "h" => Some(TimeUnit::Hour), - "d" => Some(TimeUnit::Day), - "w" => Some(TimeUnit::Week), - "m" => Some(TimeUnit::Month), - "y" => Some(TimeUnit::Year), - _ => None, - })?; - - Some((mark, value, unit)) - } - - fn nth_delay(&self, nth: usize) -> Option<(DelayType, u32, TimeUnit)> { - let mut i = nth + 1; - - let mut iter = self.syntax.children_with_tokens().skip_while(|n| { - if n.kind() == SyntaxKind::TIMESTAMP_DELAY_MARK { - i -= 1; - i != 0 - } else { - true - } - }); - - let mark = iter.next().and_then(|n| match n.as_token()?.text() { - "-" => Some(DelayType::All), - "--" => Some(DelayType::First), - _ => None, - })?; - let value = iter - .next() - .and_then(|n| n.as_token()?.text().parse::().ok())?; - let unit = iter.next().and_then(|n| match n.as_token()?.text() { - "h" => 
Some(TimeUnit::Hour), - "d" => Some(TimeUnit::Day), - "w" => Some(TimeUnit::Week), - "m" => Some(TimeUnit::Month), - "y" => Some(TimeUnit::Year), - _ => None, - })?; - - Some((mark, value, unit)) - } - - /// Converts timestamp start to chrono NaiveDateTime - /// - /// ```rust - /// use orgize::{Org, ast::Timestamp}; - /// use chrono::NaiveDateTime; - /// - /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::().unwrap(); - /// assert_eq!(ts.start_to_chrono().unwrap(), "2003-09-16T09:39:00".parse::().unwrap()); - /// - /// let ts = Org::parse("[2003-13-00 Tue 09:39-10:39]").first_node::().unwrap(); - /// assert!(ts.start_to_chrono().is_none()); - /// ``` - #[cfg(feature = "chrono")] - pub fn start_to_chrono(&self) -> Option { - Some(chrono::NaiveDateTime::new( - chrono::NaiveDate::from_ymd_opt( - self.year_start()?.parse().ok()?, - self.month_start()?.parse().ok()?, - self.day_start()?.parse().ok()?, - )?, - chrono::NaiveTime::from_hms_opt( - self.hour_start()?.parse().ok()?, - self.minute_start()?.parse().ok()?, - 0, - )?, - )) - } - - /// Converts timestamp end to chrono NaiveDateTime - /// - /// ```rust - /// use orgize::{Org, ast::Timestamp}; - /// use chrono::NaiveDateTime; - /// - /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::().unwrap(); - /// assert_eq!(ts.end_to_chrono().unwrap(), "2003-09-16T10:39:00".parse::().unwrap()); - /// ``` - #[cfg(feature = "chrono")] - pub fn end_to_chrono(&self) -> Option { - Some(chrono::NaiveDateTime::new( - chrono::NaiveDate::from_ymd_opt( - self.year_end()?.parse().ok()?, - self.month_end()?.parse().ok()?, - self.day_end()?.parse().ok()?, - )?, - chrono::NaiveTime::from_hms_opt( - self.hour_end()?.parse().ok()?, - self.minute_end()?.parse().ok()?, - 0, - )?, - )) - } - - /// Returns chrono::TimeDelta between timestamp start and end - /// - /// ```rust - /// use orgize::{Org, ast::Timestamp}; - /// - /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::().unwrap(); - /// 
assert_eq!(ts.time_delta().unwrap().num_hours(), 1); - /// ``` - #[cfg(feature = "chrono")] - pub fn time_delta(&self) -> Option { - Some(self.end_to_chrono()? - self.start_to_chrono()?) - } -} diff --git a/src/config.rs b/src/config.rs index a51db78..955252d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,87 +1,18 @@ -use crate::syntax::document::document_node; -use crate::Org; - -#[derive(Clone, Debug)] -pub enum UseSubSuperscript { - Nil, - Brace, - True, -} - -impl UseSubSuperscript { - pub fn is_nil(&self) -> bool { - matches!(self, UseSubSuperscript::Nil) - } - - pub fn is_true(&self) -> bool { - matches!(self, UseSubSuperscript::True) - } - - pub fn is_brace(&self) -> bool { - matches!(self, UseSubSuperscript::Brace) - } -} - /// Parse configuration #[derive(Clone, Debug)] pub struct ParseConfig { /// Headline's todo keywords pub todo_keywords: (Vec, Vec), - - pub dual_keywords: Vec, - - pub parsed_keywords: Vec, - - /// Control sub/superscript parsing - /// - /// Equivalent to `org-use-sub-superscripts` - /// - /// - `UseSubSuperscript::Nil`: disable parsing - /// - `UseSubSuperscript::True`: enable parsing - /// - `UseSubSuperscript::Brace`: enable parsing, but braces are required - pub use_sub_superscript: UseSubSuperscript, - - /// Affiliated keywords - /// - /// Equivalent to [`org-element-affiliated-keywords`](https://git.sr.ht/~bzg/org-mode/tree/6f960f3c6a4dfe137fbd33fef9f7dadfd229600c/item/lisp/org-element.el#L331) - pub affiliated_keywords: Vec, -} - -impl ParseConfig { - /// Parses input with current config - pub fn parse(self, input: impl AsRef) -> Org { - let input = (input.as_ref(), &self).into(); - let node = document_node(input).unwrap().1; - - Org { - config: self, - green: node.into_node().unwrap(), - } - } } impl Default for ParseConfig { fn default() -> Self { ParseConfig { - todo_keywords: (vec!["TODO".into()], vec!["DONE".into()]), - dual_keywords: vec!["CAPTION".into(), "RESULTS".into()], - parsed_keywords: vec!["CAPTION".into()], 
- use_sub_superscript: UseSubSuperscript::True, - affiliated_keywords: vec![ - "CAPTION".into(), - "DATA".into(), - "HEADER".into(), - "HEADERS".into(), - "LABEL".into(), - "NAME".into(), - "PLOT".into(), - "RESNAME".into(), - "RESULT".into(), - "RESULTS".into(), - "SOURCE".into(), - "SRCNAME".into(), - "TBLNAME".into(), - ], + todo_keywords: (vec![String::from("TODO")], vec![String::from("DONE")]), } } } + +lazy_static::lazy_static! { + pub static ref DEFAULT_CONFIG: ParseConfig = ParseConfig::default(); +} diff --git a/src/elements/block.rs b/src/elements/block.rs new file mode 100644 index 0000000..f138950 --- /dev/null +++ b/src/elements/block.rs @@ -0,0 +1,408 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::tag_no_case, + character::complete::{alpha1, space0}, + sequence::preceded, + IResult, +}; + +use crate::elements::Element; +use crate::parse::combinators::{blank_lines_count, line, lines_till}; + +/// Special Block Element +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct SpecialBlock<'a> { + /// Block parameters + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub parameters: Option>, + /// Block name + pub name: Cow<'a, str>, + /// Numbers of blank lines between first block's line and next non-blank + /// line + pub pre_blank: usize, + /// Numbers of blank lines between last block's line and next non-blank line + /// or buffer's end + pub post_blank: usize, +} + +impl SpecialBlock<'_> { + pub fn into_owned(self) -> SpecialBlock<'static> { + SpecialBlock { + name: self.name.into_owned().into(), + parameters: self.parameters.map(Into::into).map(Cow::Owned), + pre_blank: self.pre_blank, + post_blank: self.post_blank, + } + } +} + +/// Quote Block Element +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct QuoteBlock<'a> { + /// Optional block parameters + 
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub parameters: Option>, + /// Numbers of blank lines between first block's line and next non-blank + /// line + pub pre_blank: usize, + /// Numbers of blank lines between last block's line and next non-blank line + /// or buffer's end + pub post_blank: usize, +} + +impl QuoteBlock<'_> { + pub fn into_owned(self) -> QuoteBlock<'static> { + QuoteBlock { + parameters: self.parameters.map(Into::into).map(Cow::Owned), + pre_blank: self.pre_blank, + post_blank: self.post_blank, + } + } +} + +/// Center Block Element +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct CenterBlock<'a> { + /// Optional block parameters + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub parameters: Option>, + /// Numbers of blank lines between first block's line and next non-blank + /// line + pub pre_blank: usize, + /// Numbers of blank lines between last block's line and next non-blank line + /// or buffer's end + pub post_blank: usize, +} + +impl CenterBlock<'_> { + pub fn into_owned(self) -> CenterBlock<'static> { + CenterBlock { + parameters: self.parameters.map(Into::into).map(Cow::Owned), + pre_blank: self.pre_blank, + post_blank: self.post_blank, + } + } +} + +/// Verse Block Element +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct VerseBlock<'a> { + /// Optional block parameters + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub parameters: Option>, + /// Numbers of blank lines between first block's line and next non-blank + /// line + pub pre_blank: usize, + /// Numbers of blank lines between last block's line and next non-blank line + /// or buffer's end + pub post_blank: usize, +} + +impl VerseBlock<'_> { + pub fn into_owned(self) -> VerseBlock<'static> { + VerseBlock { + 
parameters: self.parameters.map(Into::into).map(Cow::Owned), + pre_blank: self.pre_blank, + post_blank: self.post_blank, + } + } +} + +/// Comment Block Element +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct CommentBlock<'a> { + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub data: Option>, + /// Comment block contents + pub contents: Cow<'a, str>, + /// Numbers of blank lines between last block's line and next non-blank line + /// or buffer's end + pub post_blank: usize, +} + +impl CommentBlock<'_> { + pub fn into_owned(self) -> CommentBlock<'static> { + CommentBlock { + data: self.data.map(Into::into).map(Cow::Owned), + contents: self.contents.into_owned().into(), + post_blank: self.post_blank, + } + } +} + +/// Example Block Element +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct ExampleBlock<'a> { + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub data: Option>, + /// Block contents + pub contents: Cow<'a, str>, + /// Numbers of blank lines between last block's line and next non-blank line + /// or buffer's end + pub post_blank: usize, +} + +impl ExampleBlock<'_> { + pub fn into_owned(self) -> ExampleBlock<'static> { + ExampleBlock { + data: self.data.map(Into::into).map(Cow::Owned), + contents: self.contents.into_owned().into(), + post_blank: self.post_blank, + } + } +} + +/// Export Block Element +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct ExportBlock<'a> { + pub data: Cow<'a, str>, + /// Block contents + pub contents: Cow<'a, str>, + /// Numbers of blank lines between last block's line and next non-blank line + /// or buffer's end + pub post_blank: usize, +} + +impl ExportBlock<'_> { + pub fn into_owned(self) -> ExportBlock<'static> { + 
ExportBlock { + data: self.data.into_owned().into(), + contents: self.contents.into_owned().into(), + post_blank: self.post_blank, + } + } +} + +/// Src Block Element +#[derive(Debug, Default, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct SourceBlock<'a> { + /// Block contents + pub contents: Cow<'a, str>, + /// Language of the code in the block + pub language: Cow<'a, str>, + pub arguments: Cow<'a, str>, + /// Numbers of blank lines between last block's line and next non-blank line + /// or buffer's end + pub post_blank: usize, +} + +impl SourceBlock<'_> { + pub fn into_owned(self) -> SourceBlock<'static> { + SourceBlock { + language: self.language.into_owned().into(), + arguments: self.arguments.into_owned().into(), + contents: self.contents.into_owned().into(), + post_blank: self.post_blank, + } + } + + // TODO: fn number_lines() -> Some(New) | Some(Continued) | None { } + // TODO: fn preserve_indent() -> bool { } + // TODO: fn use_labels() -> bool { } + // TODO: fn label_fmt() -> Option { } + // TODO: fn retain_labels() -> bool { } +} + +#[derive(Debug)] +#[cfg_attr(test, derive(PartialEq))] +pub(crate) struct RawBlock<'a> { + pub name: &'a str, + pub arguments: &'a str, + + pub pre_blank: usize, + pub contents: &'a str, + pub contents_without_blank_lines: &'a str, + + pub post_blank: usize, +} + +impl<'a> RawBlock<'a> { + pub fn parse(input: &str) -> Option<(&str, RawBlock)> { + parse_internal(input).ok() + } + + pub fn into_element(self) -> (Element<'a>, &'a str) { + let RawBlock { + name, + contents, + arguments, + pre_blank, + contents_without_blank_lines, + post_blank, + } = self; + + let arguments: Option> = if arguments.is_empty() { + None + } else { + Some(arguments.into()) + }; + + let element = match &*name.to_uppercase() { + "CENTER" => CenterBlock { + parameters: arguments, + pre_blank, + post_blank, + } + .into(), + "QUOTE" => QuoteBlock { + parameters: arguments, + pre_blank, + 
post_blank, + } + .into(), + "VERSE" => VerseBlock { + parameters: arguments, + pre_blank, + post_blank, + } + .into(), + "COMMENT" => CommentBlock { + data: arguments, + contents: contents.into(), + post_blank, + } + .into(), + "EXAMPLE" => ExampleBlock { + data: arguments, + contents: contents.into(), + post_blank, + } + .into(), + "EXPORT" => ExportBlock { + data: arguments.unwrap_or_default(), + contents: contents.into(), + post_blank, + } + .into(), + "SRC" => { + let (language, arguments) = match &arguments { + Some(Cow::Borrowed(args)) => { + let (language, arguments) = + args.split_at(args.find(' ').unwrap_or_else(|| args.len())); + (language.into(), arguments.into()) + } + None => (Cow::Borrowed(""), Cow::Borrowed("")), + _ => unreachable!( + "`parse_block_element` returns `Some(Cow::Borrowed)` or `None`" + ), + }; + SourceBlock { + arguments, + language, + contents: contents.into(), + post_blank, + } + .into() + } + _ => SpecialBlock { + parameters: arguments, + name: name.into(), + pre_blank, + post_blank, + } + .into(), + }; + + (element, contents_without_blank_lines) + } +} + +fn parse_internal(input: &str) -> IResult<&str, RawBlock, ()> { + let (input, _) = space0(input)?; + let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?; + let (input, arguments) = line(input)?; + let end_line = format!("#+END_{}", name); + let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case(&end_line))(input)?; + let (contents_without_blank_lines, pre_blank) = blank_lines_count(contents)?; + let (input, post_blank) = blank_lines_count(input)?; + + Ok(( + input, + RawBlock { + name, + contents, + arguments: arguments.trim(), + pre_blank, + contents_without_blank_lines, + post_blank, + }, + )) +} + +#[test] +fn parse() { + assert_eq!( + RawBlock::parse( + r#"#+BEGIN_SRC +#+END_SRC"# + ), + Some(( + "", + RawBlock { + contents: "", + contents_without_blank_lines: "", + pre_blank: 0, + post_blank: 0, + name: "SRC".into(), + arguments: "" + } 
+ )) + ); + + assert_eq!( + RawBlock::parse( + r#"#+begin_src + #+end_src"# + ), + Some(( + "", + RawBlock { + contents: "", + contents_without_blank_lines: "", + pre_blank: 0, + post_blank: 0, + name: "src".into(), + arguments: "" + } + )) + ); + + assert_eq!( + RawBlock::parse( + r#"#+BEGIN_SRC javascript +console.log('Hello World!'); +#+END_SRC + +"# + ), + Some(( + "", + RawBlock { + contents: "console.log('Hello World!');\n", + contents_without_blank_lines: "console.log('Hello World!');\n", + pre_blank: 0, + post_blank: 1, + name: "SRC".into(), + arguments: "javascript" + } + )) + ); + // TODO: more testing +} diff --git a/src/elements/clock.rs b/src/elements/clock.rs new file mode 100644 index 0000000..c489a88 --- /dev/null +++ b/src/elements/clock.rs @@ -0,0 +1,242 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::tag, + character::complete::{char, digit1, space0}, + combinator::recognize, + sequence::separated_pair, + IResult, +}; + +use crate::elements::timestamp::{parse_inactive, Datetime, Timestamp}; +use crate::parse::combinators::{blank_lines_count, eol}; + +/// Clock Element +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[cfg_attr(feature = "ser", serde(untagged))] +#[derive(Debug, Clone)] +pub enum Clock<'a> { + /// Closed Clock + Closed { + /// Time start + start: Datetime<'a>, + /// Time end + end: Datetime<'a>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + repeater: Option>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + delay: Option>, + /// Clock duration + duration: Cow<'a, str>, + /// Numbers of blank lines between the clock line and next non-blank + /// line or buffer's end + post_blank: usize, + }, + /// Running Clock + Running { + /// Time start + start: Datetime<'a>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + repeater: Option>, + #[cfg_attr(feature = "ser", 
serde(skip_serializing_if = "Option::is_none"))] + delay: Option>, + /// Numbers of blank lines between the clock line and next non-blank + /// line or buffer's end + post_blank: usize, + }, +} + +impl Clock<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, Clock)> { + parse_internal(input).ok() + } + + pub fn into_onwed(self) -> Clock<'static> { + match self { + Clock::Closed { + start, + end, + repeater, + delay, + duration, + post_blank, + } => Clock::Closed { + start: start.into_owned(), + end: end.into_owned(), + repeater: repeater.map(Into::into).map(Cow::Owned), + delay: delay.map(Into::into).map(Cow::Owned), + duration: duration.into_owned().into(), + post_blank, + }, + Clock::Running { + start, + repeater, + delay, + post_blank, + } => Clock::Running { + start: start.into_owned(), + repeater: repeater.map(Into::into).map(Cow::Owned), + delay: delay.map(Into::into).map(Cow::Owned), + post_blank, + }, + } + } + + /// Returns `true` if the clock is running. + pub fn is_running(&self) -> bool { + match self { + Clock::Closed { .. } => false, + Clock::Running { .. } => true, + } + } + + /// Returns `true` if the clock is closed. + pub fn is_closed(&self) -> bool { + match self { + Clock::Closed { .. } => true, + Clock::Running { .. } => false, + } + } + + /// Returns clock duration, or `None` if it's running. + pub fn duration(&self) -> Option<&str> { + match self { + Clock::Closed { duration, .. } => Some(duration), + Clock::Running { .. } => None, + } + } + + /// Constructs a timestamp from the clock. + pub fn value(&self) -> Timestamp { + match &*self { + Clock::Closed { + start, + end, + repeater, + delay, + .. + } => Timestamp::InactiveRange { + start: start.clone(), + end: end.clone(), + repeater: repeater.clone(), + delay: delay.clone(), + }, + Clock::Running { + start, + repeater, + delay, + .. 
+ } => Timestamp::Inactive { + start: start.clone(), + repeater: repeater.clone(), + delay: delay.clone(), + }, + } + } +} + +fn parse_internal(input: &str) -> IResult<&str, Clock, ()> { + let (input, _) = space0(input)?; + let (input, _) = tag("CLOCK:")(input)?; + let (input, _) = space0(input)?; + let (input, timestamp) = parse_inactive(input)?; + + match timestamp { + Timestamp::InactiveRange { + start, + end, + repeater, + delay, + } => { + let (input, _) = space0(input)?; + let (input, _) = tag("=>")(input)?; + let (input, _) = space0(input)?; + let (input, duration) = recognize(separated_pair(digit1, char(':'), digit1))(input)?; + let (input, _) = eol(input)?; + let (input, blank) = blank_lines_count(input)?; + Ok(( + input, + Clock::Closed { + start, + end, + repeater, + delay, + duration: duration.into(), + post_blank: blank, + }, + )) + } + Timestamp::Inactive { + start, + repeater, + delay, + } => { + let (input, _) = eol(input)?; + let (input, blank) = blank_lines_count(input)?; + Ok(( + input, + Clock::Running { + start, + repeater, + delay, + post_blank: blank, + }, + )) + } + _ => unreachable!( + "`parse_inactive` only returns `Timestamp::InactiveRange` or `Timestamp::Inactive`." 
+ ), + } +} + +#[test] +fn parse() { + assert_eq!( + Clock::parse("CLOCK: [2003-09-16 Tue 09:39]"), + Some(( + "", + Clock::Running { + start: Datetime { + year: 2003, + month: 9, + day: 16, + dayname: "Tue".into(), + hour: Some(9), + minute: Some(39) + }, + repeater: None, + delay: None, + post_blank: 0, + } + )) + ); + assert_eq!( + Clock::parse("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00\n\n"), + Some(( + "", + Clock::Closed { + start: Datetime { + year: 2003, + month: 9, + day: 16, + dayname: "Tue".into(), + hour: Some(9), + minute: Some(39) + }, + end: Datetime { + year: 2003, + month: 9, + day: 16, + dayname: "Tue".into(), + hour: Some(10), + minute: Some(39) + }, + repeater: None, + delay: None, + duration: "1:00".into(), + post_blank: 1, + } + )) + ); +} diff --git a/src/elements/comment.rs b/src/elements/comment.rs new file mode 100644 index 0000000..d6d414d --- /dev/null +++ b/src/elements/comment.rs @@ -0,0 +1,53 @@ +use std::borrow::Cow; + +use nom::{ + error::{make_error, ErrorKind}, + Err, IResult, +}; + +use crate::parse::combinators::{blank_lines_count, lines_while}; + +#[derive(Debug, Default, Clone)] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct Comment<'a> { + /// Comments value, with pound signs + pub value: Cow<'a, str>, + /// Numbers of blank lines between last comment's line and next non-blank + /// line or buffer's end + pub post_blank: usize, +} + +impl Comment<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, Comment)> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> Comment<'static> { + Comment { + value: self.value.into_owned().into(), + post_blank: self.post_blank, + } + } +} + +fn parse_internal(input: &str) -> IResult<&str, Comment, ()> { + let (input, value) = lines_while(|line| { + let line = line.trim_start(); + line == "#" || line.starts_with("# ") + })(input)?; + + if value.is_empty() { + // TODO: better error kind + return Err(Err::Error(make_error(input, 
ErrorKind::Many0))); + } + + let (input, post_blank) = blank_lines_count(input)?; + + Ok(( + input, + Comment { + value: value.into(), + post_blank, + }, + )) +} diff --git a/src/elements/cookie.rs b/src/elements/cookie.rs new file mode 100644 index 0000000..59dd012 --- /dev/null +++ b/src/elements/cookie.rs @@ -0,0 +1,122 @@ +use std::borrow::Cow; + +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::digit0, + combinator::recognize, + sequence::{delimited, pair, separated_pair}, + IResult, +}; + +/// Statistics Cookie Object +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub struct Cookie<'a> { + /// Full cookie value + pub value: Cow<'a, str>, +} + +impl Cookie<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, Cookie)> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> Cookie<'static> { + Cookie { + value: self.value.into_owned().into(), + } + } +} + +#[inline] +fn parse_internal(input: &str) -> IResult<&str, Cookie, ()> { + let (input, value) = recognize(delimited( + tag("["), + alt(( + separated_pair(digit0, tag("/"), digit0), + pair(digit0, tag("%")), + )), + tag("]"), + ))(input)?; + + Ok(( + input, + Cookie { + value: value.into(), + }, + )) +} + +#[test] +fn parse() { + assert_eq!( + Cookie::parse("[1/10]"), + Some(( + "", + Cookie { + value: "[1/10]".into() + } + )) + ); + assert_eq!( + Cookie::parse("[1/1000]"), + Some(( + "", + Cookie { + value: "[1/1000]".into() + } + )) + ); + assert_eq!( + Cookie::parse("[10%]"), + Some(( + "", + Cookie { + value: "[10%]".into() + } + )) + ); + assert_eq!( + Cookie::parse("[%]"), + Some(( + "", + Cookie { + value: "[%]".into() + } + )) + ); + assert_eq!( + Cookie::parse("[/]"), + Some(( + "", + Cookie { + value: "[/]".into() + } + )) + ); + assert_eq!( + Cookie::parse("[100/]"), + Some(( + "", + Cookie { + value: "[100/]".into() + } + )) + ); + assert_eq!( + Cookie::parse("[/100]"), + Some(( + "", + 
Cookie { + value: "[/100]".into() + } + )) + ); + + assert!(Cookie::parse("[10% ]").is_none()); + assert!(Cookie::parse("[1//100]").is_none()); + assert!(Cookie::parse("[1\\100]").is_none()); + assert!(Cookie::parse("[10%%]").is_none()); +} diff --git a/src/elements/drawer.rs b/src/elements/drawer.rs new file mode 100644 index 0000000..20bb956 --- /dev/null +++ b/src/elements/drawer.rs @@ -0,0 +1,121 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::{tag, take_while1}, + character::complete::space0, + sequence::delimited, + IResult, +}; + +use crate::parse::combinators::{blank_lines_count, eol, lines_till}; + +/// Drawer Element +#[derive(Debug, Default, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct Drawer<'a> { + /// Drawer name + pub name: Cow<'a, str>, + /// Numbers of blank lines between first drawer's line and next non-blank + /// line + pub pre_blank: usize, + /// Numbers of blank lines between last drawer's line and next non-blank + /// line or buffer's end + pub post_blank: usize, +} + +impl Drawer<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, (Drawer, &str))> { + parse_drawer(input).ok() + } + + pub fn into_owned(self) -> Drawer<'static> { + Drawer { + name: self.name.into_owned().into(), + pre_blank: self.pre_blank, + post_blank: self.post_blank, + } + } +} + +#[inline] +pub fn parse_drawer(input: &str) -> IResult<&str, (Drawer, &str), ()> { + let (input, (mut drawer, content)) = parse_drawer_without_blank(input)?; + + let (content, blank) = blank_lines_count(content)?; + drawer.pre_blank = blank; + + let (input, blank) = blank_lines_count(input)?; + drawer.post_blank = blank; + + Ok((input, (drawer, content))) +} + +pub fn parse_drawer_without_blank(input: &str) -> IResult<&str, (Drawer, &str), ()> { + let (input, _) = space0(input)?; + let (input, name) = delimited( + tag(":"), + take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'), + tag(":"), + 
)(input)?; + let (input, _) = eol(input)?; + let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case(":END:"))(input)?; + + Ok(( + input, + ( + Drawer { + name: name.into(), + pre_blank: 0, + post_blank: 0, + }, + contents, + ), + )) +} + +#[test] +fn parse() { + assert_eq!( + parse_drawer( + r#":PROPERTIES: + :CUSTOM_ID: id + :END:"# + ), + Ok(( + "", + ( + Drawer { + name: "PROPERTIES".into(), + pre_blank: 0, + post_blank: 0 + }, + " :CUSTOM_ID: id\n" + ) + )) + ); + assert_eq!( + parse_drawer( + r#":PROPERTIES: + + + :END: + +"# + ), + Ok(( + "", + ( + Drawer { + name: "PROPERTIES".into(), + pre_blank: 2, + post_blank: 1, + }, + "" + ) + )) + ); + + // https://github.com/PoiScript/orgize/issues/9 + assert!(parse_drawer(":SPAGHETTI:\n").is_err()); +} diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs new file mode 100644 index 0000000..c74e7c1 --- /dev/null +++ b/src/elements/dyn_block.rs @@ -0,0 +1,99 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::tag_no_case, + character::complete::{alpha1, space0, space1}, + IResult, +}; + +use crate::parse::combinators::{blank_lines_count, line, lines_till}; + +/// Dynamic Block Element +#[derive(Debug, Default, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct DynBlock<'a> { + /// Block name + pub block_name: Cow<'a, str>, + /// Block argument + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub arguments: Option>, + /// Numbers of blank lines between first block's line and next non-blank + /// line + pub pre_blank: usize, + /// Numbers of blank lines between last drawer's line and next non-blank + /// line or buffer's end + pub post_blank: usize, +} + +impl DynBlock<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, (DynBlock, &str))> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> DynBlock<'static> { + DynBlock { + block_name: 
self.block_name.into_owned().into(), + arguments: self.arguments.map(Into::into).map(Cow::Owned), + pre_blank: self.pre_blank, + post_blank: self.post_blank, + } + } +} + +#[inline] +fn parse_internal(input: &str) -> IResult<&str, (DynBlock, &str), ()> { + let (input, _) = space0(input)?; + let (input, _) = tag_no_case("#+BEGIN:")(input)?; + let (input, _) = space1(input)?; + let (input, name) = alpha1(input)?; + let (input, args) = line(input)?; + let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case("#+END:"))(input)?; + let (contents, pre_blank) = blank_lines_count(contents)?; + let (input, post_blank) = blank_lines_count(input)?; + + Ok(( + input, + ( + DynBlock { + block_name: name.into(), + arguments: if args.trim().is_empty() { + None + } else { + Some(args.trim().into()) + }, + pre_blank, + post_blank, + }, + contents, + ), + )) +} + +#[test] +fn parse() { + // TODO: testing + assert_eq!( + DynBlock::parse( + r#"#+BEGIN: clocktable :scope file + + +CONTENTS +#+END: + +"# + ), + Some(( + "", + ( + DynBlock { + block_name: "clocktable".into(), + arguments: Some(":scope file".into()), + pre_blank: 2, + post_blank: 1, + }, + "CONTENTS\n" + ) + )) + ); +} diff --git a/src/elements/emphasis.rs b/src/elements/emphasis.rs new file mode 100644 index 0000000..6517c8b --- /dev/null +++ b/src/elements/emphasis.rs @@ -0,0 +1,113 @@ +use bytecount::count; +use memchr::memchr_iter; + +use crate::elements::Element; + +#[derive(Debug)] +#[cfg_attr(test, derive(PartialEq))] +pub(crate) struct Emphasis<'a> { + marker: u8, + contents: &'a str, +} + +impl<'a> Emphasis<'a> { + pub fn parse(text: &str, marker: u8) -> Option<(&str, Emphasis)> { + if text.len() < 3 { + return None; + } + + let bytes = text.as_bytes(); + + if bytes[1].is_ascii_whitespace() { + return None; + } + + for i in memchr_iter(marker, bytes).skip(1) { + // contains at least one character + if i == 1 { + continue; + } else if count(&bytes[1..i], b'\n') >= 2 { + break; + } else if 
validate_marker(i, text) { + return Some(( + &text[i + 1..], + Emphasis { + marker, + contents: &text[1..i], + }, + )); + } + } + None + } + + pub fn into_element(self) -> (Element<'a>, &'a str) { + let Emphasis { marker, contents } = self; + let element = match marker { + b'*' => Element::Bold, + b'+' => Element::Strike, + b'/' => Element::Italic, + b'_' => Element::Underline, + b'=' => Element::Verbatim { + value: contents.into(), + }, + b'~' => Element::Code { + value: contents.into(), + }, + _ => unreachable!(), + }; + (element, contents) + } +} + +fn validate_marker(pos: usize, text: &str) -> bool { + if text.as_bytes()[pos - 1].is_ascii_whitespace() { + false + } else if let Some(&post) = text.as_bytes().get(pos + 1) { + match post { + b' ' | b'-' | b'.' | b',' | b':' | b'!' | b'?' | b'\'' | b'\n' | b')' | b'}' => true, + _ => false, + } + } else { + true + } +} + +#[test] +fn parse() { + assert_eq!( + Emphasis::parse("*bold*", b'*'), + Some(( + "", + Emphasis { + contents: "bold", + marker: b'*' + } + )) + ); + assert_eq!( + Emphasis::parse("*bo*ld*", b'*'), + Some(( + "", + Emphasis { + contents: "bo*ld", + marker: b'*' + } + )) + ); + assert_eq!( + Emphasis::parse("*bo\nld*", b'*'), + Some(( + "", + Emphasis { + contents: "bo\nld", + marker: b'*' + } + )) + ); + assert_eq!(Emphasis::parse("*bold*a", b'*'), None); + assert_eq!(Emphasis::parse("*bold*", b'/'), None); + assert_eq!(Emphasis::parse("*bold *", b'*'), None); + assert_eq!(Emphasis::parse("* bold*", b'*'), None); + assert_eq!(Emphasis::parse("*b\nol\nd*", b'*'), None); +} diff --git a/src/elements/fixed_width.rs b/src/elements/fixed_width.rs new file mode 100644 index 0000000..ae06677 --- /dev/null +++ b/src/elements/fixed_width.rs @@ -0,0 +1,80 @@ +use std::borrow::Cow; + +use nom::{ + error::{make_error, ErrorKind}, + Err, IResult, +}; + +use crate::parse::combinators::{blank_lines_count, lines_while}; + +#[derive(Debug, Default, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = 
"ser", derive(serde::Serialize))] +pub struct FixedWidth<'a> { + /// Fixed width value + pub value: Cow<'a, str>, + /// Numbers of blank lines between last fixed width's line and next + /// non-blank line or buffer's end + pub post_blank: usize, +} + +impl FixedWidth<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth)> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> FixedWidth<'static> { + FixedWidth { + value: self.value.into_owned().into(), + post_blank: self.post_blank, + } + } +} + +fn parse_internal(input: &str) -> IResult<&str, FixedWidth, ()> { + let (input, value) = lines_while(|line| { + let line = line.trim_start(); + line == ":" || line.starts_with(": ") + })(input)?; + + if value.is_empty() { + // TODO: better error kind + return Err(Err::Error(make_error(input, ErrorKind::Many0))); + } + + let (input, post_blank) = blank_lines_count(input)?; + + Ok(( + input, + FixedWidth { + value: value.into(), + post_blank, + }, + )) +} + +#[test] +fn parse() { + assert_eq!( + FixedWidth::parse( + r#": A +: +: B +: C + +"# + ), + Some(( + "", + FixedWidth { + value: r#": A +: +: B +: C +"# + .into(), + post_blank: 1 + } + )) + ); +} diff --git a/src/elements/fn_def.rs b/src/elements/fn_def.rs new file mode 100644 index 0000000..2c91f16 --- /dev/null +++ b/src/elements/fn_def.rs @@ -0,0 +1,117 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::{tag, take_while1}, + sequence::delimited, + IResult, +}; + +use crate::parse::combinators::{blank_lines_count, line}; + +/// Footnote Definition Element +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Default, Clone)] +pub struct FnDef<'a> { + /// Footnote label, used for reference + pub label: Cow<'a, str>, + /// Numbers of blank lines between last footnote definition's line and next + /// non-blank line or buffer's end + pub post_blank: usize, +} + +impl FnDef<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, 
(FnDef, &str))> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> FnDef<'static> { + FnDef { + label: self.label.into_owned().into(), + post_blank: self.post_blank, + } + } +} + +fn parse_internal(input: &str) -> IResult<&str, (FnDef, &str), ()> { + let (input, label) = delimited( + tag("[fn:"), + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), + tag("]"), + )(input)?; + + let (input, content) = line(input)?; + + let (input, post_blank) = blank_lines_count(input)?; + + Ok(( + input, + ( + FnDef { + label: label.into(), + post_blank, + }, + content, + ), + )) +} + +#[test] +fn parse() { + assert_eq!( + FnDef::parse("[fn:1] https://orgmode.org"), + Some(( + "", + ( + FnDef { + label: "1".into(), + post_blank: 0 + }, + " https://orgmode.org" + ) + )) + ); + assert_eq!( + FnDef::parse("[fn:word_1] https://orgmode.org"), + Some(( + "", + ( + FnDef { + label: "word_1".into(), + post_blank: 0, + }, + " https://orgmode.org" + ) + )) + ); + assert_eq!( + FnDef::parse("[fn:WORD-1] https://orgmode.org"), + Some(( + "", + ( + FnDef { + label: "WORD-1".into(), + post_blank: 0, + }, + " https://orgmode.org" + ) + )) + ); + assert_eq!( + FnDef::parse("[fn:WORD]"), + Some(( + "", + ( + FnDef { + label: "WORD".into(), + post_blank: 0, + }, + "" + ) + )) + ); + + assert!(FnDef::parse("[fn:] https://orgmode.org").is_none()); + assert!(FnDef::parse("[fn:wor d] https://orgmode.org").is_none()); + assert!(FnDef::parse("[fn:WORD https://orgmode.org").is_none()); +} diff --git a/src/elements/fn_ref.rs b/src/elements/fn_ref.rs new file mode 100644 index 0000000..c03253e --- /dev/null +++ b/src/elements/fn_ref.rs @@ -0,0 +1,111 @@ +use std::borrow::Cow; + +use memchr::memchr2_iter; +use nom::{ + bytes::complete::{tag, take_while}, + combinator::opt, + error::{make_error, ErrorKind}, + sequence::preceded, + Err, IResult, +}; + +/// Footnote Reference Element +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] 
+#[derive(Debug, Clone)] +pub struct FnRef<'a> { + /// Footnote label + pub label: Cow<'a, str>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub definition: Option>, +} + +impl FnRef<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, FnRef)> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> FnRef<'static> { + FnRef { + label: self.label.into_owned().into(), + definition: self.definition.map(Into::into).map(Cow::Owned), + } + } +} + +#[inline] +fn parse_internal(input: &str) -> IResult<&str, FnRef, ()> { + let (input, _) = tag("[fn:")(input)?; + let (input, label) = + take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?; + let (input, definition) = opt(preceded(tag(":"), balanced_brackets))(input)?; + let (input, _) = tag("]")(input)?; + + Ok(( + input, + FnRef { + label: label.into(), + definition: definition.map(Into::into), + }, + )) +} + +fn balanced_brackets(input: &str) -> IResult<&str, &str, ()> { + let mut pairs = 1; + for i in memchr2_iter(b'[', b']', input.as_bytes()) { + if input.as_bytes()[i] == b'[' { + pairs += 1; + } else if pairs != 1 { + pairs -= 1; + } else { + return Ok((&input[i..], &input[0..i])); + } + } + Err(Err::Error(make_error(input, ErrorKind::Tag))) +} + +#[test] +fn parse() { + assert_eq!( + FnRef::parse("[fn:1]"), + Some(( + "", + FnRef { + label: "1".into(), + definition: None + }, + )) + ); + assert_eq!( + FnRef::parse("[fn:1:2]"), + Some(( + "", + FnRef { + label: "1".into(), + definition: Some("2".into()) + }, + )) + ); + assert_eq!( + FnRef::parse("[fn::2]"), + Some(( + "", + FnRef { + label: "".into(), + definition: Some("2".into()) + }, + )) + ); + assert_eq!( + FnRef::parse("[fn::[]]"), + Some(( + "", + FnRef { + label: "".into(), + definition: Some("[]".into()) + }, + )) + ); + + assert!(FnRef::parse("[fn::[]").is_none()); +} diff --git a/src/elements/inline_call.rs b/src/elements/inline_call.rs new file mode 100644 index 0000000..8878beb 
--- /dev/null +++ b/src/elements/inline_call.rs @@ -0,0 +1,122 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::{tag, take_till}, + combinator::opt, + sequence::{delimited, preceded}, + IResult, +}; + +/// Inline Babel Call Object +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Default, Clone)] +pub struct InlineCall<'a> { + /// Called code block name + pub name: Cow<'a, str>, + /// Header arguments applied to the code block + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub inside_header: Option>, + /// Argument passed to the code block + pub arguments: Cow<'a, str>, + /// Header arguments applied to the calling instance + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub end_header: Option>, +} + +impl InlineCall<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, InlineCall)> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> InlineCall<'static> { + InlineCall { + name: self.name.into_owned().into(), + arguments: self.arguments.into_owned().into(), + inside_header: self.inside_header.map(Into::into).map(Cow::Owned), + end_header: self.end_header.map(Into::into).map(Cow::Owned), + } + } +} + +#[inline] +fn parse_internal(input: &str) -> IResult<&str, InlineCall, ()> { + let (input, name) = preceded( + tag("call_"), + take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')'), + )(input)?; + let (input, inside_header) = opt(delimited( + tag("["), + take_till(|c| c == ']' || c == '\n'), + tag("]"), + ))(input)?; + let (input, arguments) = + delimited(tag("("), take_till(|c| c == ')' || c == '\n'), tag(")"))(input)?; + let (input, end_header) = opt(delimited( + tag("["), + take_till(|c| c == ']' || c == '\n'), + tag("]"), + ))(input)?; + + Ok(( + input, + InlineCall { + name: name.into(), + arguments: arguments.into(), + inside_header: inside_header.map(Into::into), + end_header: 
end_header.map(Into::into), + }, + )) +} + +#[test] +fn parse() { + assert_eq!( + InlineCall::parse("call_square(4)"), + Some(( + "", + InlineCall { + name: "square".into(), + arguments: "4".into(), + inside_header: None, + end_header: None, + } + )) + ); + assert_eq!( + InlineCall::parse("call_square[:results output](4)"), + Some(( + "", + InlineCall { + name: "square".into(), + arguments: "4".into(), + inside_header: Some(":results output".into()), + end_header: None, + }, + )) + ); + assert_eq!( + InlineCall::parse("call_square(4)[:results html]"), + Some(( + "", + InlineCall { + name: "square".into(), + arguments: "4".into(), + inside_header: None, + end_header: Some(":results html".into()), + }, + )) + ); + assert_eq!( + InlineCall::parse("call_square[:results output](4)[:results html]"), + Some(( + "", + InlineCall { + name: "square".into(), + arguments: "4".into(), + inside_header: Some(":results output".into()), + end_header: Some(":results html".into()), + }, + )) + ); +} diff --git a/src/elements/inline_src.rs b/src/elements/inline_src.rs new file mode 100644 index 0000000..f04d31a --- /dev/null +++ b/src/elements/inline_src.rs @@ -0,0 +1,88 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::{tag, take_till, take_while1}, + combinator::opt, + sequence::delimited, + IResult, +}; + +/// Inline Src Block Object +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub struct InlineSrc<'a> { + /// Language of the code + pub lang: Cow<'a, str>, + /// Optional header arguments + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub options: Option>, + /// Source code + pub body: Cow<'a, str>, +} + +impl InlineSrc<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, InlineSrc)> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> InlineSrc<'static> { + InlineSrc { + lang: self.lang.into_owned().into(), + options: 
self.options.map(Into::into).map(Cow::Owned), + body: self.body.into_owned().into(), + } + } +} + +#[inline] +fn parse_internal(input: &str) -> IResult<&str, InlineSrc, ()> { + let (input, _) = tag("src_")(input)?; + let (input, lang) = + take_while1(|c: char| !c.is_ascii_whitespace() && c != '[' && c != '{')(input)?; + let (input, options) = opt(delimited( + tag("["), + take_till(|c| c == '\n' || c == ']'), + tag("]"), + ))(input)?; + let (input, body) = delimited(tag("{"), take_till(|c| c == '\n' || c == '}'), tag("}"))(input)?; + + Ok(( + input, + InlineSrc { + lang: lang.into(), + options: options.map(Into::into), + body: body.into(), + }, + )) +} + +#[test] +fn parse() { + assert_eq!( + InlineSrc::parse("src_C{int a = 0;}"), + Some(( + "", + InlineSrc { + lang: "C".into(), + options: None, + body: "int a = 0;".into() + }, + )) + ); + assert_eq!( + InlineSrc::parse("src_xml[:exports code]{text}"), + Some(( + "", + InlineSrc { + lang: "xml".into(), + options: Some(":exports code".into()), + body: "text".into(), + }, + )) + ); + + assert!(InlineSrc::parse("src_xml[:exports code]{text").is_none()); + assert!(InlineSrc::parse("src_[:exports code]{text}").is_none()); + assert!(InlineSrc::parse("src_xml[:exports code]").is_none()); +} diff --git a/src/elements/keyword.rs b/src/elements/keyword.rs new file mode 100644 index 0000000..af8f8d9 --- /dev/null +++ b/src/elements/keyword.rs @@ -0,0 +1,230 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::{tag, take_till}, + character::complete::space0, + combinator::opt, + sequence::delimited, + IResult, +}; + +use crate::elements::Element; +use crate::parse::combinators::{blank_lines_count, line}; + +/// Keyword Element +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub struct Keyword<'a> { + /// Keyword name + pub key: Cow<'a, str>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub optional: Option>, + /// 
Keyword value + pub value: Cow<'a, str>, + /// Numbers of blank lines between keyword line and next non-blank line or + /// buffer's end + pub post_blank: usize, +} + +impl Keyword<'_> { + pub fn into_owned(self) -> Keyword<'static> { + Keyword { + key: self.key.into_owned().into(), + optional: self.optional.map(Into::into).map(Cow::Owned), + value: self.value.into_owned().into(), + post_blank: self.post_blank, + } + } +} + +/// Babel Call Element +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct BabelCall<'a> { + /// Babel call value + pub value: Cow<'a, str>, + /// Numbers of blank lines between babel call line and next non-blank line + /// or buffer's end + pub post_blank: usize, +} + +impl BabelCall<'_> { + pub fn into_owned(self) -> BabelCall<'static> { + BabelCall { + value: self.value.into_owned().into(), + post_blank: self.post_blank, + } + } +} + +#[derive(Debug)] +#[cfg_attr(test, derive(PartialEq))] +pub(crate) struct RawKeyword<'a> { + pub key: &'a str, + pub value: &'a str, + pub optional: Option<&'a str>, + pub post_blank: usize, +} + +impl<'a> RawKeyword<'a> { + pub fn parse(input: &str) -> Option<(&str, RawKeyword)> { + parse_internal(input).ok() + } + + pub fn into_element(self) -> Element<'a> { + let RawKeyword { + key, + value, + optional, + post_blank, + } = self; + + if (&*key).eq_ignore_ascii_case("CALL") { + BabelCall { + value: value.into(), + post_blank, + } + .into() + } else { + Keyword { + key: key.into(), + optional: optional.map(Into::into), + value: value.into(), + post_blank, + } + .into() + } + } +} + +fn parse_internal(input: &str) -> IResult<&str, RawKeyword, ()> { + let (input, _) = space0(input)?; + let (input, _) = tag("#+")(input)?; + let (input, key) = take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?; + let (input, optional) = opt(delimited( + tag("["), + take_till(|c| c == ']' || c == '\n'), + tag("]"), + ))(input)?; 
+ let (input, _) = tag(":")(input)?; + let (input, value) = line(input)?; + let (input, post_blank) = blank_lines_count(input)?; + + Ok(( + input, + RawKeyword { + key, + optional, + value: value.trim(), + post_blank, + }, + )) +} + +#[test] +fn parse() { + assert_eq!( + RawKeyword::parse("#+KEY:"), + Some(( + "", + RawKeyword { + key: "KEY", + optional: None, + value: "", + post_blank: 0 + } + )) + ); + assert_eq!( + RawKeyword::parse("#+KEY: VALUE"), + Some(( + "", + RawKeyword { + key: "KEY", + optional: None, + value: "VALUE", + post_blank: 0 + } + )) + ); + assert_eq!( + RawKeyword::parse("#+K_E_Y: VALUE"), + Some(( + "", + RawKeyword { + key: "K_E_Y", + optional: None, + value: "VALUE", + post_blank: 0 + } + )) + ); + assert_eq!( + RawKeyword::parse("#+KEY:VALUE\n"), + Some(( + "", + RawKeyword { + key: "KEY", + optional: None, + value: "VALUE", + post_blank: 0 + } + )) + ); + assert!(RawKeyword::parse("#+KE Y: VALUE").is_none()); + assert!(RawKeyword::parse("#+ KEY: VALUE").is_none()); + + assert_eq!( + RawKeyword::parse("#+RESULTS:"), + Some(( + "", + RawKeyword { + key: "RESULTS", + optional: None, + value: "", + post_blank: 0 + } + )) + ); + + assert_eq!( + RawKeyword::parse("#+ATTR_LATEX: :width 5cm\n"), + Some(( + "", + RawKeyword { + key: "ATTR_LATEX", + optional: None, + value: ":width 5cm", + post_blank: 0 + } + )) + ); + + assert_eq!( + RawKeyword::parse("#+CALL: double(n=4)"), + Some(( + "", + RawKeyword { + key: "CALL", + optional: None, + value: "double(n=4)", + post_blank: 0 + } + )) + ); + + assert_eq!( + RawKeyword::parse("#+CAPTION[Short caption]: Longer caption."), + Some(( + "", + RawKeyword { + key: "CAPTION", + optional: Some("Short caption"), + value: "Longer caption.", + post_blank: 0 + } + )) + ); +} diff --git a/src/elements/link.rs b/src/elements/link.rs new file mode 100644 index 0000000..b0bb08d --- /dev/null +++ b/src/elements/link.rs @@ -0,0 +1,80 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::{tag, take_while}, + 
combinator::opt, + sequence::delimited, + IResult, +}; + +/// Link Object +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub struct Link<'a> { + /// Link destination + pub path: Cow<'a, str>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub desc: Option>, +} + +impl Link<'_> { + #[inline] + pub(crate) fn parse(input: &str) -> Option<(&str, Link)> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> Link<'static> { + Link { + path: self.path.into_owned().into(), + desc: self.desc.map(Into::into).map(Cow::Owned), + } + } +} + +#[inline] +fn parse_internal(input: &str) -> IResult<&str, Link, ()> { + let (input, path) = delimited( + tag("[["), + take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']'), + tag("]"), + )(input)?; + let (input, desc) = opt(delimited( + tag("["), + take_while(|c: char| c != '[' && c != ']'), + tag("]"), + ))(input)?; + let (input, _) = tag("]")(input)?; + Ok(( + input, + Link { + path: path.into(), + desc: desc.map(Into::into), + }, + )) +} + +#[test] +fn parse() { + assert_eq!( + Link::parse("[[#id]]"), + Some(( + "", + Link { + path: "#id".into(), + desc: None + } + )) + ); + assert_eq!( + Link::parse("[[#id][desc]]"), + Some(( + "", + Link { + path: "#id".into(), + desc: Some("desc".into()) + } + )) + ); + assert!(Link::parse("[[#id][desc]").is_none()); +} diff --git a/src/elements/list.rs b/src/elements/list.rs new file mode 100644 index 0000000..3b49852 --- /dev/null +++ b/src/elements/list.rs @@ -0,0 +1,316 @@ +use std::borrow::Cow; +use std::iter::once; + +use memchr::{memchr, memchr_iter}; +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{digit1, space0}, + combinator::{map, recognize}, + sequence::terminated, + IResult, +}; + +/// Plain List Element +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub 
struct List { + /// List indent, number of whitespaces + pub indent: usize, + /// List's type, determined by the first item of this list + pub ordered: bool, + /// Numbers of blank lines between last list's line and next non-blank line + /// or buffer's end + pub post_blank: usize, +} + +/// List Item Element +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub struct ListItem<'a> { + /// List item bullet + pub bullet: Cow<'a, str>, + /// List item indent, number of whitespaces + pub indent: usize, + /// List item type + pub ordered: bool, + // TODO checkbox + // TODO counter + // TODO tag +} + +impl ListItem<'_> { + #[inline] + pub(crate) fn parse(input: &str) -> Option<(&str, (ListItem, &str))> { + list_item(input).ok() + } + + pub fn into_owned(self) -> ListItem<'static> { + ListItem { + bullet: self.bullet.into_owned().into(), + indent: self.indent, + ordered: self.ordered, + } + } +} + +fn list_item(input: &str) -> IResult<&str, (ListItem, &str), ()> { + let (input, indent) = map(space0, |s: &str| s.len())(input)?; + let (input, bullet) = recognize(alt(( + tag("+ "), + tag("* "), + tag("- "), + terminated(digit1, tag(". 
")), + )))(input)?; + let (input, contents) = list_item_contents(input, indent); + Ok(( + input, + ( + ListItem { + bullet: bullet.into(), + indent, + ordered: bullet.starts_with(|c: char| c.is_ascii_digit()), + }, + contents, + ), + )) +} + +fn list_item_contents(input: &str, indent: usize) -> (&str, &str) { + let mut last_end = memchr(b'\n', input.as_bytes()) + .map(|i| i + 1) + .unwrap_or_else(|| input.len()); + + for i in memchr_iter(b'\n', input.as_bytes()) + .map(|i| i + 1) + .chain(once(input.len())) + .skip(1) + { + if input[last_end..i] + .as_bytes() + .iter() + .all(u8::is_ascii_whitespace) + { + let x = memchr(b'\n', &input[i..].as_bytes()) + .map(|ii| i + ii + 1) + .unwrap_or_else(|| input.len()); + + // two consecutive empty lines + if input[i..x].as_bytes().iter().all(u8::is_ascii_whitespace) { + return (&input[x..], &input[0..x]); + } + } + + // line less or equally indented than the starting line + if input[last_end..i] + .as_bytes() + .iter() + .take(indent + 1) + .any(|c| !c.is_ascii_whitespace()) + { + return (&input[last_end..], &input[0..last_end]); + } + + last_end = i; + } + + ("", input) +} + +#[test] +fn parse() { + assert_eq!( + list_item( + r#"+ item1 ++ item2"# + ), + Ok(( + "+ item2", + ( + ListItem { + bullet: "+ ".into(), + indent: 0, + ordered: false, + }, + r#"item1 +"# + ) + )) + ); + assert_eq!( + list_item( + r#"* item1 + +* item2"# + ), + Ok(( + "* item2", + ( + ListItem { + bullet: "* ".into(), + indent: 0, + ordered: false, + }, + r#"item1 + +"# + ) + )) + ); + assert_eq!( + list_item( + r#"* item1 + + +* item2"# + ), + Ok(( + "* item2", + ( + ListItem { + bullet: "* ".into(), + indent: 0, + ordered: false, + }, + r#"item1 + + +"# + ) + )) + ); + assert_eq!( + list_item( + r#"* item1 + +"# + ), + Ok(( + "", + ( + ListItem { + bullet: "* ".into(), + indent: 0, + ordered: false, + }, + r#"item1 + +"# + ) + )) + ); + assert_eq!( + list_item( + r#"+ item1 + + item2 +"# + ), + Ok(( + "", + ( + ListItem { + bullet: "+ ".into(), + 
indent: 0, + ordered: false, + }, + r#"item1 + + item2 +"# + ) + )) + ); + assert_eq!( + list_item( + r#"+ item1 + + + item2 + ++ item 3"# + ), + Ok(( + "+ item 3", + ( + ListItem { + bullet: "+ ".into(), + indent: 0, + ordered: false, + }, + r#"item1 + + + item2 + +"# + ) + )) + ); + assert_eq!( + list_item( + r#" + item1 + + + item2"# + ), + Ok(( + " + item2", + ( + ListItem { + bullet: "+ ".into(), + indent: 2, + ordered: false, + }, + r#"item1 + +"# + ) + )) + ); + assert_eq!( + list_item( + r#" 1. item1 +2. item2 + 3. item3"# + ), + Ok(( + r#"2. item2 + 3. item3"#, + ( + ListItem { + bullet: "1. ".into(), + indent: 2, + ordered: true, + }, + r#"item1 +"# + ) + )) + ); + assert_eq!( + list_item( + r#"+ 1 + + - 2 + + - 3 + ++ 4"# + ), + Ok(( + "+ 4", + ( + ListItem { + bullet: "+ ".into(), + indent: 0, + ordered: false, + }, + r#"1 + + - 2 + + - 3 + +"# + ) + )) + ); +} diff --git a/src/elements/macros.rs b/src/elements/macros.rs new file mode 100644 index 0000000..8568d5b --- /dev/null +++ b/src/elements/macros.rs @@ -0,0 +1,91 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::{tag, take, take_until, take_while1}, + combinator::{opt, verify}, + sequence::delimited, + IResult, +}; + +/// Macro Object +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub struct Macros<'a> { + /// Macro name + pub name: Cow<'a, str>, + /// Arguments passed to the macro + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub arguments: Option>, +} + +impl Macros<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, Macros)> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> Macros<'static> { + Macros { + name: self.name.into_owned().into(), + arguments: self.arguments.map(Into::into).map(Cow::Owned), + } + } +} + +#[inline] +fn parse_internal(input: &str) -> IResult<&str, Macros, ()> { + let (input, _) = tag("{{{")(input)?; + let (input, name) = verify( + 
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), + |s: &str| s.starts_with(|c: char| c.is_ascii_alphabetic()), + )(input)?; + let (input, arguments) = opt(delimited(tag("("), take_until(")}}}"), take(1usize)))(input)?; + let (input, _) = tag("}}}")(input)?; + + Ok(( + input, + Macros { + name: name.into(), + arguments: arguments.map(Into::into), + }, + )) +} + +#[test] +fn test() { + assert_eq!( + Macros::parse("{{{poem(red,blue)}}}"), + Some(( + "", + Macros { + name: "poem".into(), + arguments: Some("red,blue".into()) + } + )) + ); + assert_eq!( + Macros::parse("{{{poem())}}}"), + Some(( + "", + Macros { + name: "poem".into(), + arguments: Some(")".into()) + } + )) + ); + assert_eq!( + Macros::parse("{{{author}}}"), + Some(( + "", + Macros { + name: "author".into(), + arguments: None + } + )) + ); + + assert!(Macros::parse("{{{0uthor}}}").is_none()); + assert!(Macros::parse("{{{author}}").is_none()); + assert!(Macros::parse("{{{poem(}}}").is_none()); + assert!(Macros::parse("{{{poem)}}}").is_none()); +} diff --git a/src/elements/mod.rs b/src/elements/mod.rs new file mode 100644 index 0000000..ff99f76 --- /dev/null +++ b/src/elements/mod.rs @@ -0,0 +1,245 @@ +//! 
Org-mode elements + +pub(crate) mod block; +pub(crate) mod clock; +pub(crate) mod comment; +pub(crate) mod cookie; +pub(crate) mod drawer; +pub(crate) mod dyn_block; +pub(crate) mod emphasis; +pub(crate) mod fixed_width; +pub(crate) mod fn_def; +pub(crate) mod fn_ref; +pub(crate) mod inline_call; +pub(crate) mod inline_src; +pub(crate) mod keyword; +pub(crate) mod link; +pub(crate) mod list; +pub(crate) mod macros; +pub(crate) mod planning; +pub(crate) mod radio_target; +pub(crate) mod rule; +pub(crate) mod snippet; +pub(crate) mod table; +pub(crate) mod target; +pub(crate) mod timestamp; +pub(crate) mod title; + +pub use self::{ + block::{ + CenterBlock, CommentBlock, ExampleBlock, ExportBlock, QuoteBlock, SourceBlock, + SpecialBlock, VerseBlock, + }, + clock::Clock, + comment::Comment, + cookie::Cookie, + drawer::Drawer, + dyn_block::DynBlock, + fixed_width::FixedWidth, + fn_def::FnDef, + fn_ref::FnRef, + inline_call::InlineCall, + inline_src::InlineSrc, + keyword::{BabelCall, Keyword}, + link::Link, + list::{List, ListItem}, + macros::Macros, + planning::Planning, + rule::Rule, + snippet::Snippet, + table::{Table, TableCell, TableRow}, + target::Target, + timestamp::{Datetime, Timestamp}, + title::Title, +}; + +use std::borrow::Cow; + +/// Element Enum +#[derive(Debug)] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[cfg_attr(feature = "ser", serde(tag = "type", rename_all = "kebab-case"))] +pub enum Element<'a> { + SpecialBlock(SpecialBlock<'a>), + QuoteBlock(QuoteBlock<'a>), + CenterBlock(CenterBlock<'a>), + VerseBlock(VerseBlock<'a>), + CommentBlock(CommentBlock<'a>), + ExampleBlock(ExampleBlock<'a>), + ExportBlock(ExportBlock<'a>), + SourceBlock(SourceBlock<'a>), + BabelCall(BabelCall<'a>), + Section, + Clock(Clock<'a>), + Cookie(Cookie<'a>), + RadioTarget, + Drawer(Drawer<'a>), + Document { pre_blank: usize }, + DynBlock(DynBlock<'a>), + FnDef(FnDef<'a>), + FnRef(FnRef<'a>), + Headline { level: usize }, + InlineCall(InlineCall<'a>), + 
InlineSrc(InlineSrc<'a>), + Keyword(Keyword<'a>), + Link(Link<'a>), + List(List), + ListItem(ListItem<'a>), + Macros(Macros<'a>), + Snippet(Snippet<'a>), + Text { value: Cow<'a, str> }, + Paragraph { post_blank: usize }, + Rule(Rule), + Timestamp(Timestamp<'a>), + Target(Target<'a>), + Bold, + Strike, + Italic, + Underline, + Verbatim { value: Cow<'a, str> }, + Code { value: Cow<'a, str> }, + Comment(Comment<'a>), + FixedWidth(FixedWidth<'a>), + Title(Title<'a>), + Table(Table<'a>), + TableRow(TableRow), + TableCell(TableCell), +} + +impl Element<'_> { + pub fn is_container(&self) -> bool { + match self { + Element::SpecialBlock(_) + | Element::QuoteBlock(_) + | Element::CenterBlock(_) + | Element::VerseBlock(_) + | Element::Bold + | Element::Document { .. } + | Element::DynBlock(_) + | Element::Headline { .. } + | Element::Italic + | Element::List(_) + | Element::ListItem(_) + | Element::Paragraph { .. } + | Element::Section + | Element::Strike + | Element::Underline + | Element::Title(_) + | Element::Table(_) + | Element::TableRow(TableRow::Header) + | Element::TableRow(TableRow::Body) + | Element::TableCell(_) => true, + _ => false, + } + } + + pub fn into_owned(self) -> Element<'static> { + use Element::*; + + match self { + SpecialBlock(e) => SpecialBlock(e.into_owned()), + QuoteBlock(e) => QuoteBlock(e.into_owned()), + CenterBlock(e) => CenterBlock(e.into_owned()), + VerseBlock(e) => VerseBlock(e.into_owned()), + CommentBlock(e) => CommentBlock(e.into_owned()), + ExampleBlock(e) => ExampleBlock(e.into_owned()), + ExportBlock(e) => ExportBlock(e.into_owned()), + SourceBlock(e) => SourceBlock(e.into_owned()), + BabelCall(e) => BabelCall(e.into_owned()), + Section => Section, + Clock(e) => Clock(e.into_onwed()), + Cookie(e) => Cookie(e.into_owned()), + RadioTarget => RadioTarget, + Drawer(e) => Drawer(e.into_owned()), + Document { pre_blank } => Document { pre_blank }, + DynBlock(e) => DynBlock(e.into_owned()), + FnDef(e) => FnDef(e.into_owned()), + FnRef(e) => 
FnRef(e.into_owned()), + Headline { level } => Headline { level }, + InlineCall(e) => InlineCall(e.into_owned()), + InlineSrc(e) => InlineSrc(e.into_owned()), + Keyword(e) => Keyword(e.into_owned()), + Link(e) => Link(e.into_owned()), + List(e) => List(e), + ListItem(e) => ListItem(e.into_owned()), + Macros(e) => Macros(e.into_owned()), + Snippet(e) => Snippet(e.into_owned()), + Text { value } => Text { + value: value.into_owned().into(), + }, + Paragraph { post_blank } => Paragraph { post_blank }, + Rule(e) => Rule(e), + Timestamp(e) => Timestamp(e.into_owned()), + Target(e) => Target(e.into_owned()), + Bold => Bold, + Strike => Strike, + Italic => Italic, + Underline => Underline, + Verbatim { value } => Verbatim { + value: value.into_owned().into(), + }, + Code { value } => Code { + value: value.into_owned().into(), + }, + Comment(e) => Comment(e.into_owned()), + FixedWidth(e) => FixedWidth(e.into_owned()), + Title(e) => Title(e.into_owned()), + Table(e) => Table(e.into_owned()), + TableRow(e) => TableRow(e), + TableCell(e) => TableCell(e), + } + } +} + +macro_rules! 
impl_from { + ($($ele0:ident),*; $($ele1:ident),*) => { + $( + impl<'a> From<$ele0<'a>> for Element<'a> { + fn from(ele: $ele0<'a>) -> Element<'a> { + Element::$ele0(ele) + } + } + )* + $( + impl<'a> From<$ele1> for Element<'a> { + fn from(ele: $ele1) -> Element<'a> { + Element::$ele1(ele) + } + } + )* + }; +} + +impl_from!( + BabelCall, + CenterBlock, + Clock, + Comment, + CommentBlock, + Cookie, + Drawer, + DynBlock, + ExampleBlock, + ExportBlock, + FixedWidth, + FnDef, + FnRef, + InlineCall, + InlineSrc, + Keyword, + Link, + ListItem, + Macros, + QuoteBlock, + Snippet, + SourceBlock, + SpecialBlock, + Table, + Target, + Timestamp, + Title, + VerseBlock; + List, + Rule, + TableRow +); diff --git a/src/elements/planning.rs b/src/elements/planning.rs new file mode 100644 index 0000000..1659924 --- /dev/null +++ b/src/elements/planning.rs @@ -0,0 +1,98 @@ +use memchr::memchr; + +use crate::elements::Timestamp; + +/// Planning element +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub struct Planning<'a> { + /// Timestamp associated to deadline keyword + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub deadline: Option>, + /// Timestamp associated to scheduled keyword + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub scheduled: Option>, + /// Timestamp associated to closed keyword + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub closed: Option>, +} + +impl Planning<'_> { + #[inline] + pub(crate) fn parse(text: &str) -> Option<(&str, Planning)> { + let (mut deadline, mut scheduled, mut closed) = (None, None, None); + let (mut tail, off) = memchr(b'\n', text.as_bytes()) + .map(|i| (text[..i].trim(), i + 1)) + .unwrap_or_else(|| (text.trim(), text.len())); + + while let Some(i) = memchr(b' ', tail.as_bytes()) { + let next = &tail[i + 1..].trim_start(); + + macro_rules! 
set_timestamp { + ($timestamp:expr) => {{ + let (new_tail, timestamp) = + Timestamp::parse_active(next).or(Timestamp::parse_inactive(next))?; + $timestamp = Some(timestamp); + tail = new_tail.trim_start(); + }}; + } + + match &tail[..i] { + "DEADLINE:" if deadline.is_none() => set_timestamp!(deadline), + "SCHEDULED:" if scheduled.is_none() => set_timestamp!(scheduled), + "CLOSED:" if closed.is_none() => set_timestamp!(closed), + _ => return None, + } + } + + if deadline.is_none() && scheduled.is_none() && closed.is_none() { + None + } else { + Some(( + &text[off..], + Planning { + deadline, + scheduled, + closed, + }, + )) + } + } + + pub fn into_owned(self) -> Planning<'static> { + Planning { + deadline: self.deadline.map(|x| x.into_owned()), + scheduled: self.scheduled.map(|x| x.into_owned()), + closed: self.closed.map(|x| x.into_owned()), + } + } +} + +#[test] +fn prase() { + use crate::elements::Datetime; + + assert_eq!( + Planning::parse("SCHEDULED: <2019-04-08 Mon>\n"), + Some(( + "", + Planning { + scheduled: Some(Timestamp::Active { + start: Datetime { + year: 2019, + month: 4, + day: 8, + dayname: "Mon".into(), + hour: None, + minute: None + }, + repeater: None, + delay: None + }), + deadline: None, + closed: None, + } + )) + ) +} diff --git a/src/elements/radio_target.rs b/src/elements/radio_target.rs new file mode 100644 index 0000000..fd529c7 --- /dev/null +++ b/src/elements/radio_target.rs @@ -0,0 +1,40 @@ +use nom::{ + bytes::complete::{tag, take_while}, + combinator::verify, + sequence::delimited, + IResult, +}; + +// TODO: text-markup, entities, latex-fragments, subscript and superscript + +#[inline] +pub fn parse_radio_target(input: &str) -> Option<(&str, &str)> { + parse_internal(input).ok() +} + +#[inline] +fn parse_internal(input: &str) -> IResult<&str, &str, ()> { + let (input, contents) = delimited( + tag("<<<"), + verify( + take_while(|c: char| c != '<' && c != '\n' && c != '>'), + |s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != 
' '), + ), + tag(">>>"), + )(input)?; + + Ok((input, contents)) +} + +#[test] +fn parse() { + assert_eq!(parse_radio_target("<<>>"), Some(("", "target"))); + assert_eq!(parse_radio_target("<<>>"), Some(("", "tar get"))); + + assert!(parse_radio_target("<<>>").is_none()); + assert!(parse_radio_target("<<< target>>>").is_none()); + assert!(parse_radio_target("<<>>").is_none()); + assert!(parse_radio_target("<<get>>>").is_none()); + assert!(parse_radio_target("<<>>").is_none()); + assert!(parse_radio_target("<<>").is_none()); +} diff --git a/src/elements/rule.rs b/src/elements/rule.rs new file mode 100644 index 0000000..b331746 --- /dev/null +++ b/src/elements/rule.rs @@ -0,0 +1,48 @@ +use nom::{bytes::complete::take_while_m_n, character::complete::space0, IResult}; + +use crate::parse::combinators::{blank_lines_count, eol}; + +#[derive(Debug, Default, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +pub struct Rule { + /// Numbers of blank lines between rule line and next non-blank line or + /// buffer's end + pub post_blank: usize, +} + +impl Rule { + pub(crate) fn parse(input: &str) -> Option<(&str, Rule)> { + parse_internal(input).ok() + } +} + +fn parse_internal(input: &str) -> IResult<&str, Rule, ()> { + let (input, _) = space0(input)?; + let (input, _) = take_while_m_n(5, usize::max_value(), |c| c == '-')(input)?; + let (input, _) = eol(input)?; + let (input, post_blank) = blank_lines_count(input)?; + Ok((input, Rule { post_blank })) +} + +#[test] +fn parse() { + assert_eq!(Rule::parse("-----"), Some(("", Rule { post_blank: 0 }))); + assert_eq!(Rule::parse("--------"), Some(("", Rule { post_blank: 0 }))); + assert_eq!( + Rule::parse("-----\n\n\n"), + Some(("", Rule { post_blank: 2 })) + ); + assert_eq!(Rule::parse("----- \n"), Some(("", Rule { post_blank: 0 }))); + + assert!(Rule::parse("").is_none()); + assert!(Rule::parse("----").is_none()); + assert!(Rule::parse("----").is_none()); + 
assert!(Rule::parse("None----").is_none()); + assert!(Rule::parse("None ----").is_none()); + assert!(Rule::parse("None------").is_none()); + assert!(Rule::parse("----None----").is_none()); + assert!(Rule::parse("\t\t----").is_none()); + assert!(Rule::parse("------None").is_none()); + assert!(Rule::parse("----- None").is_none()); +} diff --git a/src/elements/snippet.rs b/src/elements/snippet.rs new file mode 100644 index 0000000..31e2117 --- /dev/null +++ b/src/elements/snippet.rs @@ -0,0 +1,100 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::{tag, take, take_until, take_while1}, + sequence::{delimited, separated_pair}, + IResult, +}; + +/// Export Snippet Object +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub struct Snippet<'a> { + /// Back-end name + pub name: Cow<'a, str>, + /// Export code + pub value: Cow<'a, str>, +} + +impl Snippet<'_> { + pub(crate) fn parse(input: &str) -> Option<(&str, Snippet)> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> Snippet<'static> { + Snippet { + name: self.name.into_owned().into(), + value: self.value.into_owned().into(), + } + } +} + +#[inline] +fn parse_internal(input: &str) -> IResult<&str, Snippet, ()> { + let (input, (name, value)) = delimited( + tag("@@"), + separated_pair( + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-'), + tag(":"), + take_until("@@"), + ), + take(2usize), + )(input)?; + + Ok(( + input, + Snippet { + name: name.into(), + value: value.into(), + }, + )) +} + +#[test] +fn parse() { + assert_eq!( + Snippet::parse("@@html:@@"), + Some(( + "", + Snippet { + name: "html".into(), + value: "".into() + } + )) + ); + assert_eq!( + Snippet::parse("@@latex:any arbitrary LaTeX code@@"), + Some(( + "", + Snippet { + name: "latex".into(), + value: "any arbitrary LaTeX code".into(), + } + )) + ); + assert_eq!( + Snippet::parse("@@html:@@"), + Some(( + "", + Snippet { + name: "html".into(), + value: 
"".into(), + } + )) + ); + assert_eq!( + Snippet::parse("@@html:

@

@@"), + Some(( + "", + Snippet { + name: "html".into(), + value: "

@

".into(), + } + )) + ); + + assert!(Snippet::parse("@@html:@").is_none()); + assert!(Snippet::parse("@@html@@").is_none()); + assert!(Snippet::parse("@@:@@").is_none()); +} diff --git a/src/elements/table.rs b/src/elements/table.rs new file mode 100644 index 0000000..752083a --- /dev/null +++ b/src/elements/table.rs @@ -0,0 +1,169 @@ +use std::borrow::Cow; + +use nom::{ + error::{make_error, ErrorKind}, + Err, IResult, +}; + +use crate::parse::combinators::{blank_lines_count, line, lines_while}; + +/// Table Element +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[cfg_attr(feature = "ser", serde(tag = "table_type"))] +pub enum Table<'a> { + /// "org" type table + #[cfg_attr(feature = "ser", serde(rename = "org"))] + Org { + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + tblfm: Option>, + /// Numbers of blank lines between last table's line and next non-blank + /// line or buffer's end + post_blank: usize, + has_header: bool, + }, + /// "table.el" type table + #[cfg_attr(feature = "ser", serde(rename = "table.el"))] + TableEl { + value: Cow<'a, str>, + /// Numbers of blank lines between last table's line and next non-blank + /// line or buffer's end + post_blank: usize, + }, +} + +impl Table<'_> { + pub fn parse_table_el(input: &str) -> Option<(&str, Table)> { + Self::parse_table_el_internal(input).ok() + } + + fn parse_table_el_internal(input: &str) -> IResult<&str, Table, ()> { + let (_, first_line) = line(input)?; + + let first_line = first_line.trim(); + + // Table.el tables start at lines beginning with "+-" string and followed by plus or minus signs + if !first_line.starts_with("+-") + || first_line + .as_bytes() + .iter() + .any(|&c| c != b'+' && c != b'-') + { + // TODO: better error kind + return Err(Err::Error(make_error(input, ErrorKind::Many0))); + } + + // Table.el tables end at the first line not starting with either a vertical line or a plus sign. 
+ let (input, content) = lines_while(|line| { + let line = line.trim_start(); + line.starts_with('|') || line.starts_with('+') + })(input)?; + + let (input, post_blank) = blank_lines_count(input)?; + + Ok(( + input, + Table::TableEl { + value: content.into(), + post_blank, + }, + )) + } + + pub fn into_owned(self) -> Table<'static> { + match self { + Table::Org { + tblfm, + post_blank, + has_header, + } => Table::Org { + tblfm: tblfm.map(Into::into).map(Cow::Owned), + post_blank, + has_header, + }, + Table::TableEl { value, post_blank } => Table::TableEl { + value: value.into_owned().into(), + post_blank, + }, + } + } +} + +/// Table Row Element +/// +/// # Syntax +/// +/// ```text +/// | 0 | 1 | 2 | <- TableRow::Body +/// | 0 | 1 | 2 | <- TableRow::Body +/// ``` +/// +/// ```text +/// |-----+-----+-----| <- ignores +/// | 0 | 1 | 2 | <- TableRow::Header +/// | 0 | 1 | 2 | <- TableRow::Header +/// |-----+-----+-----| <- TableRow::HeaderRule +/// | 0 | 1 | 2 | <- TableRow::Body +/// |-----+-----+-----| <- TableRow::BodyRule +/// | 0 | 1 | 2 | <- TableRow::Body +/// |-----+-----+-----| <- TableRow::BodyRule +/// |-----+-----+-----| <- TableRow::BodyRule +/// | 0 | 1 | 2 | <- TableRow::Body +/// |-----+-----+-----| <- ignores +/// ``` +/// +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[cfg_attr(feature = "ser", serde(tag = "table_row_type"))] +#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))] +pub enum TableRow { + /// This row is part of table header + Header, + /// This row is part of table body + Body, + /// This row is between table header and body + HeaderRule, + /// This row is between table body and next body + BodyRule, +} + +/// Table Cell Element +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[cfg_attr(feature = "ser", serde(tag = "table_cell_type"))] +#[cfg_attr(feature = "ser", serde(rename_all = 
"kebab-case"))] +pub enum TableCell { + /// Header cell + Header, + /// Body cell, or standard cell + Body, +} + +#[test] +fn parse_table_el_() { + assert_eq!( + Table::parse_table_el( + r#" +---+ + | | + +---+ + +"# + ), + Some(( + "", + Table::TableEl { + value: r#" +---+ + | | + +---+ +"# + .into(), + post_blank: 1 + } + )) + ); + assert!(Table::parse_table_el("").is_none()); + assert!(Table::parse_table_el("+----|---").is_none()); +} diff --git a/src/elements/target.rs b/src/elements/target.rs new file mode 100644 index 0000000..b847b59 --- /dev/null +++ b/src/elements/target.rs @@ -0,0 +1,78 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::{tag, take_while}, + combinator::verify, + sequence::delimited, + IResult, +}; + +/// Target Object +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub struct Target<'a> { + /// Target ID + pub target: Cow<'a, str>, +} + +impl Target<'_> { + #[inline] + pub(crate) fn parse(input: &str) -> Option<(&str, Target)> { + parse_internal(input).ok() + } + + pub fn into_owned(self) -> Target<'static> { + Target { + target: self.target.into_owned().into(), + } + } +} + +#[inline] +fn parse_internal(input: &str) -> IResult<&str, Target, ()> { + let (input, target) = delimited( + tag("<<"), + verify( + take_while(|c: char| c != '<' && c != '\n' && c != '>'), + |s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '), + ), + tag(">>"), + )(input)?; + + Ok(( + input, + Target { + target: target.into(), + }, + )) +} + +#[test] +fn parse() { + assert_eq!( + Target::parse("<>"), + Some(( + "", + Target { + target: "target".into() + } + )) + ); + assert_eq!( + Target::parse("<>"), + Some(( + "", + Target { + target: "tar get".into() + } + )) + ); + + assert!(Target::parse("<>").is_none()); + assert!(Target::parse("<< target>>").is_none()); + assert!(Target::parse("<>").is_none()); + assert!(Target::parse("<get>>").is_none()); + 
assert!(Target::parse("<>").is_none()); + assert!(Target::parse("<").is_none()); +} diff --git a/src/elements/timestamp.rs b/src/elements/timestamp.rs new file mode 100644 index 0000000..15f1255 --- /dev/null +++ b/src/elements/timestamp.rs @@ -0,0 +1,482 @@ +use std::borrow::Cow; + +use nom::{ + bytes::complete::{tag, take, take_till, take_while, take_while_m_n}, + character::complete::{space0, space1}, + combinator::{map, map_res, opt}, + sequence::preceded, + IResult, +}; + +/// Datetime Struct +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub struct Datetime<'a> { + pub year: u16, + pub month: u8, + pub day: u8, + pub dayname: Cow<'a, str>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub hour: Option, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub minute: Option, +} + +impl Datetime<'_> { + pub fn into_owned(self) -> Datetime<'static> { + Datetime { + year: self.year, + month: self.month, + day: self.day, + dayname: self.dayname.into_owned().into(), + hour: self.hour, + minute: self.minute, + } + } +} + +#[cfg(feature = "chrono")] +mod chrono { + use super::Datetime; + use chrono::*; + + impl Into for Datetime<'_> { + fn into(self) -> NaiveDate { + (&self).into() + } + } + + impl Into for Datetime<'_> { + fn into(self) -> NaiveTime { + (&self).into() + } + } + + impl Into for Datetime<'_> { + fn into(self) -> NaiveDateTime { + (&self).into() + } + } + + impl Into> for Datetime<'_> { + fn into(self) -> DateTime { + (&self).into() + } + } + + impl Into for &Datetime<'_> { + fn into(self) -> NaiveDate { + NaiveDate::from_ymd(self.year.into(), self.month.into(), self.day.into()) + } + } + + impl Into for &Datetime<'_> { + fn into(self) -> NaiveTime { + NaiveTime::from_hms( + self.hour.unwrap_or_default().into(), + self.minute.unwrap_or_default().into(), + 0, + ) + } + } + + impl Into for &Datetime<'_> { + fn into(self) 
-> NaiveDateTime { + NaiveDateTime::new(self.into(), self.into()) + } + } + + impl Into> for &Datetime<'_> { + fn into(self) -> DateTime { + DateTime::from_utc(self.into(), Utc) + } + } +} + +/// Timestamp Object +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))] +#[cfg_attr(feature = "ser", serde(tag = "timestamp_type"))] +#[derive(Debug, Clone)] +pub enum Timestamp<'a> { + Active { + start: Datetime<'a>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + repeater: Option>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + delay: Option>, + }, + Inactive { + start: Datetime<'a>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + repeater: Option>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + delay: Option>, + }, + ActiveRange { + start: Datetime<'a>, + end: Datetime<'a>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + repeater: Option>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + delay: Option>, + }, + InactiveRange { + start: Datetime<'a>, + end: Datetime<'a>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + repeater: Option>, + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + delay: Option>, + }, + Diary { + value: Cow<'a, str>, + }, +} + +impl Timestamp<'_> { + pub(crate) fn parse_active(input: &str) -> Option<(&str, Timestamp)> { + parse_active(input).ok() + } + + pub(crate) fn parse_inactive(input: &str) -> Option<(&str, Timestamp)> { + parse_inactive(input).ok() + } + + pub(crate) fn parse_diary(input: &str) -> Option<(&str, Timestamp)> { + parse_diary(input).ok() + } + + pub fn into_owned(self) -> Timestamp<'static> { + match self { + Timestamp::Active { + start, + repeater, + delay, + } => 
Timestamp::Active { + start: start.into_owned(), + repeater: repeater.map(Into::into).map(Cow::Owned), + delay: delay.map(Into::into).map(Cow::Owned), + }, + Timestamp::Inactive { + start, + repeater, + delay, + } => Timestamp::Inactive { + start: start.into_owned(), + repeater: repeater.map(Into::into).map(Cow::Owned), + delay: delay.map(Into::into).map(Cow::Owned), + }, + Timestamp::ActiveRange { + start, + end, + repeater, + delay, + } => Timestamp::ActiveRange { + start: start.into_owned(), + end: end.into_owned(), + repeater: repeater.map(Into::into).map(Cow::Owned), + delay: delay.map(Into::into).map(Cow::Owned), + }, + Timestamp::InactiveRange { + start, + end, + repeater, + delay, + } => Timestamp::InactiveRange { + start: start.into_owned(), + end: end.into_owned(), + repeater: repeater.map(Into::into).map(Cow::Owned), + delay: delay.map(Into::into).map(Cow::Owned), + }, + Timestamp::Diary { value } => Timestamp::Diary { + value: value.into_owned().into(), + }, + } + } +} + +pub fn parse_active(input: &str) -> IResult<&str, Timestamp, ()> { + let (input, _) = tag("<")(input)?; + let (input, start) = parse_datetime(input)?; + + if input.starts_with('-') { + let (input, (hour, minute)) = parse_time(&input[1..])?; + let (input, _) = space0(input)?; + // TODO: delay-or-repeater + let (input, _) = tag(">")(input)?; + let mut end = start.clone(); + end.hour = Some(hour); + end.minute = Some(minute); + return Ok(( + input, + Timestamp::ActiveRange { + start, + end, + repeater: None, + delay: None, + }, + )); + } + + let (input, _) = space0(input)?; + // TODO: delay-or-repeater + let (input, _) = tag(">")(input)?; + + if input.starts_with("--<") { + let (input, end) = parse_datetime(&input["--<".len()..])?; + let (input, _) = space0(input)?; + // TODO: delay-or-repeater + let (input, _) = tag(">")(input)?; + Ok(( + input, + Timestamp::ActiveRange { + start, + end, + repeater: None, + delay: None, + }, + )) + } else { + Ok(( + input, + Timestamp::Active { + start, 
+ repeater: None, + delay: None, + }, + )) + } +} + +pub fn parse_inactive(input: &str) -> IResult<&str, Timestamp, ()> { + let (input, _) = tag("[")(input)?; + let (input, start) = parse_datetime(input)?; + + if input.starts_with('-') { + let (input, (hour, minute)) = parse_time(&input[1..])?; + let (input, _) = space0(input)?; + // TODO: delay-or-repeater + let (input, _) = tag("]")(input)?; + let mut end = start.clone(); + end.hour = Some(hour); + end.minute = Some(minute); + return Ok(( + input, + Timestamp::InactiveRange { + start, + end, + repeater: None, + delay: None, + }, + )); + } + + let (input, _) = space0(input)?; + // TODO: delay-or-repeater + let (input, _) = tag("]")(input)?; + + if input.starts_with("--[") { + let (input, end) = parse_datetime(&input["--[".len()..])?; + let (input, _) = space0(input)?; + // TODO: delay-or-repeater + let (input, _) = tag("]")(input)?; + Ok(( + input, + Timestamp::InactiveRange { + start, + end, + repeater: None, + delay: None, + }, + )) + } else { + Ok(( + input, + Timestamp::Inactive { + start, + repeater: None, + delay: None, + }, + )) + } +} + +pub fn parse_diary(input: &str) -> IResult<&str, Timestamp, ()> { + let (input, _) = tag("<%%(")(input)?; + let (input, value) = take_till(|c| c == ')' || c == '>' || c == '\n')(input)?; + let (input, _) = tag(")>")(input)?; + + Ok(( + input, + Timestamp::Diary { + value: value.into(), + }, + )) +} + +fn parse_time(input: &str) -> IResult<&str, (u8, u8), ()> { + let (input, hour) = map_res(take_while_m_n(1, 2, |c: char| c.is_ascii_digit()), |num| { + u8::from_str_radix(num, 10) + })(input)?; + let (input, _) = tag(":")(input)?; + let (input, minute) = map_res(take(2usize), |num| u8::from_str_radix(num, 10))(input)?; + Ok((input, (hour, minute))) +} + +fn parse_datetime(input: &str) -> IResult<&str, Datetime, ()> { + let parse_u8 = |num| u8::from_str_radix(num, 10); + + let (input, year) = map_res(take(4usize), |num| u16::from_str_radix(num, 10))(input)?; + let (input, _) = 
tag("-")(input)?; + let (input, month) = map_res(take(2usize), parse_u8)(input)?; + let (input, _) = tag("-")(input)?; + let (input, day) = map_res(take(2usize), parse_u8)(input)?; + let (input, _) = space1(input)?; + let (input, dayname) = take_while(|c: char| { + !c.is_ascii_whitespace() + && !c.is_ascii_digit() + && c != '+' + && c != '-' + && c != ']' + && c != '>' + })(input)?; + let (input, (hour, minute)) = map(opt(preceded(space1, parse_time)), |time| { + (time.map(|t| t.0), time.map(|t| t.1)) + })(input)?; + + Ok(( + input, + Datetime { + year, + month, + day, + dayname: dayname.into(), + hour, + minute, + }, + )) +} + +// TODO +// #[cfg_attr(test, derive(PartialEq))] +// #[cfg_attr(feature = "ser", derive(serde::Serialize))] +// #[derive(Debug, Copy, Clone)] +// pub enum RepeaterType { +// Cumulate, +// CatchUp, +// Restart, +// } + +// #[cfg_attr(test, derive(PartialEq))] +// #[cfg_attr(feature = "ser", derive(serde::Serialize))] +// #[derive(Debug, Copy, Clone)] +// pub enum DelayType { +// All, +// First, +// } + +// #[cfg_attr(test, derive(PartialEq))] +// #[cfg_attr(feature = "ser", derive(serde::Serialize))] +// #[derive(Debug, Copy, Clone)] +// pub enum TimeUnit { +// Hour, +// Day, +// Week, +// Month, +// Year, +// } + +// #[cfg_attr(test, derive(PartialEq))] +// #[cfg_attr(feature = "ser", derive(serde::Serialize))] +// #[derive(Debug, Copy, Clone)] +// pub struct Repeater { +// pub ty: RepeaterType, +// pub value: usize, +// pub unit: TimeUnit, +// } + +// #[cfg_attr(test, derive(PartialEq))] +// #[cfg_attr(feature = "ser", derive(serde::Serialize))] +// #[derive(Debug, Copy, Clone)] +// pub struct Delay { +// pub ty: DelayType, +// pub value: usize, +// pub unit: TimeUnit, +// } + +#[test] +fn parse() { + assert_eq!( + parse_inactive("[2003-09-16 Tue]"), + Ok(( + "", + Timestamp::Inactive { + start: Datetime { + year: 2003, + month: 9, + day: 16, + dayname: "Tue".into(), + hour: None, + minute: None + }, + repeater: None, + delay: None, + }, + 
)) + ); + assert_eq!( + parse_inactive("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]"), + Ok(( + "", + Timestamp::InactiveRange { + start: Datetime { + year: 2003, + month: 9, + day: 16, + dayname: "Tue".into(), + hour: Some(9), + minute: Some(39) + }, + end: Datetime { + year: 2003, + month: 9, + day: 16, + dayname: "Tue".into(), + hour: Some(10), + minute: Some(39), + }, + repeater: None, + delay: None + }, + )) + ); + assert_eq!( + parse_active("<2003-09-16 Tue 09:39-10:39>"), + Ok(( + "", + Timestamp::ActiveRange { + start: Datetime { + year: 2003, + month: 9, + day: 16, + dayname: "Tue".into(), + hour: Some(9), + minute: Some(39), + }, + end: Datetime { + year: 2003, + month: 9, + day: 16, + dayname: "Tue".into(), + hour: Some(10), + minute: Some(39), + }, + repeater: None, + delay: None + }, + )) + ); +} diff --git a/src/elements/title.rs b/src/elements/title.rs new file mode 100644 index 0000000..d8ccb39 --- /dev/null +++ b/src/elements/title.rs @@ -0,0 +1,510 @@ +//! Headline Title + +#[cfg(not(feature = "indexmap"))] +pub type PropertiesMap = std::collections::HashMap; + +#[cfg(feature = "indexmap")] +pub type PropertiesMap = indexmap::IndexMap; + +use std::borrow::Cow; + +use memchr::memrchr2; +use nom::{ + branch::alt, + bytes::complete::{tag, take_until, take_while}, + character::complete::{anychar, line_ending, space1}, + combinator::{map, opt, verify}, + error::{make_error, ErrorKind}, + multi::fold_many0, + sequence::{delimited, preceded}, + Err, IResult, +}; + +use crate::{ + config::ParseConfig, + elements::{drawer::parse_drawer_without_blank, Planning, Timestamp}, + parse::combinators::{blank_lines_count, line, one_word}, +}; + +/// Title Element +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[derive(Debug, Clone)] +pub struct Title<'a> { + /// Headline level, number of stars + pub level: usize, + /// Headline priority cookie + #[cfg_attr(feature = "ser", serde(skip_serializing_if = 
"Option::is_none"))] + pub priority: Option, + /// Headline title tags + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Vec::is_empty"))] + pub tags: Vec>, + /// Headline todo keyword + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub keyword: Option>, + /// Raw headline's text, without the stars and the tags + pub raw: Cow<'a, str>, + /// Planning element associated to this headline + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + pub planning: Option>>, + /// Property drawer associated to this headline + #[cfg_attr( + feature = "ser", + serde(skip_serializing_if = "PropertiesMap::is_empty") + )] + pub properties: PropertiesMap, Cow<'a, str>>, + /// Numbers of blank lines between last title's line and next non-blank line + /// or buffer's end + pub post_blank: usize, +} + +impl Title<'_> { + pub(crate) fn parse<'a>( + input: &'a str, + config: &ParseConfig, + ) -> Option<(&'a str, (Title<'a>, &'a str))> { + parse_title(input, config).ok() + } + + // TODO: fn is_quoted(&self) -> bool { } + // TODO: fn is_footnote_section(&self) -> bool { } + + /// Returns this headline's closed timestamp, or `None` if not set. + pub fn closed(&self) -> Option<&Timestamp> { + self.planning.as_ref().and_then(|p| p.closed.as_ref()) + } + + /// Returns this headline's scheduled timestamp, or `None` if not set. + pub fn scheduled(&self) -> Option<&Timestamp> { + self.planning.as_ref().and_then(|p| p.scheduled.as_ref()) + } + + /// Returns this headline's deadline timestamp, or `None` if not set. 
+ pub fn deadline(&self) -> Option<&Timestamp> { + self.planning.as_ref().and_then(|p| p.deadline.as_ref()) + } + + /// Returns `true` if this headline is archived + pub fn is_archived(&self) -> bool { + self.tags.iter().any(|tag| tag == "ARCHIVE") + } + + /// Returns `true` if this headline is commented + pub fn is_commented(&self) -> bool { + self.raw.starts_with("COMMENT") + && (self.raw.len() == 7 || self.raw[7..].starts_with(char::is_whitespace)) + } + + pub fn into_owned(self) -> Title<'static> { + Title { + level: self.level, + priority: self.priority, + tags: self + .tags + .into_iter() + .map(|s| s.into_owned().into()) + .collect(), + keyword: self.keyword.map(Into::into).map(Cow::Owned), + raw: self.raw.into_owned().into(), + planning: self.planning.map(|p| Box::new(p.into_owned())), + properties: self + .properties + .into_iter() + .map(|(k, v)| (k.into_owned().into(), v.into_owned().into())) + .collect(), + post_blank: self.post_blank, + } + } +} + +impl Default for Title<'_> { + fn default() -> Title<'static> { + Title { + level: 1, + priority: None, + tags: Vec::new(), + keyword: None, + raw: Cow::Borrowed(""), + planning: None, + properties: PropertiesMap::new(), + post_blank: 0, + } + } +} + +fn white_spaces_or_eol(input: &str) -> IResult<&str, &str, ()> { + alt((space1, line_ending))(input) +} + +#[inline] +fn parse_title<'a>( + input: &'a str, + config: &ParseConfig, +) -> IResult<&'a str, (Title<'a>, &'a str), ()> { + let (input, level) = map(take_while(|c: char| c == '*'), |s: &str| s.len())(input)?; + + debug_assert!(level > 0); + + let (input, keyword) = opt(preceded( + space1, + verify(one_word, |s: &str| { + config.todo_keywords.0.iter().any(|x| x == s) + || config.todo_keywords.1.iter().any(|x| x == s) + }), + ))(input)?; + + let (input, priority) = opt(delimited( + space1, + delimited( + tag("[#"), + verify(anychar, |c: &char| c.is_ascii_uppercase()), + tag("]"), + ), + white_spaces_or_eol, + ))(input)?; + let (input, tail) = line(input)?; 
/// Reports whether `input` looks like a headline tag group such as `:a:b:`.
///
/// The text must be longer than two bytes, start and end with `:`, and contain
/// nothing but alphanumeric characters, `_`, `@`, `#`, `%` and `:`.
fn is_tag_line(input: &str) -> bool {
    // Anything of two bytes or fewer is rejected.
    if input.len() <= 2 {
        return false;
    }

    // The group has to be delimited by colons on both sides.
    if !input.starts_with(':') || !input.ends_with(':') {
        return false;
    }

    input.chars().all(|c| match c {
        '_' | '@' | '#' | '%' | ':' => true,
        other => other.is_alphanumeric(),
    })
}
crate::config::DEFAULT_CONFIG; + + assert_eq!( + parse_title("**** DONE [#A] COMMENT Title :tag:a2%:", &DEFAULT_CONFIG), + Ok(( + "", + ( + Title { + level: 4, + keyword: Some("DONE".into()), + priority: Some('A'), + raw: "COMMENT Title".into(), + tags: vec!["tag".into(), "a2%".into()], + planning: None, + properties: PropertiesMap::new(), + post_blank: 0, + }, + "COMMENT Title" + ) + )) + ); + assert_eq!( + parse_title("**** ToDO [#A] COMMENT Title", &DEFAULT_CONFIG), + Ok(( + "", + ( + Title { + level: 4, + keyword: None, + priority: None, + raw: "ToDO [#A] COMMENT Title".into(), + tags: vec![], + planning: None, + properties: PropertiesMap::new(), + post_blank: 0, + }, + "ToDO [#A] COMMENT Title" + ) + )) + ); + assert_eq!( + parse_title("**** T0DO [#A] COMMENT Title", &DEFAULT_CONFIG), + Ok(( + "", + ( + Title { + level: 4, + keyword: None, + priority: None, + raw: "T0DO [#A] COMMENT Title".into(), + tags: vec![], + planning: None, + properties: PropertiesMap::new(), + post_blank: 0, + }, + "T0DO [#A] COMMENT Title" + ) + )) + ); + assert_eq!( + parse_title("**** DONE [#1] COMMENT Title", &DEFAULT_CONFIG), + Ok(( + "", + ( + Title { + level: 4, + keyword: Some("DONE".into()), + priority: None, + raw: "[#1] COMMENT Title".into(), + tags: vec![], + planning: None, + properties: PropertiesMap::new(), + post_blank: 0, + }, + "[#1] COMMENT Title" + ) + )) + ); + assert_eq!( + parse_title("**** DONE [#a] COMMENT Title", &DEFAULT_CONFIG), + Ok(( + "", + ( + Title { + level: 4, + keyword: Some("DONE".into()), + priority: None, + raw: "[#a] COMMENT Title".into(), + tags: vec![], + planning: None, + properties: PropertiesMap::new(), + post_blank: 0, + }, + "[#a] COMMENT Title" + ) + )) + ); + + // https://github.com/PoiScript/orgize/issues/20 + assert_eq!( + parse_title("** DONE [#B]::", &DEFAULT_CONFIG), + Ok(( + "", + ( + Title { + level: 2, + keyword: Some("DONE".into()), + priority: None, + raw: "[#B]::".into(), + tags: vec![], + planning: None, + properties: 
/// Checks that a `:PROPERTIES:` drawer holding a single node property parses
/// into a one-entry map and consumes the whole input.
#[test]
fn parse_properties_drawer_() {
    assert_eq!(
        parse_properties_drawer(" :PROPERTIES:\n :CUSTOM_ID: id\n :END:"),
        Ok((
            "",
            // The target collection must be named explicitly: `.into()` on the
            // key and value leaves their types to be inferred from it.
            vec![("CUSTOM_ID".into(), "id".into())]
                .into_iter()
                .collect::<PropertiesMap<_, _>>()
        ))
    )
}
+ let j = (i + 7) % 20; + properties.push(( + Cow::Owned(format!( + "{}{}", + if i % 3 == 0 { + "FOO" + } else if i % 3 == 1 { + "QUX" + } else { + "BAR" + }, + j + )), + Cow::Owned(i.to_string()), + )); + } + + let mut s = String::default(); + for (k, v) in &properties { + s += &format!(" :{}: {}\n", k, v); + } + let drawer = format!(" :PROPERTIES:\n{}:END:\n", &s); + let mut parsed: Vec<(_, _)> = parse_properties_drawer(&drawer) + .unwrap() + .1 + .into_iter() + .collect(); + + #[cfg(not(feature = "indexmap"))] + parsed.sort(); + #[cfg(not(feature = "indexmap"))] + properties.sort(); + + assert_eq!(parsed, properties); +} diff --git a/src/entities.rs b/src/entities.rs deleted file mode 100644 index 5f1faf9..0000000 --- a/src/entities.rs +++ /dev/null @@ -1,468 +0,0 @@ -// https://git.sr.ht/~bzg/org-mode/tree/bfa4f9d5aa3e5c94974cae7a459cb5e5b4b15f52/item/lisp/org-entities.el#L85 -// nil -> false -// t -> true -// \x00A0 -> \\x00A0 -#[rustfmt::skip] -pub const ENTITIES: &[(&str, &str, bool, &str, &str, &str, &str)] = &[ -// ("* Letters" -// Latin -("Agrave", "\\`{A}", false, "À", "A", "À", "À"), -("agrave", "\\`{a}", false, "à", "a", "à", "à"), -("Aacute", "\\'{A}", false, "Á", "A", "Á", "Á"), -("aacute", "\\'{a}", false, "á", "a", "á", "á"), -("Acirc", "\\^{A}", false, "Â", "A", "Â", "Â"), -("acirc", "\\^{a}", false, "â", "a", "â", "â"), -("Amacr", "\\={A}", false, "Ā", "A", "Ã", "Ã"), -("amacr", "\\={a}", false, "ā", "a", "ã", "ã"), -("Atilde", "\\~{A}", false, "Ã", "A", "Ã", "Ã"), -("atilde", "\\~{a}", false, "ã", "a", "ã", "ã"), -("Auml", "\\\"{A}", false, "Ä", "Ae", "Ä", "Ä"), -("auml", "\\\"{a}", false, "ä", "ae", "ä", "ä"), -("Aring", "\\AA{}", false, "Å", "A", "Å", "Å"), -("AA", "\\AA{}", false, "Å", "A", "Å", "Å"), -("aring", "\\aa{}", false, "å", "a", "å", "å"), -("AElig", "\\AE{}", false, "Æ", "AE", "Æ", "Æ"), -("aelig", "\\ae{}", false, "æ", "ae", "æ", "æ"), -("Ccedil", "\\c{C}", false, "Ç", "C", "Ç", "Ç"), -("ccedil", "\\c{c}", false, "ç", "c", "ç", 
"ç"), -("Egrave", "\\`{E}", false, "È", "E", "È", "È"), -("egrave", "\\`{e}", false, "è", "e", "è", "è"), -("Eacute", "\\'{E}", false, "É", "E", "É", "É"), -("eacute", "\\'{e}", false, "é", "e", "é", "é"), -("Ecirc", "\\^{E}", false, "Ê", "E", "Ê", "Ê"), -("ecirc", "\\^{e}", false, "ê", "e", "ê", "ê"), -("Euml", "\\\"{E}", false, "Ë", "E", "Ë", "Ë"), -("euml", "\\\"{e}", false, "ë", "e", "ë", "ë"), -("Igrave", "\\`{I}", false, "Ì", "I", "Ì", "Ì"), -("igrave", "\\`{i}", false, "ì", "i", "ì", "ì"), -("Iacute", "\\'{I}", false, "Í", "I", "Í", "Í"), -("iacute", "\\'{i}", false, "í", "i", "í", "í"), -("Idot", "\\.{I}", false, "&idot;", "I", "İ", "İ"), -("inodot", "\\i", false, "ı", "i", "ı", "ı"), -("Icirc", "\\^{I}", false, "Î", "I", "Î", "Î"), -("icirc", "\\^{i}", false, "î", "i", "î", "î"), -("Iuml", "\\\"{I}", false, "Ï", "I", "Ï", "Ï"), -("iuml", "\\\"{i}", false, "ï", "i", "ï", "ï"), -("Ntilde", "\\~{N}", false, "Ñ", "N", "Ñ", "Ñ"), -("ntilde", "\\~{n}", false, "ñ", "n", "ñ", "ñ"), -("Ograve", "\\`{O}", false, "Ò", "O", "Ò", "Ò"), -("ograve", "\\`{o}", false, "ò", "o", "ò", "ò"), -("Oacute", "\\'{O}", false, "Ó", "O", "Ó", "Ó"), -("oacute", "\\'{o}", false, "ó", "o", "ó", "ó"), -("Ocirc", "\\^{O}", false, "Ô", "O", "Ô", "Ô"), -("ocirc", "\\^{o}", false, "ô", "o", "ô", "ô"), -("Otilde", "\\~{O}", false, "Õ", "O", "Õ", "Õ"), -("otilde", "\\~{o}", false, "õ", "o", "õ", "õ"), -("Ouml", "\\\"{O}", false, "Ö", "Oe", "Ö", "Ö"), -("ouml", "\\\"{o}", false, "ö", "oe", "ö", "ö"), -("Oslash", "\\O", false, "Ø", "O", "Ø", "Ø"), -("oslash", "\\o{}", false, "ø", "o", "ø", "ø"), -("OElig", "\\OE{}", false, "Œ", "OE", "OE", "Œ"), -("oelig", "\\oe{}", false, "œ", "oe", "oe", "œ"), -("Scaron", "\\v{S}", false, "Š", "S", "S", "Š"), -("scaron", "\\v{s}", false, "š", "s", "s", "š"), -("szlig", "\\ss{}", false, "ß", "ss", "ß", "ß"), -("Ugrave", "\\`{U}", false, "Ù", "U", "Ù", "Ù"), -("ugrave", "\\`{u}", false, "ù", "u", "ù", "ù"), -("Uacute", "\\'{U}", false, "Ú", "U", "Ú", "Ú"), 
-("uacute", "\\'{u}", false, "ú", "u", "ú", "ú"), -("Ucirc", "\\^{U}", false, "Û", "U", "Û", "Û"), -("ucirc", "\\^{u}", false, "û", "u", "û", "û"), -("Uuml", "\\\"{U}", false, "Ü", "Ue", "Ü", "Ü"), -("uuml", "\\\"{u}", false, "ü", "ue", "ü", "ü"), -("Yacute", "\\'{Y}", false, "Ý", "Y", "Ý", "Ý"), -("yacute", "\\'{y}", false, "ý", "y", "ý", "ý"), -("Yuml", "\\\"{Y}", false, "Ÿ", "Y", "Y", "Ÿ"), -("yuml", "\\\"{y}", false, "ÿ", "y", "ÿ", "ÿ"), - -// Latin (special face) -("fnof", "\\textit{f}", false, "ƒ", "f", "f", "ƒ"), -("real", "\\Re", true, "ℜ", "R", "R", "ℜ"), -("image", "\\Im", true, "ℑ", "I", "I", "ℑ"), -("weierp", "\\wp", true, "℘", "P", "P", "℘"), -("ell", "\\ell", true, "ℓ", "ell", "ell", "ℓ"), -("imath", "\\imath", true, "ı", "[dotless i]", "dotless i", "ı"), -("jmath", "\\jmath", true, "ȷ", "[dotless j]", "dotless j", "ȷ"), - -// Greek -("Alpha", "A", false, "Α", "Alpha", "Alpha", "Α"), -("alpha", "\\alpha", true, "α", "alpha", "alpha", "α"), -("Beta", "B", false, "Β", "Beta", "Beta", "Β"), -("beta", "\\beta", true, "β", "beta", "beta", "β"), -("Gamma", "\\Gamma", true, "Γ", "Gamma", "Gamma", "Γ"), -("gamma", "\\gamma", true, "γ", "gamma", "gamma", "γ"), -("Delta", "\\Delta", true, "Δ", "Delta", "Delta", "Δ"), -("delta", "\\delta", true, "δ", "delta", "delta", "δ"), -("Epsilon", "E", false, "Ε", "Epsilon", "Epsilon", "Ε"), -("epsilon", "\\epsilon", true, "ε", "epsilon", "epsilon", "ε"), -("varepsilon", "\\varepsilon", true, "ε", "varepsilon", "varepsilon", "ε"), -("Zeta", "Z", false, "Ζ", "Zeta", "Zeta", "Ζ"), -("zeta", "\\zeta", true, "ζ", "zeta", "zeta", "ζ"), -("Eta", "H", false, "Η", "Eta", "Eta", "Η"), -("eta", "\\eta", true, "η", "eta", "eta", "η"), -("Theta", "\\Theta", true, "Θ", "Theta", "Theta", "Θ"), -("theta", "\\theta", true, "θ", "theta", "theta", "θ"), -("thetasym", "\\vartheta", true, "ϑ", "theta", "theta", "ϑ"), -("vartheta", "\\vartheta", true, "ϑ", "theta", "theta", "ϑ"), -("Iota", "I", false, "Ι", "Iota", "Iota", "Ι"), -("iota", 
"\\iota", true, "ι", "iota", "iota", "ι"), -("Kappa", "K", false, "Κ", "Kappa", "Kappa", "Κ"), -("kappa", "\\kappa", true, "κ", "kappa", "kappa", "κ"), -("Lambda", "\\Lambda", true, "Λ", "Lambda", "Lambda", "Λ"), -("lambda", "\\lambda", true, "λ", "lambda", "lambda", "λ"), -("Mu", "M", false, "Μ", "Mu", "Mu", "Μ"), -("mu", "\\mu", true, "μ", "mu", "mu", "μ"), -("nu", "\\nu", true, "ν", "nu", "nu", "ν"), -("Nu", "N", false, "Ν", "Nu", "Nu", "Ν"), -("Xi", "\\Xi", true, "Ξ", "Xi", "Xi", "Ξ"), -("xi", "\\xi", true, "ξ", "xi", "xi", "ξ"), -("Omicron", "O", false, "Ο", "Omicron", "Omicron", "Ο"), -("omicron", "\\textit{o}", false, "ο", "omicron", "omicron", "ο"), -("Pi", "\\Pi", true, "Π", "Pi", "Pi", "Π"), -("pi", "\\pi", true, "π", "pi", "pi", "π"), -("Rho", "P", false, "Ρ", "Rho", "Rho", "Ρ"), -("rho", "\\rho", true, "ρ", "rho", "rho", "ρ"), -("Sigma", "\\Sigma", true, "Σ", "Sigma", "Sigma", "Σ"), -("sigma", "\\sigma", true, "σ", "sigma", "sigma", "σ"), -("sigmaf", "\\varsigma", true, "ς", "sigmaf", "sigmaf", "ς"), -("varsigma", "\\varsigma", true, "ς", "varsigma", "varsigma", "ς"), -("Tau", "T", false, "Τ", "Tau", "Tau", "Τ"), -("Upsilon", "\\Upsilon", true, "Υ", "Upsilon", "Upsilon", "Υ"), -("upsih", "\\Upsilon", true, "ϒ", "upsilon", "upsilon", "ϒ"), -("upsilon", "\\upsilon", true, "υ", "upsilon", "upsilon", "υ"), -("Phi", "\\Phi", true, "Φ", "Phi", "Phi", "Φ"), -("phi", "\\phi", true, "φ", "phi", "phi", "ɸ"), -("varphi", "\\varphi", true, "ϕ", "varphi", "varphi", "φ"), -("Chi", "X", false, "Χ", "Chi", "Chi", "Χ"), -("chi", "\\chi", true, "χ", "chi", "chi", "χ"), -("acutex", "\\acute x", true, "´x", "'x", "'x", "𝑥́"), -("Psi", "\\Psi", true, "Ψ", "Psi", "Psi", "Ψ"), -("psi", "\\psi", true, "ψ", "psi", "psi", "ψ"), -("tau", "\\tau", true, "τ", "tau", "tau", "τ"), -("Omega", "\\Omega", true, "Ω", "Omega", "Omega", "Ω"), -("omega", "\\omega", true, "ω", "omega", "omega", "ω"), -("piv", "\\varpi", true, "ϖ", "omega-pi", "omega-pi", "ϖ"), -("varpi", "\\varpi", true, 
"ϖ", "omega-pi", "omega-pi", "ϖ"), -("partial", "\\partial", true, "∂", "[partial differential]", "[partial differential]", "∂"), - -// Hebrew -("alefsym", "\\aleph", true, "ℵ", "aleph", "aleph", "ℵ"), -("aleph", "\\aleph", true, "ℵ", "aleph", "aleph", "ℵ"), -("gimel", "\\gimel", true, "ℷ", "gimel", "gimel", "ℷ"), -("beth", "\\beth", true, "ℶ", "beth", "beth", "ב"), -("dalet", "\\daleth", true, "ℸ", "dalet", "dalet", "ד"), - -// Icelandic -("ETH", "\\DH{}", false, "Ð", "D", "Ð", "Ð"), -("eth", "\\dh{}", false, "ð", "dh", "ð", "ð"), -("THORN", "\\TH{}", false, "Þ", "TH", "Þ", "Þ"), -("thorn", "\\th{}", false, "þ", "th", "þ", "þ"), - - //, "* Punctuation", -// Dots and Marks -("dots", "\\dots{}", false, "…", "...", "...", "…"), -("cdots", "\\cdots{}", true, "⋯", "...", "...", "⋯"), -("hellip", "\\dots{}", false, "…", "...", "...", "…"), -("middot", "\\textperiodcentered{}", false, "·", ".", "·", "·"), -("iexcl", "!`", false, "¡", "!", "¡", "¡"), -("iquest", "?`", false, "¿", "?", "¿", "¿"), - -// Dash-like -("shy", "\\-", false, "­", "", "", ""), -("ndash", "--", false, "–", "-", "-", "–"), -("mdash", "---", false, "—", "--", "--", "—"), - -// Quotations -("quot", "\\textquotedbl{}", false, """, "\"", "\"", "\""), -("acute", "\\textasciiacute{}", false, "´", "'", "´", "´"), -("ldquo", "\\textquotedblleft{}", false, "“", "\"", "\"", "“"), -("rdquo", "\\textquotedblright{}", false, "”", "\"", "\"", "”"), -("bdquo", "\\quotedblbase{}", false, "„", "\"", "\"", "„"), -("lsquo", "\\textquoteleft{}", false, "‘", "`", "`", "‘"), -("rsquo", "\\textquoteright{}", false, "’", "'", "'", "’"), -("sbquo", "\\quotesinglbase{}", false, "‚", ", ", ", ", "‚"), -("laquo", "\\guillemotleft{}", false, "«", "<<", "«", "«"), -("raquo", "\\guillemotright{}", false, "»", ">>", "»", "»"), -("lsaquo", "\\guilsinglleft{}", false, "‹", "<", "<", "‹"), -("rsaquo", "\\guilsinglright{}", false, "›", ">", ">", "›"), - -//, "* Other", -// Misc. 
(often used) -("circ", "\\^{}", false, "ˆ", "^", "^", "∘"), -("vert", "\\vert{}", true, "|", "|", "|", "|"), -("vbar", "|", false, "|", "|", "|", "|"), -("brvbar", "\\textbrokenbar{}", false, "¦", "|", "¦", "¦"), -("S", "\\S", false, "§", "section", "§", "§"), -("sect", "\\S", false, "§", "section", "§", "§"), -("P", "\\P{}", false, "¶", "paragraph", "¶", "¶"), -("para", "\\P{}", false, "¶", "paragraph", "¶", "¶"), -("amp", "\\&", false, "&", "&", "&", "&"), -("lt", "\\textless{}", false, "<", "<", "<", "<"), -("gt", "\\textgreater{}", false, ">", ">", ">", ">"), -("tilde", "\\textasciitilde{}", false, "~", "~", "~", "~"), -("slash", "/", false, "/", "/", "/", "/"), -("plus", "+", false, "+", "+", "+", "+"), -("under", "\\_", false, "_", "_", "_", "_"), -("equal", "=", false, "=", "=", "=", "="), -("asciicirc", "\\textasciicircum{}", false, "^", "^", "^", "^"), -("dagger", "\\textdagger{}", false, "†", "[dagger]", "[dagger]", "†"), -("dag", "\\dag{}", false, "†", "[dagger]", "[dagger]", "†"), -("Dagger", "\\textdaggerdbl{}", false, "‡", "[doubledagger]", "[doubledagger]", "‡"), -("ddag", "\\ddag{}", false, "‡", "[doubledagger]", "[doubledagger]", "‡"), - -// Whitespace -("nbsp", "~", false, " ", ", ", "\\x00A0", "\\x00A0"), -("ensp", "\\hspace*{.5em}", false, " ", ", ", ", ", " "), -("emsp", "\\hspace*{1em}", false, " ", ", ", ", ", " "), -("thinsp", "\\hspace*{.2em}", false, " ", ", ", ", ", " "), - -// Currency -("curren", "\\textcurrency{}", false, "¤", "curr.", "¤", "¤"), -("cent", "\\textcent{}", false, "¢", "cent", "¢", "¢"), -("pound", "\\pounds{}", false, "£", "pound", "£", "£"), -("yen", "\\textyen{}", false, "¥", "yen", "¥", "¥"), -("euro", "\\texteuro{}", false, "€", "EUR", "EUR", "€"), -("EUR", "\\texteuro{}", false, "€", "EUR", "EUR", "€"), -("dollar", "\\$", false, "$", "$", "$", "$"), -("USD", "\\$", false, "$", "$", "$", "$"), - -// Property Marks -("copy", "\\textcopyright{}", false, "©", "(c)", "©", "©"), -("reg", "\\textregistered{}", false, "®", 
"(r)", "®", "®"), -("trade", "\\texttrademark{}", false, "™", "TM", "TM", "™"), - -// Science, etrueal. -("minus", "-", true, "−", "-", "-", "−"), -("pm", "\\textpm{}", false, "±", "+-", "±", "±"), -("plusmn", "\\textpm{}", false, "±", "+-", "±", "±"), -("times", "\\texttimes{}", false, "×", "*", "×", "×"), -("frasl", "/", false, "⁄", "/", "/", "⁄"), -("colon", "\\colon", true, ":", ":", ":", ":"), -("div", "\\textdiv{}", false, "÷", "/", "÷", "÷"), -("frac12", "\\textonehalf{}", false, "½", "1/2", "½", "½"), -("frac14", "\\textonequarter{}", false, "¼", "1/4", "¼", "¼"), -("frac34", "\\textthreequarters{}", false, "¾", "3/4", "¾", "¾"), -("permil", "\\textperthousand{}", false, "‰", "per thousand", "per thousand", "‰"), -("sup1", "\\textonesuperior{}", false, "¹", "^1", "¹", "¹"), -("sup2", "\\texttwosuperior{}", false, "²", "^2", "²", "²"), -("sup3", "\\textthreesuperior{}", false, "³", "^3", "³", "³"), -("radic", "\\sqrt{\\,}", true, "√", "[square root]", "[square root]", "√"), -("sum", "\\sum", true, "∑", "[sum]", "[sum]", "∑"), -("prod", "\\prod", true, "∏", "[product]", "[n-ary product]", "∏"), -("micro", "\\textmu{}", false, "µ", "micro", "µ", "µ"), -("macr", "\\textasciimacron{}", false, "¯", "[macron]", "¯", "¯"), -("deg", "\\textdegree{}", false, "°", "degree", "°", "°"), -("prime", "\\prime", true, "′", "'", "'", "′"), -("Prime", "\\prime{}\\prime", true, "″", "''", "''", "″"), -("infin", "\\infty", true, "∞", "[infinity]", "[infinity]", "∞"), -("infty", "\\infty", true, "∞", "[infinity]", "[infinity]", "∞"), -("prop", "\\propto", true, "∝", "[proportional to]", "[proportional to]", "∝"), -("propto", "\\propto", true, "∝", "[proportional to]", "[proportional to]", "∝"), -("not", "\\textlnot{}", false, "¬", "[angled dash]", "¬", "¬"), -("neg", "\\neg{}", true, "¬", "[angled dash]", "¬", "¬"), -("land", "\\land", true, "∧", "[logical and]", "[logical and]", "∧"), -("wedge", "\\wedge", true, "∧", "[logical and]", "[logical and]", "∧"), -("lor", "\\lor", 
true, "∨", "[logical or]", "[logical or]", "∨"), -("vee", "\\vee", true, "∨", "[logical or]", "[logical or]", "∨"), -("cap", "\\cap", true, "∩", "[intersection]", "[intersection]", "∩"), -("cup", "\\cup", true, "∪", "[union]", "[union]", "∪"), -("smile", "\\smile", true, "⌣", "[cup product]", "[cup product]", "⌣"), -("frown", "\\frown", true, "⌢", "[Cap product]", "[cap product]", "⌢"), -("int", "\\int", true, "∫", "[integral]", "[integral]", "∫"), -("therefore", "\\therefore", true, "∴", "[therefore]", "[therefore]", "∴"), -("there4", "\\therefore", true, "∴", "[therefore]", "[therefore]", "∴"), -("because", "\\because", true, "∵", "[because]", "[because]", "∵"), -("sim", "\\sim", true, "∼", "~", "~", "∼"), -("cong", "\\cong", true, "≅", "[approx. equal to]", "[approx. equal to]", "≅"), -("simeq", "\\simeq", true, "≅", "[approx. equal to]", "[approx. equal to]", "≅"), -("asymp", "\\asymp", true, "≈", "[, almostrueequal to]", "[, almostrueequal to]", "≈"), -("approx", "\\approx", true, "≈", "[, almostrueequal to]", "[, almostrueequal to]", "≈"), -("ne", "\\ne", true, "≠", "[, notrueequal to]", "[, notrueequal to]", "≠"), -("neq", "\\neq", true, "≠", "[, notrueequal to]", "[, notrueequal to]", "≠"), -("equiv", "\\equiv", true, "≡", "[identical to]", "[identical to]", "≡"), - -("triangleq", "\\triangleq", true, "≜", "[defined to]", "[defined to]", "≜"), -("le", "\\le", true, "≤", "<=", "<=", "≤"), -("leq", "\\le", true, "≤", "<=", "<=", "≤"), -("ge", "\\ge", true, "≥", ">=", ">=", "≥"), -("geq", "\\ge", true, "≥", ">=", ">=", "≥"), -("lessgtr", "\\lessgtr", true, "≶", "[less than or greater than]", "[less than or greater than]", "≶"), -("lesseqgtr", "\\lesseqgtr", true, "⋚", "[less than or equal or greater than or equal]", "[less than or equal or greater than or equal]", "⋚"), -("ll", "\\ll", true, "≪", "<<", "<<", "≪"), -("Ll", "\\lll", true, "⋘", "<<<", "<<<", "⋘"), -("lll", "\\lll", true, "⋘", "<<<", "<<<", "⋘"), -("gg", "\\gg", true, "≫", ">>", ">>", "≫"), 
-("Gg", "\\ggg", true, "⋙", ">>>", ">>>", "⋙"), -("ggg", "\\ggg", true, "⋙", ">>>", ">>>", "⋙"), -("prec", "\\prec", true, "≺", "[precedes]", "[precedes]", "≺"), -("preceq", "\\preceq", true, "≼", "[precedes or equal]", "[precedes or equal]", "≼"), -("preccurlyeq", "\\preccurlyeq", true, "≼", "[precedes or equal]", "[precedes or equal]", "≼"), -("succ", "\\succ", true, "≻", "[succeeds]", "[succeeds]", "≻"), -("succeq", "\\succeq", true, "≽", "[succeeds or equal]", "[succeeds or equal]", "≽"), -("succcurlyeq", "\\succcurlyeq", true, "≽", "[succeeds or equal]", "[succeeds or equal]", "≽"), -("sub", "\\subset", true, "⊂", "[, subsetrueof]", "[, subsetrueof]", "⊂"), -("subset", "\\subset", true, "⊂", "[, subsetrueof]", "[, subsetrueof]", "⊂"), -("sup", "\\supset", true, "⊃", "[, supersetrueof]", "[, supersetrueof]", "⊃"), -("supset", "\\supset", true, "⊃", "[, supersetrueof]", "[, supersetrueof]", "⊃"), -("nsub", "\\not\\subset", true, "⊄", "[, notruea, subsetrueof]", "[, notruea, subsetrueof", "⊄"), -("sube", "\\subseteq", true, "⊆", "[, subsetrueof or equal to]", "[, subsetrueof or equal to]", "⊆"), -("nsup", "\\not\\supset", true, "⊅", "[, notruea, supersetrueof]", "[, notruea, supersetrueof]", "⊅"), -("supe", "\\supseteq", true, "⊇", "[, supersetrueof or equal to]", "[, supersetrueof or equal to]", "⊇"), -("setminus", "\\setminus", true, "∖", "\\", "\\", "⧵"), -("forall", "\\forall", true, "∀", "[for all]", "[for all]", "∀"), -("exist", "\\exists", true, "∃", "[there exists]", "[there exists]", "∃"), -("exists", "\\exists", true, "∃", "[there exists]", "[there exists]", "∃"), -("nexist", "\\nexists", true, "∃", "[there does, notrueexists]", "[there does, notrue exists]", "∄"), -("nexists", "\\nexists", true, "∃", "[there does, notrueexists]", "[there does, notrue exists]", "∄"), -("empty", "\\emptyset", true, "∅", "[empty set]", "[empty set]", "∅"), -("emptyset", "\\emptyset", true, "∅", "[empty set]", "[empty set]", "∅"), -("isin", "\\in", true, "∈", "[, 
elementrueof]", "[, elementrueof]", "∈"), -("in", "\\in", true, "∈", "[, elementrueof]", "[, elementrueof]", "∈"), -("notin", "\\notin", true, "∉", "[, notruean, elementrueof]", "[, notruean, elementrueof]", "∉"), -("ni", "\\ni", true, "∋", "[contains as member]", "[contains as member]", "∋"), -("nabla", "\\nabla", true, "∇", "[nabla]", "[nabla]", "∇"), -("ang", "\\angle", true, "∠", "[angle]", "[angle]", "∠"), -("angle", "\\angle", true, "∠", "[angle]", "[angle]", "∠"), -("perp", "\\perp", true, "⊥", "[up tack]", "[up tack]", "⊥"), -("parallel", "\\parallel", true, "∥", "||", "||", "∥"), -("sdot", "\\cdot", true, "⋅", "[dot]", "[dot]", "⋅"), -("cdot", "\\cdot", true, "⋅", "[dot]", "[dot]", "⋅"), -("lceil", "\\lceil", true, "⌈", "[, leftrueceiling]", "[, leftrueceiling]", "⌈"), -("rceil", "\\rceil", true, "⌉", "[, rightrueceiling]", "[, rightrueceiling]", "⌉"), -("lfloor", "\\lfloor", true, "⌊", "[, leftruefloor]", "[, leftruefloor]", "⌊"), -("rfloor", "\\rfloor", true, "⌋", "[, rightruefloor]", "[, rightruefloor]", "⌋"), -("lang", "\\langle", true, "⟨", "<", "<", "⟨"), -("rang", "\\rangle", true, "⟩", ">", ">", "⟩"), -("langle", "\\langle", true, "⟨", "<", "<", "⟨"), -("rangle", "\\rangle", true, "⟩", ">", ">", "⟩"), -("hbar", "\\hbar", true, "ℏ", "hbar", "hbar", "ℏ"), -("mho", "\\mho", true, "℧", "mho", "mho", "℧"), - -// Arrows -("larr", "\\leftarrow", true, "←", "<-", "<-", "←"), -("leftarrow", "\\leftarrow", true, "←", "<-", "<-", "←"), -("gets", "\\gets", true, "←", "<-", "<-", "←"), -("lArr", "\\Leftarrow", true, "⇐", "<=", "<=", "⇐"), -("Leftarrow", "\\Leftarrow", true, "⇐", "<=", "<=", "⇐"), -("uarr", "\\uparrow", true, "↑", "[uparrow]", "[uparrow]", "↑"), -("uparrow", "\\uparrow", true, "↑", "[uparrow]", "[uparrow]", "↑"), -("uArr", "\\Uparrow", true, "⇑", "[dbluparrow]", "[dbluparrow]", "⇑"), -("Uparrow", "\\Uparrow", true, "⇑", "[dbluparrow]", "[dbluparrow]", "⇑"), -("rarr", "\\rightarrow", true, "→", "->", "->", "→"), -("to", "\\to", true, "→", "->", 
"->", "→"), -("rightarrow", "\\rightarrow", true, "→", "->", "->", "→"), -("rArr", "\\Rightarrow", true, "⇒", "=>", "=>", "⇒"), -("Rightarrow", "\\Rightarrow", true, "⇒", "=>", "=>", "⇒"), -("darr", "\\downarrow", true, "↓", "[downarrow]", "[downarrow]", "↓"), -("downarrow", "\\downarrow", true, "↓", "[downarrow]", "[downarrow]", "↓"), -("dArr", "\\Downarrow", true, "⇓", "[dbldownarrow]", "[dbldownarrow]", "⇓"), -("Downarrow", "\\Downarrow", true, "⇓", "[dbldownarrow]", "[dbldownarrow]", "⇓"), -("harr", "\\leftrightarrow", true, "↔", "<->", "<->", "↔"), -("leftrightarrow", "\\leftrightarrow", true, "↔", "<->", "<->", "↔"), -("hArr", "\\Leftrightarrow", true, "⇔", "<=>", "<=>", "⇔"), -("Leftrightarrow", "\\Leftrightarrow", true, "⇔", "<=>", "<=>", "⇔"), -("crarr", "\\hookleftarrow", true, "↵", "<-'", "<-'", "↵"), -("hookleftarrow", "\\hookleftarrow", true, "↵", "<-'", "<-'", "↵"), - -// Function names -("arccos", "\\arccos", true, "arccos", "arccos", "arccos", "arccos"), -("arcsin", "\\arcsin", true, "arcsin", "arcsin", "arcsin", "arcsin"), -("arctan", "\\arctan", true, "arctan", "arctan", "arctan", "arctan"), -("arg", "\\arg", true, "arg", "arg", "arg", "arg"), -("cos", "\\cos", true, "cos", "cos", "cos", "cos"), -("cosh", "\\cosh", true, "cosh", "cosh", "cosh", "cosh"), -("cot", "\\cot", true, "cot", "cot", "cot", "cot"), -("coth", "\\coth", true, "coth", "coth", "coth", "coth"), -("csc", "\\csc", true, "csc", "csc", "csc", "csc"), -("deg", "\\deg", true, "°", "deg", "deg", "deg"), -("det", "\\det", true, "det", "det", "det", "det"), -("dim", "\\dim", true, "dim", "dim", "dim", "dim"), -("exp", "\\exp", true, "exp", "exp", "exp", "exp"), -("gcd", "\\gcd", true, "gcd", "gcd", "gcd", "gcd"), -("hom", "\\hom", true, "hom", "hom", "hom", "hom"), -("inf", "\\inf", true, "inf", "inf", "inf", "inf"), -("ker", "\\ker", true, "ker", "ker", "ker", "ker"), -("lg", "\\lg", true, "lg", "lg", "lg", "lg"), -("lim", "\\lim", true, "lim", "lim", "lim", "lim"), -("liminf", 
"\\liminf", true, "liminf", "liminf", "liminf", "liminf"), -("limsup", "\\limsup", true, "limsup", "limsup", "limsup", "limsup"), -("ln", "\\ln", true, "ln", "ln", "ln", "ln"), -("log", "\\log", true, "log", "log", "log", "log"), -("max", "\\max", true, "max", "max", "max", "max"), -("min", "\\min", true, "min", "min", "min", "min"), -("Pr", "\\Pr", true, "Pr", "Pr", "Pr", "Pr"), -("sec", "\\sec", true, "sec", "sec", "sec", "sec"), -("sin", "\\sin", true, "sin", "sin", "sin", "sin"), -("sinh", "\\sinh", true, "sinh", "sinh", "sinh", "sinh"), -("sup", "\\sup", true, "⊃", "sup", "sup", "sup"), -("tan", "\\tan", true, "tan", "tan", "tan", "tan"), -("tanh", "\\tanh", true, "tanh", "tanh", "tanh", "tanh"), - -// Signs & Symbols -("bull", "\\textbullet{}", false, "•", "*", "*", "•"), -("bullet", "\\textbullet{}", false, "•", "*", "*", "•"), -("star", "\\star", true, "*", "*", "*", "⋆"), -("lowast", "\\ast", true, "∗", "*", "*", "∗"), -("ast", "\\ast", true, "∗", "*", "*", "*"), -("odot", "\\odot", true, "o", "[circled dot]", "[circled dot]", "ʘ"), -("oplus", "\\oplus", true, "⊕", "[circled plus]", "[circled plus]", "⊕"), -("otimes", "\\otimes", true, "⊗", "[circled times]", "[circled times]", "⊗"), -("check", "\\checkmark", true, "✓", "[checkmark]", "[checkmark]", "✓"), -("checkmark", "\\checkmark", true, "✓", "[checkmark]", "[checkmark]", "✓"), - -// Miscellaneous (seldom used) -("ordf", "\\textordfeminine{}", false, "ª", "_a_", "ª", "ª"), -("ordm", "\\textordmasculine{}", false, "º", "_o_", "º", "º"), -("cedil", "\\c{}", false, "¸", "[cedilla]", "¸", "¸"), -("oline", "\\overline{~}", true, "‾", "[overline]", "¯", "‾"), -("uml", "\\textasciidieresis{}", false, "¨", "[diaeresis]", "¨", "¨"), -("zwnj", "\\/{}", false, "‌", "", "", "‌"), -("zwj", "", false, "‍", "", "", "‍"), -("lrm", "", false, "‎", "", "", "\u{200E}"), -("rlm", "", false, "‏", "", "", "\u{200F}"), - -// Smilies -("smiley", "\\ddot\\smile", true, "☺", ":-)", ":-)", "☺"), -("blacksmile", "\\ddot\\smile", 
true, "☻", ":-)", ":-)", "☻"), -("sad", "\\ddot\\frown", true, "☹", ":-(", ":-(", "☹"), -("frowny", "\\ddot\\frown", true, "☹", ":-(", ":-(", "☹"), - -// Suits -("clubs", "\\clubsuit", true, "♣", "[clubs]", "[clubs]", "♣"), -("clubsuit", "\\clubsuit", true, "♣", "[clubs]", "[clubs]", "♣"), -("spades", "\\spadesuit", true, "♠", "[spades]", "[spades]", "♠"), -("spadesuit", "\\spadesuit", true, "♠", "[spades]", "[spades]", "♠"), -("hearts", "\\heartsuit", true, "♥", "[hearts]", "[hearts]", "♥"), -("heartsuit", "\\heartsuit", true, "♥", "[hearts]", "[hearts]", "♥"), -("diams", "\\diamondsuit", true, "♦", "[diamonds]", "[diamonds]", "◆"), -("diamondsuit", "\\diamondsuit", true, "♦", "[diamonds]", "[diamonds]", "◆"), -("diamond", "\\diamondsuit", true, "⋄", "[diamond]", "[diamond]", "◆"), -("Diamond", "\\diamondsuit", true, "⋄", "[diamond]", "[diamond]", "◆"), -("loz", "\\lozenge", true, "◊", "[lozenge]", "[lozenge]", "⧫"), - -// spaces -// fish shell: -// for i in (seq 1 20) -// echo '("'(string repeat -n $i ' ')'", "\\\\hspace*{'(math '0.5*'$i)'em}", true, "'(string repeat -n $i ' ')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i '\\\\x2002')'")' -// end -(" ", "\\hspace*{0.5em}", true, " ", " ", " ", "\\x2002"), -(" ", "\\hspace*{1em}", true, "  ", " ", " ", "\\x2002\\x2002"), -(" ", "\\hspace*{1.5em}", true, "   ", " ", " ", "\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{2em}", true, "    ", " ", " ", "\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{2.5em}", true, "     ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{3em}", true, "      ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{3.5em}", true, "       ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{4em}", true, "        ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{4.5em}", true, "         ", " ", " ", 
"\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{5em}", true, "          ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{5.5em}", true, "           ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{6em}", true, "            ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{6.5em}", true, "             ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{7em}", true, "              ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{7.5em}", true, "               ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{8em}", true, "                ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{8.5em}", true, "                 ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{9em}", true, "                  ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{9.5em}", true, "                   ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -(" ", "\\hspace*{10em}", true, "                    ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), -]; diff --git a/src/export/event.rs 
b/src/export/event.rs deleted file mode 100644 index 46f65c6..0000000 --- a/src/export/event.rs +++ /dev/null @@ -1,73 +0,0 @@ -use crate::ast::*; - -#[non_exhaustive] -#[derive(Clone, PartialEq, Eq, Debug)] -pub enum Container { - Document(Document), - Section(Section), - Paragraph(Paragraph), - Headline(Headline), - - OrgTable(OrgTable), - OrgTableRow(OrgTableRow), - OrgTableCell(OrgTableCell), - TableEl(TableEl), - - List(List), - ListItem(ListItem), - Drawer(Drawer), - DynBlock(DynBlock), - - FnDef(FnDef), - Comment(Comment), - FixedWidth(FixedWidth), - SpecialBlock(SpecialBlock), - QuoteBlock(QuoteBlock), - CenterBlock(CenterBlock), - VerseBlock(VerseBlock), - CommentBlock(CommentBlock), - ExampleBlock(ExampleBlock), - ExportBlock(ExportBlock), - SourceBlock(SourceBlock), - - Link(Link), - RadioTarget(RadioTarget), - FnRef(FnRef), - Target(Target), - Bold(Bold), - Strike(Strike), - Italic(Italic), - Underline(Underline), - Verbatim(Verbatim), - Code(Code), - Superscript(Superscript), - Subscript(Subscript), - BabelCall(BabelCall), - PropertyDrawer(PropertyDrawer), - AffiliatedKeyword(AffiliatedKeyword), - Keyword(Keyword), -} - -#[non_exhaustive] -#[derive(Clone, PartialEq, Eq, Debug)] -pub enum Event { - Enter(Container), - Leave(Container), - - Text(Token), - Macros(Macros), - Cookie(Cookie), - InlineCall(InlineCall), - InlineSrc(InlineSrc), - Clock(Clock), - LineBreak(LineBreak), - Snippet(Snippet), - Rule(Rule), - Timestamp(Timestamp), - LatexFragment(LatexFragment), - LatexEnvironment(LatexEnvironment), - Entity(Entity), - - #[cfg(feature = "syntax-org-fc")] - Cloze(Cloze), -} diff --git a/src/export/html.rs b/src/export/html.rs index fa31d42..e0b0bd1 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -1,12 +1,10 @@ -use rowan::NodeOrToken; -use std::cmp::min; use std::fmt; -use std::fmt::Write as _; +use std::io::{Error, Result as IOResult, Write}; -use super::event::{Container, Event}; -use super::TraversalContext; -use super::Traverser; -use 
crate::{SyntaxElement, SyntaxKind, SyntaxNode}; +use jetscii::{bytes, BytesConst}; + +use crate::elements::{Element, Table, TableCell, TableRow, Timestamp}; +use crate::export::write_datetime; /// A wrapper for escaping sensitive characters in html. /// @@ -28,7 +26,11 @@ impl> fmt::Display for HtmlEscape { let content = self.0.as_ref(); let bytes = content.as_bytes(); - while let Some(off) = jetscii::bytes!(b'<', b'>', b'&', b'\'', b'"').find(&bytes[pos..]) { + lazy_static::lazy_static! { + static ref ESCAPE_BYTES: BytesConst = bytes!(b'<', b'>', b'&', b'\'', b'"'); + } + + while let Some(off) = ESCAPE_BYTES.find(&bytes[pos..]) { write!(f, "{}", &content[pos..pos + off])?; pos += off + 1; @@ -39,7 +41,7 @@ impl> fmt::Display for HtmlEscape { b'&' => write!(f, "&")?, b'\'' => write!(f, "'")?, b'"' => write!(f, """)?, - _ => {} + _ => unreachable!(), } } @@ -47,295 +49,349 @@ impl> fmt::Display for HtmlEscape { } } +pub trait HtmlHandler>: Default { + fn start(&mut self, w: W, element: &Element) -> Result<(), E>; + fn end(&mut self, w: W, element: &Element) -> Result<(), E>; +} + +/// Default Html Handler #[derive(Default)] -pub struct HtmlExport { - output: String, +pub struct DefaultHtmlHandler; - in_descriptive_list: Vec, +impl HtmlHandler for DefaultHtmlHandler { + fn start(&mut self, mut w: W, element: &Element) -> IOResult<()> { + match element { + // container elements + Element::SpecialBlock(_) => (), + Element::QuoteBlock(_) => write!(w, "
")?, + Element::CenterBlock(_) => write!(w, "
")?, + Element::VerseBlock(_) => write!(w, "

")?, + Element::Bold => write!(w, "")?, + Element::Document { .. } => write!(w, "

")?, + Element::DynBlock(_dyn_block) => (), + Element::Headline { .. } => (), + Element::List(list) => { + if list.ordered { + write!(w, "
    ")?; + } else { + write!(w, "
      ")?; + } + } + Element::Italic => write!(w, "")?, + Element::ListItem(_) => write!(w, "
    • ")?, + Element::Paragraph { .. } => write!(w, "

      ")?, + Element::Section => write!(w, "

      ")?, + Element::Strike => write!(w, "")?, + Element::Underline => write!(w, "")?, + // non-container elements + Element::CommentBlock(_) => (), + Element::ExampleBlock(block) => write!( + w, + "
      {}
      ", + HtmlEscape(&block.contents) + )?, + Element::ExportBlock(block) => { + if block.data.eq_ignore_ascii_case("HTML") { + write!(w, "{}", block.contents)? + } + } + Element::SourceBlock(block) => { + if block.language.is_empty() { + write!( + w, + "
      {}
      ", + HtmlEscape(&block.contents) + )?; + } else { + write!( + w, + "
      {}
      ", + block.language, + HtmlEscape(&block.contents) + )?; + } + } + Element::BabelCall(_) => (), + Element::InlineSrc(inline_src) => write!( + w, + "{}", + inline_src.lang, + HtmlEscape(&inline_src.body) + )?, + Element::Code { value } => write!(w, "{}", HtmlEscape(value))?, + Element::FnRef(_fn_ref) => (), + Element::InlineCall(_) => (), + Element::Link(link) => write!( + w, + "{}", + HtmlEscape(&link.path), + HtmlEscape(link.desc.as_ref().unwrap_or(&link.path)), + )?, + Element::Macros(_macros) => (), + Element::RadioTarget => (), + Element::Snippet(snippet) => { + if snippet.name.eq_ignore_ascii_case("HTML") { + write!(w, "{}", snippet.value)?; + } + } + Element::Target(_target) => (), + Element::Text { value } => write!(w, "{}", HtmlEscape(value))?, + Element::Timestamp(timestamp) => { + write!( + &mut w, + "" + )?; - table_row: TableRow, -} + match timestamp { + Timestamp::Active { start, .. } => { + write_datetime(&mut w, "<", start, ">")?; + } + Timestamp::Inactive { start, .. } => { + write_datetime(&mut w, "[", start, "]")?; + } + Timestamp::ActiveRange { start, end, .. } => { + write_datetime(&mut w, "<", start, ">–")?; + write_datetime(&mut w, "<", end, ">")?; + } + Timestamp::InactiveRange { start, end, .. } => { + write_datetime(&mut w, "[", start, "]–")?; + write_datetime(&mut w, "[", end, "]")?; + } + Timestamp::Diary { value } => { + write!(&mut w, "<%%({})>", HtmlEscape(value))? + } + } -#[derive(Default, PartialEq, Eq)] -enum TableRow { - #[default] - HeaderRule, - Header, - BodyRule, - Body, -} + write!(&mut w, "")?; + } + Element::Verbatim { value } => write!(&mut w, "{}", HtmlEscape(value))?, + Element::FnDef(_fn_def) => (), + Element::Clock(_clock) => (), + Element::Comment(_) => (), + Element::FixedWidth(fixed_width) => write!( + w, + "
      {}
      ", + HtmlEscape(&fixed_width.value) + )?, + Element::Keyword(_keyword) => (), + Element::Drawer(_drawer) => (), + Element::Rule(_) => write!(w, "
      ")?, + Element::Cookie(cookie) => write!(w, "{}", cookie.value)?, + Element::Title(title) => { + write!(w, "", if title.level <= 6 { title.level } else { 6 })?; + } + Element::Table(Table::TableEl { .. }) => (), + Element::Table(Table::Org { has_header, .. }) => { + write!(w, "")?; + if *has_header { + write!(w, "")?; + } else { + write!(w, "")?; + } + } + Element::TableRow(row) => match row { + TableRow::Body => write!(w, "")?, + TableRow::BodyRule => write!(w, "")?, + TableRow::Header => write!(w, "")?, + TableRow::HeaderRule => write!(w, "")?, + }, + Element::TableCell(cell) => match cell { + TableCell::Body => write!(w, "
      ")?, + TableCell::Header => write!(w, "")?, + }, + } -impl HtmlExport { - pub fn push_str(&mut self, s: impl AsRef) { - self.output += s.as_ref(); + Ok(()) } - pub fn finish(self) -> String { - self.output - } + fn end(&mut self, mut w: W, element: &Element) -> IOResult<()> { + match element { + // container elements + Element::SpecialBlock(_) => (), + Element::QuoteBlock(_) => write!(w, "")?, + Element::CenterBlock(_) => write!(w, "")?, + Element::VerseBlock(_) => write!(w, "

      ")?, + Element::Bold => write!(w, "")?, + Element::Document { .. } => write!(w, "")?, + Element::DynBlock(_dyn_block) => (), + Element::Headline { .. } => (), + Element::List(list) => { + if list.ordered { + write!(w, "")?; + } else { + write!(w, "")?; + } + } + Element::Italic => write!(w, "")?, + Element::ListItem(_) => write!(w, "")?, + Element::Paragraph { .. } => write!(w, "

      ")?, + Element::Section => write!(w, "")?, + Element::Strike => write!(w, "")?, + Element::Underline => write!(w, "")?, + Element::Title(title) => { + write!(w, "", if title.level <= 6 { title.level } else { 6 })? + } + Element::Table(Table::TableEl { .. }) => (), + Element::Table(Table::Org { .. }) => { + write!(w, "
      ")?; + } + Element::TableRow(TableRow::Body) | Element::TableRow(TableRow::Header) => { + write!(w, "")?; + } + Element::TableCell(cell) => match cell { + TableCell::Body => write!(w, "")?, + TableCell::Header => write!(w, "")?, + }, + // non-container elements + _ => debug_assert!(!element.is_container()), + } - /// Render syntax node to html string + Ok(()) + } +} + +#[cfg(feature = "syntect")] +mod syntect_handler { + use super::*; + use std::marker::PhantomData; + + use syntect::{ + easy::HighlightLines, + highlighting::ThemeSet, + html::{styled_line_to_highlighted_html, IncludeBackground}, + parsing::SyntaxSet, + }; + + /// Syntect Html Handler + /// + /// Simple Usage: /// /// ```rust - /// use orgize::{Org, ast::Bold, export::HtmlExport, rowan::ast::AstNode}; + /// use orgize::Org; + /// use orgize::export::{DefaultHtmlHandler, SyntectHtmlHandler}; /// - /// let org = Org::parse("* /hello/ *world*"); - /// let bold = org.first_node::().unwrap(); - /// let mut html = HtmlExport::default(); - /// html.render(bold.syntax()); - /// assert_eq!(html.finish(), "world"); + /// let mut handler = SyntectHtmlHandler::new(DefaultHtmlHandler); + /// let org = Org::parse("src_rust{println!(\"Hello\")}"); + /// + /// let mut vec = vec![]; + /// + /// org.write_html_custom(&mut vec, &mut handler).unwrap(); /// ``` - pub fn render(&mut self, node: &SyntaxNode) { - let mut ctx = TraversalContext::default(); - self.element(SyntaxElement::Node(node.clone()), &mut ctx); + /// + /// Customize: + /// + /// ```rust,no_run + /// // orgize has re-exported the whole syntect crate + /// use orgize::syntect::parsing::SyntaxSet; + /// use orgize::export::{DefaultHtmlHandler, SyntectHtmlHandler}; + /// + /// let mut handler = SyntectHtmlHandler { + /// syntax_set: { + /// let set = SyntaxSet::load_defaults_newlines(); + /// let mut builder = set.into_builder(); + /// // add extra language syntax + /// builder.add_from_folder("path/to/syntax/dir", true).unwrap(); + /// 
builder.build() + /// }, + /// // specify theme + /// theme: String::from("Solarized (dark)"), + /// inner: DefaultHtmlHandler, + /// ..Default::default() + /// }; + /// + /// // Make sure to check if theme presents or it will panic at runtime + /// if handler.theme_set.themes.contains_key("dont-exists") { + /// + /// } + /// ``` + pub struct SyntectHtmlHandler, H: HtmlHandler> { + /// syntax set, default is `SyntaxSet::load_defaults_newlines()` + pub syntax_set: SyntaxSet, + /// theme set, default is `ThemeSet::load_defaults()` + pub theme_set: ThemeSet, + /// theme used for highlighting, default is `"InspiredGitHub"` + pub theme: String, + /// inner html handler + pub inner: H, + /// background color, default is `IncludeBackground::No` + pub background: IncludeBackground, + /// handler error type + pub error_type: PhantomData, } -} -impl Traverser for HtmlExport { - fn event(&mut self, event: Event, ctx: &mut TraversalContext) { - match event { - Event::Enter(Container::Document(_)) => self.output += "
      ", - Event::Leave(Container::Document(_)) => self.output += "
      ", - - Event::Enter(Container::Headline(headline)) => { - let level = min(headline.level(), 6); - let _ = write!(&mut self.output, ""); - for elem in headline.title() { - self.element(elem, ctx); - } - let _ = write!(&mut self.output, ""); + impl, H: HtmlHandler> SyntectHtmlHandler { + pub fn new(inner: H) -> Self { + SyntectHtmlHandler { + inner, + ..Default::default() } - Event::Leave(Container::Headline(_)) => {} + } - Event::Enter(Container::Paragraph(_)) => self.output += "

      ", - Event::Leave(Container::Paragraph(_)) => self.output += "

      ", + fn highlight(&self, language: Option<&str>, content: &str) -> String { + let mut highlighter = HighlightLines::new( + language + .and_then(|lang| self.syntax_set.find_syntax_by_token(lang)) + .unwrap_or_else(|| self.syntax_set.find_syntax_plain_text()), + &self.theme_set.themes[&self.theme], + ); + let regions = highlighter.highlight(content, &self.syntax_set); + styled_line_to_highlighted_html(®ions[..], self.background) + } + } - Event::Enter(Container::Section(_)) => self.output += "
      ", - Event::Leave(Container::Section(_)) => self.output += "
      ", - - Event::Enter(Container::Italic(_)) => self.output += "", - Event::Leave(Container::Italic(_)) => self.output += "", - - Event::Enter(Container::Bold(_)) => self.output += "", - Event::Leave(Container::Bold(_)) => self.output += "", - - Event::Enter(Container::Strike(_)) => self.output += "", - Event::Leave(Container::Strike(_)) => self.output += "", - - Event::Enter(Container::Underline(_)) => self.output += "", - Event::Leave(Container::Underline(_)) => self.output += "", - - Event::Enter(Container::Verbatim(_)) => self.output += "", - Event::Leave(Container::Verbatim(_)) => self.output += "", - - Event::Enter(Container::Code(_)) => self.output += "", - Event::Leave(Container::Code(_)) => self.output += "", - - Event::Enter(Container::SourceBlock(block)) => { - if let Some(language) = block.language() { - let _ = write!( - &mut self.output, - r#"
      "#,
      -                        HtmlEscape(&language)
      -                    );
      -                } else {
      -                    self.output += r#"
      "#
      -                }
      +    impl, H: HtmlHandler> Default for SyntectHtmlHandler {
      +        fn default() -> Self {
      +            SyntectHtmlHandler {
      +                syntax_set: SyntaxSet::load_defaults_newlines(),
      +                theme_set: ThemeSet::load_defaults(),
      +                theme: String::from("InspiredGitHub"),
      +                inner: H::default(),
      +                background: IncludeBackground::No,
      +                error_type: PhantomData,
                   }
      -            Event::Leave(Container::SourceBlock(_)) => self.output += "
      ", + } + } - Event::Enter(Container::QuoteBlock(_)) => self.output += "
      ", - Event::Leave(Container::QuoteBlock(_)) => self.output += "
      ", - - Event::Enter(Container::VerseBlock(_)) => self.output += "

      ", - Event::Leave(Container::VerseBlock(_)) => self.output += "

      ", - - Event::Enter(Container::ExampleBlock(_)) => self.output += "
      ",
      -            Event::Leave(Container::ExampleBlock(_)) => self.output += "
      ", - - Event::Enter(Container::CenterBlock(_)) => self.output += "
      ", - Event::Leave(Container::CenterBlock(_)) => self.output += "
      ", - - Event::Enter(Container::CommentBlock(_)) => self.output += "", - - Event::Enter(Container::Comment(_)) => self.output += "", - - Event::Enter(Container::Subscript(_)) => self.output += "", - Event::Leave(Container::Subscript(_)) => self.output += "", - - Event::Enter(Container::Superscript(_)) => self.output += "", - Event::Leave(Container::Superscript(_)) => self.output += "", - - Event::Enter(Container::List(list)) => { - self.output += if list.is_ordered() { - self.in_descriptive_list.push(false); - "
        " - } else if list.is_descriptive() { - self.in_descriptive_list.push(true); - "
        " - } else { - self.in_descriptive_list.push(false); - "
          " - }; - } - Event::Leave(Container::List(list)) => { - self.output += if list.is_ordered() { - "
      " - } else if let Some(true) = self.in_descriptive_list.last() { - "" - } else { - "
    " - }; - self.in_descriptive_list.pop(); - } - Event::Enter(Container::ListItem(list_item)) => { - if let Some(&true) = self.in_descriptive_list.last() { - self.output += "
    "; - for elem in list_item.tag() { - self.element(elem, ctx); - } - self.output += "
    "; - } else { - self.output += "
  1. "; - } - } - Event::Leave(Container::ListItem(_)) => { - if let Some(&true) = self.in_descriptive_list.last() { - self.output += "
  2. "; - } else { - self.output += ""; - } - } - - Event::Enter(Container::OrgTable(table)) => { - self.output += ""; - self.table_row = if table.has_header() { - TableRow::HeaderRule - } else { - TableRow::BodyRule - } - } - Event::Leave(Container::OrgTable(_)) => { - match self.table_row { - TableRow::Body => self.output += "", - TableRow::Header => self.output += "", - _ => {} - } - self.output += "
    "; - } - Event::Enter(Container::OrgTableRow(row)) => { - if row.is_rule() { - match self.table_row { - TableRow::Body => { - self.output += ""; - self.table_row = TableRow::BodyRule; - } - TableRow::Header => { - self.output += ""; - self.table_row = TableRow::BodyRule; - } - _ => {} - } - ctx.skip(); - } else { - match self.table_row { - TableRow::HeaderRule => { - self.table_row = TableRow::Header; - self.output += ""; - } - TableRow::BodyRule => { - self.table_row = TableRow::Body; - self.output += ""; - } - _ => {} - } - self.output += ""; - } - } - Event::Leave(Container::OrgTableRow(row)) => { - if row.is_rule() { - match self.table_row { - TableRow::Body => { - self.output += ""; - self.table_row = TableRow::BodyRule; - } - TableRow::Header => { - self.output += ""; - self.table_row = TableRow::BodyRule; - } - _ => {} - } - ctx.skip(); - } else { - self.output += ""; - } - } - Event::Enter(Container::OrgTableCell(_)) => self.output += "", - Event::Leave(Container::OrgTableCell(_)) => self.output += "", - - Event::Enter(Container::Link(link)) => { - let path = link.path(); - let path = path.trim_start_matches("file:"); - - if link.is_image() { - let _ = write!(&mut self.output, r#""#, HtmlEscape(&path)); - return ctx.skip(); - } - - let _ = write!(&mut self.output, r#""#, HtmlEscape(&path)); - - if !link.has_description() { - let _ = write!(&mut self.output, "{}", HtmlEscape(&path)); - ctx.skip(); - } - } - Event::Leave(Container::Link(_)) => self.output += "", - - Event::Text(text) => { - let _ = write!(&mut self.output, "{}", HtmlEscape(text)); - } - - Event::LineBreak(_) => self.output += "
    ", - - Event::Snippet(snippet) => { - if snippet.backend().eq_ignore_ascii_case("html") { - self.output += &snippet.value(); - } - } - - Event::Rule(_) => self.output += "
    ", - - Event::Timestamp(timestamp) => { - self.output += r#""#; - for e in timestamp.syntax.children_with_tokens() { - match e { - NodeOrToken::Token(t) if t.kind() == SyntaxKind::MINUS2 => { - self.output += "–"; - } - NodeOrToken::Token(t) => { - self.output += t.text(); - } - _ => {} + impl, H: HtmlHandler> HtmlHandler for SyntectHtmlHandler { + fn start(&mut self, mut w: W, element: &Element) -> Result<(), E> { + match element { + Element::InlineSrc(inline_src) => write!( + w, + "{}", + self.highlight(Some(&inline_src.lang), &inline_src.body) + )?, + Element::SourceBlock(block) => { + if block.language.is_empty() { + write!(w, "
    {}
    ", block.contents)?; + } else { + write!( + w, + "
    {}
    ", + block.language, + self.highlight(Some(&block.language), &block.contents) + )?; } } - self.output += r#"
    "#; + Element::FixedWidth(fixed_width) => write!( + w, + "
    {}
    ", + self.highlight(None, &fixed_width.value) + )?, + Element::ExampleBlock(block) => write!( + w, + "
    {}
    ", + self.highlight(None, &block.contents) + )?, + _ => self.inner.start(w, element)?, } + Ok(()) + } - Event::LatexFragment(latex) => { - let _ = write!(&mut self.output, "{}", &latex.syntax); - } - Event::LatexEnvironment(latex) => { - let _ = write!(&mut self.output, "{}", &latex.syntax); - } - - // ignores keyword - Event::Enter(Container::Keyword(_)) => ctx.skip(), - - Event::Entity(entity) => self.output += entity.html(), - - _ => {} + fn end(&mut self, w: W, element: &Element) -> Result<(), E> { + self.inner.end(w, element) } } } + +#[cfg(feature = "syntect")] +pub use syntect_handler::SyntectHtmlHandler; diff --git a/src/export/markdown.rs b/src/export/markdown.rs deleted file mode 100644 index b75b1ee..0000000 --- a/src/export/markdown.rs +++ /dev/null @@ -1,186 +0,0 @@ -use std::cmp::min; -use std::fmt::Write as _; - -use crate::{SyntaxElement, SyntaxNode}; - -use super::event::{Container, Event}; -use super::TraversalContext; -use super::Traverser; - -#[derive(Default)] -pub struct MarkdownExport { - output: String, - - inside_blockquote: bool, -} - -impl MarkdownExport { - pub fn push_str(&mut self, s: impl AsRef) { - self.output += s.as_ref(); - } - - /// Render syntax node to markdown string - /// - /// ```rust - /// use orgize::{Org, ast::Bold, export::MarkdownExport, rowan::ast::AstNode}; - /// - /// let org = Org::parse("* /hello/ *world*"); - /// let bold = org.first_node::().unwrap(); - /// let mut markdown = MarkdownExport::default(); - /// markdown.render(bold.syntax()); - /// assert_eq!(markdown.finish(), "**world**"); - /// ``` - pub fn render(&mut self, node: &SyntaxNode) { - let mut ctx = TraversalContext::default(); - self.element(SyntaxElement::Node(node.clone()), &mut ctx); - } - - pub fn finish(self) -> String { - self.output - } - - fn follows_newline(&mut self) { - if !self.output.is_empty() && !self.output.ends_with(['\n', '\r']) { - self.output += "\n"; - } - } -} - -impl Traverser for MarkdownExport { - fn event(&mut self, 
event: Event, ctx: &mut TraversalContext) { - match event { - Event::Enter(Container::Document(_)) => {} - Event::Leave(Container::Document(_)) => {} - - Event::Enter(Container::Headline(headline)) => { - self.follows_newline(); - let level = min(headline.level(), 6); - let _ = write!(&mut self.output, "{} ", "#".repeat(level)); - for elem in headline.title() { - self.element(elem, ctx); - } - } - Event::Leave(Container::Headline(_)) => {} - - Event::Enter(Container::Paragraph(_)) => {} - Event::Leave(Container::Paragraph(_)) => self.output += "\n", - - Event::Enter(Container::Section(_)) => self.follows_newline(), - Event::Leave(Container::Section(_)) => {} - - Event::Enter(Container::Italic(_)) => self.output += "*", - Event::Leave(Container::Italic(_)) => self.output += "*", - - Event::Enter(Container::Bold(_)) => self.output += "**", - Event::Leave(Container::Bold(_)) => self.output += "**", - - Event::Enter(Container::Strike(_)) => self.output += "~~", - Event::Leave(Container::Strike(_)) => self.output += "~~", - - Event::Enter(Container::Underline(_)) => {} - Event::Leave(Container::Underline(_)) => {} - - Event::Enter(Container::Verbatim(_)) - | Event::Leave(Container::Verbatim(_)) - | Event::Enter(Container::Code(_)) - | Event::Leave(Container::Code(_)) => self.output += "`", - - Event::Enter(Container::SourceBlock(block)) => { - self.follows_newline(); - self.output += "```"; - if let Some(language) = block.language() { - self.output += &language; - } - } - Event::Leave(Container::SourceBlock(_)) => self.output += "```\n", - - Event::Enter(Container::QuoteBlock(_)) => { - self.inside_blockquote = true; - self.follows_newline(); - self.output += "> "; - } - Event::Leave(Container::QuoteBlock(_)) => self.inside_blockquote = false, - - Event::Enter(Container::CommentBlock(_)) => self.output += "", - - Event::Enter(Container::Comment(_)) => self.output += "", - - Event::Enter(Container::Subscript(_)) => self.output += "", - 
Event::Leave(Container::Subscript(_)) => self.output += "", - - Event::Enter(Container::Superscript(_)) => self.output += "", - Event::Leave(Container::Superscript(_)) => self.output += "", - - Event::Enter(Container::List(_list)) => {} - Event::Leave(Container::List(_list)) => {} - - Event::Enter(Container::ListItem(list_item)) => { - self.follows_newline(); - self.output += &" ".repeat(list_item.indent()); - self.output += &list_item.bullet(); - } - Event::Leave(Container::ListItem(_)) => {} - - Event::Enter(Container::OrgTable(_table)) => {} - Event::Leave(Container::OrgTable(_)) => {} - Event::Enter(Container::OrgTableRow(_row)) => {} - Event::Leave(Container::OrgTableRow(_row)) => {} - Event::Enter(Container::OrgTableCell(_)) => {} - Event::Leave(Container::OrgTableCell(_)) => {} - - Event::Enter(Container::Link(link)) => { - let path = link.path(); - let path = path.trim_start_matches("file:"); - - if link.is_image() { - let _ = write!(&mut self.output, "![]({path})"); - return ctx.skip(); - } - - if !link.has_description() { - let _ = write!(&mut self.output, r#"[{}]({})"#, &path, &path); - return ctx.skip(); - } - - self.output += "["; - } - Event::Leave(Container::Link(link)) => { - let _ = write!(&mut self.output, r#"]({})"#, &*link.path()); - } - - Event::Text(text) => { - if self.inside_blockquote { - for (idx, line) in text.split('\n').enumerate() { - if idx != 0 { - self.output += "\n> "; - } - self.output += line; - } - } else { - self.output += &*text; - } - } - - Event::LineBreak(_) => {} - - Event::Snippet(_snippet) => {} - - Event::Rule(_) => self.output += "\n-----\n", - - Event::Timestamp(_timestamp) => {} - - Event::LatexFragment(latex) => { - let _ = write!(&mut self.output, "{}", &latex.syntax); - } - Event::LatexEnvironment(latex) => { - let _ = write!(&mut self.output, "{}", &latex.syntax); - } - - Event::Entity(entity) => self.output += entity.utf8(), - - _ => {} - } - } -} diff --git a/src/export/mod.rs b/src/export/mod.rs index 
afada80..4eddfba 100644 --- a/src/export/mod.rs +++ b/src/export/mod.rs @@ -1,11 +1,31 @@ //! Export `Org` struct to various formats. -mod event; mod html; -mod markdown; -mod traverse; +mod org; -pub use event::{Container, Event}; -pub use html::{HtmlEscape, HtmlExport}; -pub use markdown::MarkdownExport; -pub use traverse::{from_fn, from_fn_with_ctx, FromFn, FromFnWithCtx, TraversalContext, Traverser}; +#[cfg(feature = "syntect")] +pub use html::SyntectHtmlHandler; +pub use html::{DefaultHtmlHandler, HtmlEscape, HtmlHandler}; +pub use org::{DefaultOrgHandler, OrgHandler}; + +use std::io::{Error, Write}; + +use crate::elements::Datetime; + +pub(crate) fn write_datetime( + mut w: W, + start: &str, + datetime: &Datetime, + end: &str, +) -> Result<(), Error> { + write!(w, "{}", start)?; + write!( + w, + "{}-{:02}-{:02} {}", + datetime.year, datetime.month, datetime.day, datetime.dayname + )?; + if let (Some(hour), Some(minute)) = (datetime.hour, datetime.minute) { + write!(w, " {:02}:{:02}", hour, minute)?; + } + write!(w, "{}", end) +} diff --git a/src/export/org.rs b/src/export/org.rs new file mode 100644 index 0000000..7e860d8 --- /dev/null +++ b/src/export/org.rs @@ -0,0 +1,321 @@ +use std::io::{Error, Result as IOResult, Write}; + +use crate::elements::{Clock, Element, Table, Timestamp}; +use crate::export::write_datetime; + +pub trait OrgHandler>: Default { + fn start(&mut self, w: W, element: &Element) -> Result<(), E>; + fn end(&mut self, w: W, element: &Element) -> Result<(), E>; +} + +#[derive(Default)] +pub struct DefaultOrgHandler; + +impl OrgHandler for DefaultOrgHandler { + fn start(&mut self, mut w: W, element: &Element) -> IOResult<()> { + match element { + // container elements + Element::SpecialBlock(block) => { + writeln!(w, "#+BEGIN_{}", block.name)?; + write_blank_lines(&mut w, block.pre_blank)?; + } + Element::QuoteBlock(block) => { + writeln!(&mut w, "#+BEGIN_QUOTE")?; + write_blank_lines(&mut w, block.pre_blank)?; + } + 
Element::CenterBlock(block) => { + writeln!(&mut w, "#+BEGIN_CENTER")?; + write_blank_lines(&mut w, block.pre_blank)?; + } + Element::VerseBlock(block) => { + writeln!(&mut w, "#+BEGIN_VERSE")?; + write_blank_lines(&mut w, block.pre_blank)?; + } + Element::Bold => write!(w, "*")?, + Element::Document { pre_blank } => { + write_blank_lines(w, *pre_blank)?; + } + Element::DynBlock(dyn_block) => { + write!(&mut w, "#+BEGIN: {}", dyn_block.block_name)?; + if let Some(parameters) = &dyn_block.arguments { + write!(&mut w, " {}", parameters)?; + } + write_blank_lines(&mut w, dyn_block.pre_blank + 1)?; + } + Element::Headline { .. } => (), + Element::List(_list) => (), + Element::Italic => write!(w, "/")?, + Element::ListItem(list_item) => { + for _ in 0..list_item.indent { + write!(&mut w, " ")?; + } + write!(&mut w, "{}", list_item.bullet)?; + } + Element::Paragraph { .. } => (), + Element::Section => (), + Element::Strike => write!(w, "+")?, + Element::Underline => write!(w, "_")?, + Element::Drawer(drawer) => { + writeln!(&mut w, ":{}:", drawer.name)?; + write_blank_lines(&mut w, drawer.pre_blank)?; + } + // non-container elements + Element::CommentBlock(block) => { + writeln!(&mut w, "#+BEGIN_COMMENT")?; + write!(&mut w, "{}", block.contents)?; + writeln!(&mut w, "#+END_COMMENT")?; + write_blank_lines(&mut w, block.post_blank)?; + } + Element::ExampleBlock(block) => { + writeln!(&mut w, "#+BEGIN_EXAMPLE")?; + write!(&mut w, "{}", block.contents)?; + writeln!(&mut w, "#+END_EXAMPLE")?; + write_blank_lines(&mut w, block.post_blank)?; + } + Element::ExportBlock(block) => { + writeln!(&mut w, "#+BEGIN_EXPORT {}", block.data)?; + write!(&mut w, "{}", block.contents)?; + writeln!(&mut w, "#+END_EXPORT")?; + write_blank_lines(&mut w, block.post_blank)?; + } + Element::SourceBlock(block) => { + writeln!(&mut w, "#+BEGIN_SRC {}", block.language)?; + write!(&mut w, "{}", block.contents)?; + writeln!(&mut w, "#+END_SRC")?; + write_blank_lines(&mut w, block.post_blank)?; + } + 
Element::BabelCall(call) => { + writeln!(&mut w, "#+CALL: {}", call.value)?; + write_blank_lines(w, call.post_blank)?; + } + Element::InlineSrc(inline_src) => { + write!(&mut w, "src_{}", inline_src.lang)?; + if let Some(options) = &inline_src.options { + write!(&mut w, "[{}]", options)?; + } + write!(&mut w, "{{{}}}", inline_src.body)?; + } + Element::Code { value } => write!(w, "~{}~", value)?, + Element::FnRef(fn_ref) => { + write!(&mut w, "[fn:{}", fn_ref.label)?; + if let Some(definition) = &fn_ref.definition { + write!(&mut w, ":{}", definition)?; + } + write!(&mut w, "]")?; + } + Element::InlineCall(inline_call) => { + write!(&mut w, "call_{}", inline_call.name)?; + if let Some(header) = &inline_call.inside_header { + write!(&mut w, "[{}]", header)?; + } + write!(&mut w, "({})", inline_call.arguments)?; + if let Some(header) = &inline_call.end_header { + write!(&mut w, "[{}]", header)?; + } + } + Element::Link(link) => { + write!(&mut w, "[[{}]", link.path)?; + if let Some(desc) = &link.desc { + write!(&mut w, "[{}]", desc)?; + } + write!(&mut w, "]")?; + } + Element::Macros(_macros) => (), + Element::RadioTarget => (), + Element::Snippet(snippet) => write!(w, "@@{}:{}@@", snippet.name, snippet.value)?, + Element::Target(_target) => (), + Element::Text { value } => write!(w, "{}", value)?, + Element::Timestamp(timestamp) => { + write_timestamp(&mut w, ×tamp)?; + } + Element::Verbatim { value } => write!(w, "={}=", value)?, + Element::FnDef(fn_def) => { + write_blank_lines(w, fn_def.post_blank)?; + } + Element::Clock(clock) => { + write!(w, "CLOCK: ")?; + + match clock { + Clock::Closed { + start, + end, + duration, + post_blank, + .. + } => { + write_datetime(&mut w, "[", &start, "]--")?; + write_datetime(&mut w, "[", &end, "]")?; + writeln!(&mut w, " => {}", duration)?; + write_blank_lines(&mut w, *post_blank)?; + } + Clock::Running { + start, post_blank, .. 
+ } => { + write_datetime(&mut w, "[", &start, "]\n")?; + write_blank_lines(&mut w, *post_blank)?; + } + } + } + Element::Comment(comment) => { + write!(w, "{}", comment.value)?; + write_blank_lines(&mut w, comment.post_blank)?; + } + Element::FixedWidth(fixed_width) => { + write!(&mut w, "{}", fixed_width.value)?; + write_blank_lines(&mut w, fixed_width.post_blank)?; + } + Element::Keyword(keyword) => { + write!(&mut w, "#+{}", keyword.key)?; + if let Some(optional) = &keyword.optional { + write!(&mut w, "[{}]", optional)?; + } + writeln!(&mut w, ": {}", keyword.value)?; + write_blank_lines(&mut w, keyword.post_blank)?; + } + Element::Rule(rule) => { + writeln!(w, "-----")?; + write_blank_lines(&mut w, rule.post_blank)?; + } + Element::Cookie(_cookie) => (), + Element::Title(title) => { + for _ in 0..title.level { + write!(&mut w, "*")?; + } + if let Some(keyword) = &title.keyword { + write!(&mut w, " {}", keyword)?; + } + if let Some(priority) = title.priority { + write!(&mut w, " [#{}]", priority)?; + } + write!(&mut w, " ")?; + } + Element::Table(_) => (), + Element::TableRow(_) => (), + Element::TableCell(_) => (), + } + + Ok(()) + } + + fn end(&mut self, mut w: W, element: &Element) -> IOResult<()> { + match element { + // container elements + Element::SpecialBlock(block) => { + writeln!(&mut w, "#+END_{}", block.name)?; + write_blank_lines(&mut w, block.post_blank)?; + } + Element::QuoteBlock(block) => { + writeln!(&mut w, "#+END_QUOTE")?; + write_blank_lines(&mut w, block.post_blank)?; + } + Element::CenterBlock(block) => { + writeln!(&mut w, "#+END_CENTER")?; + write_blank_lines(&mut w, block.post_blank)?; + } + Element::VerseBlock(block) => { + writeln!(&mut w, "#+END_VERSE")?; + write_blank_lines(&mut w, block.post_blank)?; + } + Element::Bold => write!(w, "*")?, + Element::Document { .. } => (), + Element::DynBlock(dyn_block) => { + writeln!(w, "#+END:")?; + write_blank_lines(w, dyn_block.post_blank)?; + } + Element::Headline { .. 
} => (), + Element::List(list) => { + write_blank_lines(w, list.post_blank)?; + } + Element::Italic => write!(w, "/")?, + Element::ListItem(_) => (), + Element::Paragraph { post_blank } => { + write_blank_lines(w, post_blank + 1)?; + } + Element::Section => (), + Element::Strike => write!(w, "+")?, + Element::Underline => write!(w, "_")?, + Element::Drawer(drawer) => { + writeln!(&mut w, ":END:")?; + write_blank_lines(&mut w, drawer.post_blank)?; + } + Element::Title(title) => { + if !title.tags.is_empty() { + write!(&mut w, " :")?; + for tag in &title.tags { + write!(&mut w, "{}:", tag)?; + } + } + writeln!(&mut w)?; + if let Some(planning) = &title.planning { + if let Some(scheduled) = &planning.scheduled { + write!(&mut w, "SCHEDULED: ")?; + write_timestamp(&mut w, &scheduled)?; + } + if let Some(deadline) = &planning.deadline { + if planning.scheduled.is_some() { + write!(&mut w, " ")?; + } + write!(&mut w, "DEADLINE: ")?; + write_timestamp(&mut w, &deadline)?; + } + if let Some(closed) = &planning.closed { + if planning.deadline.is_some() { + write!(&mut w, " ")?; + } + write!(&mut w, "CLOSED: ")?; + write_timestamp(&mut w, &closed)?; + } + writeln!(&mut w)?; + } + if !title.properties.is_empty() { + writeln!(&mut w, ":PROPERTIES:")?; + for (key, value) in &title.properties { + writeln!(&mut w, ":{}: {}", key, value)?; + } + writeln!(&mut w, ":END:")?; + } + write_blank_lines(&mut w, title.post_blank)?; + } + Element::Table(Table::Org { post_blank, .. }) => { + write_blank_lines(w, *post_blank)?; + } + Element::Table(Table::TableEl { post_blank, .. 
}) => { + write_blank_lines(w, *post_blank)?; + } + Element::TableRow(_) => (), + Element::TableCell(_) => (), + // non-container elements + _ => debug_assert!(!element.is_container()), + } + + Ok(()) + } +} + +fn write_blank_lines(mut w: W, count: usize) -> Result<(), Error> { + for _ in 0..count { + writeln!(w)?; + } + Ok(()) +} + +fn write_timestamp(mut w: W, timestamp: &Timestamp) -> Result<(), Error> { + match timestamp { + Timestamp::Active { start, .. } => { + write_datetime(w, "<", start, ">")?; + } + Timestamp::Inactive { start, .. } => { + write_datetime(w, "[", start, "]")?; + } + Timestamp::ActiveRange { start, end, .. } => { + write_datetime(&mut w, "<", start, ">--")?; + write_datetime(&mut w, "<", end, ">")?; + } + Timestamp::InactiveRange { start, end, .. } => { + write_datetime(&mut w, "[", start, "]--")?; + write_datetime(&mut w, "[", end, "]")?; + } + Timestamp::Diary { value } => write!(w, "<%%({})>", value)?, + } + Ok(()) +} diff --git a/src/export/traverse.rs b/src/export/traverse.rs deleted file mode 100644 index d53b7b8..0000000 --- a/src/export/traverse.rs +++ /dev/null @@ -1,282 +0,0 @@ -use crate::ast::*; -use crate::syntax::{SyntaxElement, SyntaxKind}; -use rowan::ast::AstNode; -use SyntaxKind::*; - -use super::event::{Container, Event}; - -#[derive(Default, Debug, PartialEq, Eq, Clone, Copy)] -enum TraversalControl { - Up, - Stop, - Skip, - #[default] - Continue, -} - -#[derive(Default)] -pub struct TraversalContext { - control: TraversalControl, -} - -impl TraversalContext { - /// Stops traversal completely - pub fn stop(&mut self) { - self.control = TraversalControl::Stop; - } - /// Skips traversal of the current node's siblings - pub fn up(&mut self) { - self.control = TraversalControl::Up; - } - /// Skips traversal of the current node's descendants - pub fn skip(&mut self) { - self.control = TraversalControl::Skip; - } - /// Continues traversal - pub fn r#continue(&mut self) { - self.control = TraversalControl::Continue; - } -} - 
-/// A trait for enumerating org syntax tree -/// -/// ### `TraversalContext` -/// -/// `TraversalContext` can be used to control the traversal. -/// -/// For example, `ctx.skip()` will skips the traversal for current -/// element and its descendants and improve the traversal performance. -/// -/// ```rust -/// use orgize::{ -/// export::{Container, Event, HtmlExport, TraversalContext, Traverser}, -/// Org, -/// }; -/// use slugify::slugify; -/// -/// #[derive(Default)] -/// struct Toc(HtmlExport); -/// -/// impl Traverser for Toc { -/// fn event(&mut self, event: Event, ctx: &mut TraversalContext) { -/// match event { -/// Event::Enter(Container::Headline(headline)) => { -/// let title = headline.title().map(|e| e.to_string()).collect::(); -/// self.0.push_str(&format!("", slugify!(&title))); -/// for elem in headline.title() { -/// self.element(elem, ctx); -/// } -/// self.0.push_str(""); -/// if headline.headlines().count() > 0 { -/// self.0.push_str("
      "); -/// } -/// } -/// Event::Leave(Container::Headline(headline)) => { -/// if headline.headlines().count() > 0 { -/// self.0.push_str("
    "); -/// } -/// } -/// Event::Enter(Container::Section(_)) | Event::Leave(Container::Section(_)) => ctx.skip(), -/// Event::Enter(Container::Document(_)) | Event::Leave(Container::Document(_)) => {} -/// _ => self.0.event(event, ctx), -/// } -/// } -/// } -/// -/// let org = Org::parse(r#" -/// * heading 1 -/// section 1 -/// ** heading 1.1 -/// ** heading 1.2 -/// * heading 2 -/// section 2 -/// * heading 3 -/// **** heading 3.1"#); -/// let mut toc = Toc::default(); -/// org.traverse(&mut toc); -/// assert_eq!(toc.0.finish(), "\ -/// heading 1\ -/// \ -/// heading 2\ -/// heading 3\ -/// "); -/// ``` -pub trait Traverser { - /// Handles traversal event - fn event(&mut self, event: Event, ctx: &mut TraversalContext); - - fn element(&mut self, element: SyntaxElement, ctx: &mut TraversalContext) { - macro_rules! take_control { - () => { - match ctx.control { - TraversalControl::Stop => { - ctx.control = TraversalControl::Stop; - return; - } - TraversalControl::Up => { - ctx.control = TraversalControl::Skip; - return; - } - TraversalControl::Skip => { - ctx.control = TraversalControl::Continue; - return; - } - TraversalControl::Continue => {} - } - }; - } - - match element { - SyntaxElement::Node(node) => { - macro_rules! 
walk { - ($ast:ident) => {{ - debug_assert!($ast::can_cast(node.kind())); - let node = $ast { syntax: node }; - self.event(Event::Enter(Container::$ast(node.clone())), ctx); - take_control!(); - for child in node.syntax.children_with_tokens() { - self.element(child, ctx); - take_control!(); - } - self.event(Event::Leave(Container::$ast(node.clone())), ctx); - take_control!(); - }}; - (@$ast:ident) => {{ - debug_assert!($ast::can_cast(node.kind())); - let node = $ast { syntax: node }; - self.event(Event::$ast(node), ctx); - take_control!(); - }}; - } - - match node.kind() { - DOCUMENT => walk!(Document), - HEADLINE => walk!(Headline), - SECTION => walk!(Section), - PARAGRAPH => walk!(Paragraph), - BOLD => walk!(Bold), - ITALIC => walk!(Italic), - STRIKE => walk!(Strike), - UNDERLINE => walk!(Underline), - LIST => walk!(List), - LIST_ITEM => walk!(ListItem), - CODE => walk!(Code), - INLINE_CALL => walk!(@InlineCall), - INLINE_SRC => walk!(@InlineSrc), - RULE => walk!(@Rule), - VERBATIM => walk!(Verbatim), - SPECIAL_BLOCK => walk!(SpecialBlock), - QUOTE_BLOCK => walk!(QuoteBlock), - CENTER_BLOCK => walk!(CenterBlock), - VERSE_BLOCK => walk!(VerseBlock), - COMMENT_BLOCK => walk!(CommentBlock), - EXAMPLE_BLOCK => walk!(ExampleBlock), - EXPORT_BLOCK => walk!(ExportBlock), - SOURCE_BLOCK => walk!(SourceBlock), - BABEL_CALL => walk!(BabelCall), - CLOCK => walk!(@Clock), - COOKIE => walk!(@Cookie), - RADIO_TARGET => walk!(RadioTarget), - DRAWER => walk!(Drawer), - DYN_BLOCK => walk!(DynBlock), - FN_DEF => walk!(FnDef), - FN_REF => walk!(FnRef), - MACROS => walk!(@Macros), - SNIPPET => walk!(@Snippet), - TIMESTAMP_ACTIVE | TIMESTAMP_INACTIVE | TIMESTAMP_DIARY => walk!(@Timestamp), - TARGET => walk!(Target), - COMMENT => walk!(Comment), - FIXED_WIDTH => walk!(FixedWidth), - ORG_TABLE => walk!(OrgTable), - ORG_TABLE_RULE_ROW | ORG_TABLE_STANDARD_ROW => walk!(OrgTableRow), - ORG_TABLE_CELL => walk!(OrgTableCell), - LINK => walk!(Link), - LATEX_FRAGMENT => walk!(@LatexFragment), 
- LATEX_ENVIRONMENT => walk!(@LatexEnvironment), - ENTITY => walk!(@Entity), - LINE_BREAK => walk!(@LineBreak), - SUPERSCRIPT => walk!(Superscript), - SUBSCRIPT => walk!(Subscript), - KEYWORD => walk!(Keyword), - PROPERTY_DRAWER => walk!(PropertyDrawer), - #[cfg(feature = "syntax-org-fc")] - CLOZE => walk!(@Cloze), - BLOCK_CONTENT | LIST_ITEM_CONTENT => { - for child in node.children_with_tokens() { - self.element(child, ctx); - take_control!(); - } - } - _ => {} - } - } - SyntaxElement::Token(token) => { - if token.kind() == TEXT { - self.event(Event::Text(Token(token)), ctx); - take_control!(); - } - } - }; - } -} - -pub struct FromFn(F); - -impl Traverser for FromFn { - fn event(&mut self, event: Event, _: &mut TraversalContext) { - (self.0)(event) - } -} - -pub struct FromFnWithCtx(F); - -impl Traverser for FromFnWithCtx { - fn event(&mut self, event: Event, ctx: &mut TraversalContext) { - (self.0)(event, ctx) - } -} - -/// A helper for creating traverser -/// -/// ```rust -/// use orgize::{ -/// export::{from_fn, Container, Event, Traverser}, -/// Org, -/// }; -/// -/// let mut count = 0; -/// let mut handler = from_fn(|event| { -/// if matches!(event, Event::Enter(Container::Headline(_))) { -/// count += 1; -/// } -/// }); -/// Org::parse("* 1\n** 2\n*** 3\n****4").traverse(&mut handler); -/// assert_eq!(count, 3); -/// ``` -pub fn from_fn(f: F) -> FromFn { - FromFn(f) -} - -/// A helper for creating traverser -/// -/// ```rust -/// use orgize::{ -/// export::{from_fn_with_ctx, Container, Event, Traverser}, -/// Org, -/// }; -/// -/// let mut count = 0; -/// let mut handler = from_fn_with_ctx(|event, ctx| { -/// if let Event::Enter(Container::Headline(hdl)) = event { -/// count += 1; -/// if &hdl.title_raw() == "cow" { -/// ctx.stop(); -/// } -/// } -/// }); -/// Org::parse("* 1\n* cow\n* 3").traverse(&mut handler); -/// assert_eq!(count, 2); -/// ``` -pub fn from_fn_with_ctx(f: F) -> FromFnWithCtx { - FromFnWithCtx(f) -} diff --git a/src/headline.rs 
b/src/headline.rs new file mode 100644 index 0000000..49c3617 --- /dev/null +++ b/src/headline.rs @@ -0,0 +1,1219 @@ +use indextree::NodeId; +use std::borrow::Cow; +use std::ops::RangeInclusive; +use std::usize; + +use crate::{ + config::ParseConfig, + elements::{Element, Title}, + parsers::{parse_container, Container, OwnedArena}, + validate::{ValidationError, ValidationResult}, + Org, +}; + +/// Represents the document in `Org` struct. +/// +/// Each `Org` struct only has one `Document`. +#[derive(Copy, Clone, Debug)] +pub struct Document { + doc_n: NodeId, + sec_n: Option, +} + +impl Document { + pub(crate) fn from_org(org: &Org) -> Document { + let sec_n = org.arena[org.root] + .first_child() + .and_then(|n| match org[n] { + Element::Section => Some(n), + Element::Headline { .. } => None, + _ => unreachable!("Document should only contains section and headline."), + }); + + Document { + doc_n: org.root, + sec_n, + } + } + + /// Returns the ID of the section element of this document, + /// or `None` if it has no section. + pub fn section_node(self) -> Option { + self.sec_n + } + + /// Returns an iterator of this document's children. 
+ /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// ** h1 + /// ** h2 + /// *** h2_1 + /// *** h2_2 + /// ** h3 + /// "#, + /// ); + /// + /// let d = org.document(); + /// + /// let mut iter = d.children(&org); + /// + /// assert_eq!(iter.next().unwrap().title(&org).raw, "h1"); + /// assert_eq!(iter.next().unwrap().title(&org).raw, "h2"); + /// assert_eq!(iter.next().unwrap().title(&org).raw, "h3"); + /// assert!(iter.next().is_none()); + /// ``` + pub fn children<'a>(self, org: &'a Org) -> impl Iterator + 'a { + self.doc_n + .children(&org.arena) + // skip section if exists + .skip(if self.sec_n.is_some() { 1 } else { 0 }) + .map(move |n| match org[n] { + Element::Headline { level } => Headline::from_node(n, level, org), + _ => unreachable!(), + }) + } + + /// Returns the first child of this document, or `None` if it has no child. + /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// ** h1 + /// ** h2 + /// *** h2_1 + /// *** h2_2 + /// ** h3 + /// "#, + /// ); + /// + /// let d = org.document(); + /// + /// assert_eq!(d.first_child(&org).unwrap().title(&org).raw, "h1"); + /// ``` + /// + /// ```rust + /// # use orgize::Org; + /// # + /// let org = Org::new(); + /// + /// assert!(org.document().first_child(&org).is_none()); + /// ``` + pub fn first_child(self, org: &Org) -> Option { + self.doc_n + .children(&org.arena) + // skip section if exists + .nth(if self.sec_n.is_some() { 1 } else { 0 }) + .map(move |n| match org[n] { + Element::Headline { level } => Headline::from_node(n, level, org), + _ => unreachable!(), + }) + } + + /// Returns the last child of this document, or `None` if it has no child. 
+ /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// ** h1_1 + /// ** h1_2 + /// *** h1_2_1 + /// *** h1_2_2 + /// ** h1_3 + /// "#, + /// ); + /// + /// let d = org.document(); + /// + /// assert_eq!(d.last_child(&org).unwrap().title(&org).raw, "h1_3"); + /// ``` + /// + /// ```rust + /// # use orgize::Org; + /// # + /// let org = Org::new(); + /// + /// assert!(org.document().last_child(&org).is_none()); + /// ``` + pub fn last_child(self, org: &Org) -> Option { + org.arena[self.doc_n] + .last_child() + .and_then(|n| match org[n] { + Element::Headline { level } => Some(Headline::from_node(n, level, org)), + Element::Section => None, + _ => unreachable!("Document should only contains section and headline."), + }) + } + + /// Changes the section content of this document. + /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// ** h1_1 + /// ** h1_2 + /// "#, + /// ); + /// + /// let mut d = org.document(); + /// + /// d.set_section_content("s", &mut org); + /// + /// let mut writer = Vec::new(); + /// org.write_org(&mut writer).unwrap(); + /// assert_eq!( + /// String::from_utf8(writer).unwrap(), + /// r#" + /// s + /// ** h1_1 + /// ** h1_2 + /// "#, + /// ); + /// ``` + pub fn set_section_content<'a, S>(&mut self, content: S, org: &mut Org<'a>) + where + S: Into>, + { + if let Some(sec_n) = self.sec_n { + let children: Vec<_> = sec_n.children(&org.arena).collect(); + for child in children { + child.detach(&mut org.arena); + } + } else { + let sec_n = org.arena.new_node(Element::Section); + self.sec_n = Some(sec_n); + self.doc_n.prepend(sec_n, &mut org.arena); + } + + match content.into() { + Cow::Borrowed(content) => parse_container( + &mut org.arena, + Container::Block { + node: self.sec_n.unwrap(), + content, + }, + &ParseConfig::default(), + ), + Cow::Owned(ref content) => parse_container( + &mut OwnedArena::new(&mut org.arena), + Container::Block { + node: 
self.sec_n.unwrap(), + content, + }, + &ParseConfig::default(), + ), + } + + org.debug_validate(); + } + + /// Appends a new child to this document. + /// + /// Returns an error if the given new child was already attached, + /// or the given new child didn't meet the requirements. + /// + /// ```rust + /// # use orgize::{elements::Title, Headline, Org}; + /// # + /// let mut org = Org::parse( + /// r#" + /// ***** h1 + /// **** h2 + /// *** h3 + /// "#, + /// ); + /// + /// let d = org.document(); + /// + /// let mut h4 = Headline::new( + /// Title { + /// raw: "h4".into(), + /// ..Default::default() + /// }, + /// &mut org, + /// ); + /// + /// // level must be smaller than or equal to 3 + /// h4.set_level(4, &mut org).unwrap(); + /// assert!(d.append(h4, &mut org).is_err()); + /// + /// h4.set_level(2, &mut org).unwrap(); + /// assert!(d.append(h4, &mut org).is_ok()); + /// + /// let mut writer = Vec::new(); + /// org.write_org(&mut writer).unwrap(); + /// assert_eq!( + /// String::from_utf8(writer).unwrap(), + /// r#" + /// ***** h1 + /// **** h2 + /// *** h3 + /// ** h4 + /// "#, + /// ); + /// + /// // cannot append an attached headline + /// assert!(d.append(h4, &mut org).is_err()); + /// ``` + pub fn append(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { + hdl.check_detached(org)?; + + if let Some(last) = self.last_child(org) { + hdl.check_level(1..=last.lvl)?; + } else { + hdl.check_level(1..=usize::max_value())?; + } + + self.doc_n.append(hdl.hdl_n, &mut org.arena); + + org.debug_validate(); + + Ok(()) + } + + /// Prepends a new child to this document. + /// + /// Returns an error if the given new child was already attached, + /// or the given new child didn't meet the requirements. 
+ /// + /// ```rust + /// # use orgize::{elements::Title, Headline, Org}; + /// # + /// let mut org = Org::parse( + /// r#" + /// ** h2 + /// ** h3 + /// "#, + /// ); + /// + /// let d = org.document(); + /// + /// let mut h1 = Headline::new( + /// Title { + /// raw: "h1".into(), + /// ..Default::default() + /// }, + /// &mut org, + /// ); + /// + /// // level must be greater than 2 + /// h1.set_level(1, &mut org).unwrap(); + /// assert!(d.prepend(h1, &mut org).is_err()); + /// + /// h1.set_level(4, &mut org).unwrap(); + /// assert!(d.prepend(h1, &mut org).is_ok()); + /// + /// let mut writer = Vec::new(); + /// org.write_org(&mut writer).unwrap(); + /// assert_eq!( + /// String::from_utf8(writer).unwrap(), + /// r#" + /// **** h1 + /// ** h2 + /// ** h3 + /// "#, + /// ); + /// + /// // cannot prepend an attached headline + /// assert!(d.prepend(h1, &mut org).is_err()); + /// ``` + pub fn prepend(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { + hdl.check_detached(org)?; + + if let Some(first) = self.first_child(org) { + hdl.check_level(first.lvl..=usize::MAX)?; + } else { + hdl.check_level(1..=usize::MAX)?; + } + + if let Some(sec_n) = self.sec_n { + sec_n.insert_after(hdl.hdl_n, &mut org.arena); + } else { + self.doc_n.prepend(hdl.hdl_n, &mut org.arena); + } + + org.debug_validate(); + + Ok(()) + } +} + +/// Represents a headline in `Org` struct. +/// +/// Each `Org` has zero or more `Headline`s. +#[derive(Copy, Clone, Debug)] +pub struct Headline { + lvl: usize, + hdl_n: NodeId, + ttl_n: NodeId, + sec_n: Option, +} + +impl Headline { + /// Creates a new detached Headline. 
+ pub fn new<'a>(ttl: Title<'a>, org: &mut Org<'a>) -> Headline { + let lvl = ttl.level; + let hdl_n = org.arena.new_node(Element::Headline { level: ttl.level }); + let ttl_n = org.arena.new_node(Element::Document { pre_blank: 0 }); // placeholder + hdl_n.append(ttl_n, &mut org.arena); + + match ttl.raw { + Cow::Borrowed(content) => parse_container( + &mut org.arena, + Container::Inline { + node: ttl_n, + content, + }, + &ParseConfig::default(), + ), + Cow::Owned(ref content) => parse_container( + &mut OwnedArena::new(&mut org.arena), + Container::Inline { + node: ttl_n, + content, + }, + &ParseConfig::default(), + ), + } + + org[ttl_n] = Element::Title(ttl); + + Headline { + lvl, + hdl_n, + ttl_n, + sec_n: None, + } + } + + pub(crate) fn from_node(hdl_n: NodeId, lvl: usize, org: &Org) -> Headline { + let ttl_n = org.arena[hdl_n].first_child().unwrap(); + let sec_n = org.arena[ttl_n].next_sibling().and_then(|n| match org[n] { + Element::Section => Some(n), + _ => None, + }); + + Headline { + lvl, + hdl_n, + ttl_n, + sec_n, + } + } + + /// Returns the level of this headline. + pub fn level(self) -> usize { + self.lvl + } + + /// Returns the ID of the headline element of this headline. + pub fn headline_node(self) -> NodeId { + self.hdl_n + } + + /// Returns the ID of the title element of this headline. + pub fn title_node(self) -> NodeId { + self.ttl_n + } + + /// Returns the ID of the section element of this headline, or `None` if it has no section. + pub fn section_node(self) -> Option { + self.sec_n + } + + /// Returns a reference to the title element of this headline. + pub fn title<'a: 'b, 'b>(self, org: &'b Org<'a>) -> &'b Title<'a> { + match &org[self.ttl_n] { + Element::Title(title) => title, + _ => unreachable!(), + } + } + + /// Returns a mutual reference to the title element of this headline. + /// + /// Don't change the level and content of the `&mut Titile` directly. + /// Alternatively, uses [`Headline::set_level`] and [`Headline::set_title_content`]. 
+ /// + /// [`Headline::set_level`]: #method.set_level + /// [`Headline::set_title_content`]: #method.set_title_content + /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse("* h1"); + /// + /// let h1 = org.headlines().nth(0).unwrap(); + /// + /// h1.title_mut(&mut org).priority = Some('A'); + /// + /// let mut writer = Vec::new(); + /// org.write_org(&mut writer).unwrap(); + /// assert_eq!( + /// String::from_utf8(writer).unwrap(), + /// "* [#A] h1\n", + /// ); + /// ``` + pub fn title_mut<'a: 'b, 'b>(self, org: &'b mut Org<'a>) -> &'b mut Title<'a> { + match &mut org[self.ttl_n] { + Element::Title(title) => title, + _ => unreachable!(), + } + } + + /// Changes the level of this headline. + /// + /// Returns an error if this headline is attached and the given new level + /// doesn't meet the requirements. + /// + /// ```rust + /// # use orgize::{elements::Title, Headline, Org}; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ****** h1_1 + /// *** h1_2 + /// ** h1_3 + /// "#, + /// ); + /// + /// let mut h1_2 = org.headlines().nth(2).unwrap(); + /// + /// // level must be greater than or equal to 2, and smaller than or equal to 6 + /// assert!(h1_2.set_level(42, &mut org).is_err()); + /// + /// assert!(h1_2.set_level(5, &mut org).is_ok()); + /// + /// let mut writer = Vec::new(); + /// org.write_org(&mut writer).unwrap(); + /// assert_eq!( + /// String::from_utf8(writer).unwrap(), + /// r#" + /// * h1 + /// ****** h1_1 + /// ***** h1_2 + /// ** h1_3 + /// "#, + /// ); + /// + /// // detached headline's levels can be changed freely + /// let mut new_headline = Headline::new( + /// Title { + /// raw: "new".into(), + /// ..Default::default() + /// }, + /// &mut org, + /// ); + /// new_headline.set_level(42, &mut org).unwrap(); + /// ``` + pub fn set_level(&mut self, lvl: usize, org: &mut Org) -> ValidationResult<()> { + if !self.is_detached(org) { + let min = self + .next(&org) + .or_else(|| self.parent(&org)) + 
.map(|hdl| hdl.lvl) + .unwrap_or(1); + let max = self + .previous(&org) + .map(|hdl| hdl.lvl) + .unwrap_or(usize::max_value()); + if !(min..=max).contains(&lvl) { + return Err(ValidationError::HeadlineLevelMismatch { + range: min..=max, + at: self.hdl_n, + }); + } + } + self.lvl = lvl; + self.title_mut(org).level = lvl; + if let Element::Headline { level } = &mut org[self.hdl_n] { + *level = lvl; + } + Ok(()) + } + + /// Changes the title content of this headline. + /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// "#, + /// ); + /// + /// let h1 = org.headlines().nth(0).unwrap(); + /// let h1_1 = org.headlines().nth(1).unwrap(); + /// + /// h1.set_title_content("H1", &mut org); + /// h1_1.set_title_content(String::from("*H1_1*"), &mut org); + /// + /// let mut writer = Vec::new(); + /// org.write_org(&mut writer).unwrap(); + /// assert_eq!( + /// String::from_utf8(writer).unwrap(), + /// r#" + /// * H1 + /// ** *H1_1* + /// "#, + /// ); + /// ``` + pub fn set_title_content<'a, S>(self, content: S, org: &mut Org<'a>) + where + S: Into>, + { + let content = content.into(); + + let children: Vec<_> = self.ttl_n.children(&org.arena).collect(); + for child in children { + child.detach(&mut org.arena); + } + + match &content { + Cow::Borrowed(content) => parse_container( + &mut org.arena, + Container::Inline { + node: self.ttl_n, + content, + }, + &ParseConfig::default(), + ), + Cow::Owned(ref content) => parse_container( + &mut OwnedArena::new(&mut org.arena), + Container::Inline { + node: self.ttl_n, + content, + }, + &ParseConfig::default(), + ), + } + + self.title_mut(org).raw = content; + + org.debug_validate(); + } + + /// Changes the section content of this headline. 
+ /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// s1_1 + /// "#, + /// ); + /// + /// let mut h1 = org.headlines().nth(0).unwrap(); + /// let mut h1_1 = org.headlines().nth(1).unwrap(); + /// + /// h1.set_section_content("s1", &mut org); + /// h1_1.set_section_content(String::from("*s1_1*"), &mut org); + /// + /// let mut writer = Vec::new(); + /// org.write_org(&mut writer).unwrap(); + /// assert_eq!( + /// String::from_utf8(writer).unwrap(), + /// r#" + /// * h1 + /// s1 + /// ** h1_1 + /// *s1_1* + /// "#, + /// ); + /// ``` + pub fn set_section_content<'a, S>(&mut self, content: S, org: &mut Org<'a>) + where + S: Into>, + { + if let Some(sec_n) = self.sec_n { + let children: Vec<_> = sec_n.children(&org.arena).collect(); + for child in children { + child.detach(&mut org.arena); + } + } else { + let sec_n = org.arena.new_node(Element::Section); + self.sec_n = Some(sec_n); + self.ttl_n.insert_after(sec_n, &mut org.arena); + } + + match content.into() { + Cow::Borrowed(content) => parse_container( + &mut org.arena, + Container::Block { + node: self.sec_n.unwrap(), + content, + }, + &ParseConfig::default(), + ), + Cow::Owned(ref content) => parse_container( + &mut OwnedArena::new(&mut org.arena), + Container::Block { + node: self.sec_n.unwrap(), + content, + }, + &ParseConfig::default(), + ), + } + + org.debug_validate(); + } + + /// Returns the parent of this headline, or `None` if it is detached or attached to the document. 
+ /// + /// ```rust + /// # use orgize::{elements::Title, Headline, Org}; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// ** h1_2 + /// *** h1_2_1 + /// *** h1_2_2 + /// ** h1_3 + /// "#, + /// ); + /// + /// let h1 = org.headlines().nth(0).unwrap(); + /// let h1_1 = org.headlines().nth(1).unwrap(); + /// let h1_2_1 = org.headlines().nth(3).unwrap(); + /// + /// assert_eq!(h1_1.parent(&org).unwrap().title(&org).raw, "h1"); + /// assert_eq!(h1_2_1.parent(&org).unwrap().title(&org).raw, "h1_2"); + /// + /// assert!(h1.parent(&org).is_none()); + /// + /// // detached headline have no parent + /// assert!(Headline::new(Title::default(), &mut org).parent(&org).is_none()); + /// ``` + pub fn parent(self, org: &Org) -> Option { + org.arena[self.hdl_n].parent().and_then(|n| match org[n] { + Element::Headline { level } => Some(Headline::from_node(n, level, org)), + Element::Document { .. } => None, + _ => unreachable!(), + }) + } + + /// Returns an iterator of this headline's children. + /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// ** h1_2 + /// *** h1_2_1 + /// *** h1_2_2 + /// ** h1_3 + /// "#, + /// ); + /// + /// let h1 = org.headlines().nth(0).unwrap(); + /// + /// let mut iter = h1.children(&org); + /// + /// assert_eq!(iter.next().unwrap().title(&org).raw, "h1_1"); + /// assert_eq!(iter.next().unwrap().title(&org).raw, "h1_2"); + /// assert_eq!(iter.next().unwrap().title(&org).raw, "h1_3"); + /// assert!(iter.next().is_none()); + /// ``` + pub fn children<'a>(self, org: &'a Org) -> impl Iterator + 'a { + self.hdl_n + .children(&org.arena) + // skip title and section + .skip(if self.sec_n.is_some() { 2 } else { 1 }) + .filter_map(move |n| match org[n] { + Element::Headline { level } => Some(Headline::from_node(n, level, org)), + _ => unreachable!(), + }) + } + + /// Returns the first child of this headline, or `None` if it has no child. 
+ /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// ** h1_2 + /// *** h1_2_1 + /// *** h1_2_2 + /// ** h1_3 + /// "#, + /// ); + /// + /// let h1_1 = org.headlines().nth(1).unwrap(); + /// let h1_2 = org.headlines().nth(2).unwrap(); + /// let h1_3 = org.headlines().nth(5).unwrap(); + /// + /// assert_eq!(h1_2.first_child(&org).unwrap().title(&org).raw, "h1_2_1"); + /// + /// assert!(h1_1.first_child(&org).is_none()); + /// assert!(h1_3.first_child(&org).is_none()); + /// ``` + pub fn first_child(self, org: &Org) -> Option { + self.hdl_n + .children(&org.arena) + // skip title and section + .nth(if self.sec_n.is_some() { 2 } else { 1 }) + .map(|n| match org[n] { + Element::Headline { level } => Headline::from_node(n, level, org), + _ => unreachable!(), + }) + } + + /// Returns the last child of this headline, or `None` if it has no child. + /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// ** h1_2 + /// *** h1_2_1 + /// *** h1_2_2 + /// ** h1_3 + /// "#, + /// ); + /// + /// let h1_1 = org.headlines().nth(1).unwrap(); + /// let h1_2 = org.headlines().nth(2).unwrap(); + /// let h1_3 = org.headlines().nth(5).unwrap(); + /// + /// assert_eq!(h1_2.last_child(&org).unwrap().title(&org).raw, "h1_2_2"); + /// + /// assert!(h1_1.last_child(&org).is_none()); + /// assert!(h1_3.last_child(&org).is_none()); + /// ``` + pub fn last_child(self, org: &Org) -> Option { + org.arena[self.hdl_n] + .last_child() + .and_then(|n| match org[n] { + Element::Headline { level } => Some(Headline::from_node(n, level, org)), + Element::Section | Element::Title(_) => None, + _ => unreachable!("Headline should only contains section and headline."), + }) + } + + /// Returns the previous sibling of this headline, or `None` if it is a first child. 
+ /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// ** h1_2 + /// *** h1_2_1 + /// *** h1_2_2 + /// ** h1_3 + /// "#, + /// ); + /// + /// let h1_1 = org.headlines().nth(1).unwrap(); + /// let h1_2 = org.headlines().nth(2).unwrap(); + /// let h1_2_1 = org.headlines().nth(3).unwrap(); + /// + /// assert_eq!(h1_2.previous(&org).unwrap().title(&org).raw, "h1_1"); + /// + /// assert!(h1_1.previous(&org).is_none()); + /// assert!(h1_2_1.previous(&org).is_none()); + /// ``` + pub fn previous(self, org: &Org) -> Option { + org.arena[self.hdl_n] + .previous_sibling() + .and_then(|n| match org[n] { + Element::Headline { level } => Some(Headline::from_node(n, level, org)), + Element::Title(_) | Element::Section => None, + _ => unreachable!(), + }) + } + + /// Returns the next sibling of this headline, or `None` if it is a last child. + /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// ** h1_2 + /// *** h1_2_1 + /// *** h1_2_2 + /// ** h1_3 + /// "#, + /// ); + /// + /// let h1_2 = org.headlines().nth(2).unwrap(); + /// let h1_2_2 = org.headlines().nth(4).unwrap(); + /// let h1_3 = org.headlines().nth(5).unwrap(); + /// + /// assert_eq!(h1_2.next(&org).unwrap().title(&org).raw, "h1_3"); + /// + /// assert!(h1_3.next(&org).is_none()); + /// assert!(h1_2_2.next(&org).is_none()); + /// ``` + pub fn next(self, org: &Org) -> Option { + org.arena[self.hdl_n].next_sibling().map(|n| match org[n] { + Element::Headline { level } => Headline::from_node(n, level, org), + _ => unreachable!(), + }) + } + + /// Detaches this headline from arena. 
+ /// + /// ```rust + /// # use orgize::Org; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// ** h1_2 + /// *** h1_2_1 + /// *** h1_2_2 + /// ** h1_3 + /// "#, + /// ); + /// + /// let h1_2 = org.headlines().nth(2).unwrap(); + /// + /// h1_2.detach(&mut org); + /// + /// let mut writer = Vec::new(); + /// org.write_org(&mut writer).unwrap(); + /// assert_eq!( + /// String::from_utf8(writer).unwrap(), + /// r#" + /// * h1 + /// ** h1_1 + /// ** h1_3 + /// "#, + /// ); + /// ``` + pub fn detach(self, org: &mut Org) { + self.hdl_n.detach(&mut org.arena); + } + + /// Returns `true` if this headline is detached. + pub fn is_detached(self, org: &Org) -> bool { + org.arena[self.hdl_n].parent().is_none() + } + + /// Appends a new child to this headline. + /// + /// Returns an error if the given new child was already attached, or + /// the given new child didn't meet the requirements. + /// + /// ```rust + /// # use orgize::{elements::Title, Headline, Org}; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// ***** h1_1_1 + /// "#, + /// ); + /// + /// let h1_1 = org.headlines().nth(1).unwrap(); + /// + /// let mut h1_1_2 = Headline::new( + /// Title { + /// raw: "h1_1_2".into(), + /// ..Default::default() + /// }, + /// &mut org, + /// ); + /// + /// // level must be greater than 2, and smaller than or equal to 5 + /// h1_1_2.set_level(2, &mut org).unwrap(); + /// assert!(h1_1.append(h1_1_2, &mut org).is_err()); + /// h1_1_2.set_level(6, &mut org).unwrap(); + /// assert!(h1_1.append(h1_1_2, &mut org).is_err()); + /// + /// h1_1_2.set_level(4, &mut org).unwrap(); + /// assert!(h1_1.append(h1_1_2, &mut org).is_ok()); + /// + /// let mut writer = Vec::new(); + /// org.write_org(&mut writer).unwrap(); + /// assert_eq!( + /// String::from_utf8(writer).unwrap(), + /// r#" + /// * h1 + /// ** h1_1 + /// ***** h1_1_1 + /// **** h1_1_2 + /// "#, + /// ); + /// + /// // cannot append an attached headline + /// 
assert!(h1_1.append(h1_1_2, &mut org).is_err()); + /// ``` + pub fn append(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { + hdl.check_detached(org)?; + + if let Some(last) = self.last_child(org) { + hdl.check_level(self.lvl + 1..=last.lvl)?; + } else { + hdl.check_level(self.lvl + 1..=usize::MAX)?; + } + + self.hdl_n.append(hdl.hdl_n, &mut org.arena); + + org.debug_validate(); + + Ok(()) + } + + /// Prepends a new child to this headline. + /// + /// Returns an error if the given new child was already attached, or + /// the given new child didn't meet the requirements. + /// + /// ```rust + /// # use orgize::{elements::Title, Headline, Org}; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// ***** h1_1_1 + /// "#, + /// ); + /// + /// let h1_1 = org.headlines().nth(1).unwrap(); + /// + /// let mut h1_1_2 = Headline::new( + /// Title { + /// raw: "h1_1_2".into(), + /// ..Default::default() + /// }, + /// &mut org, + /// ); + /// + /// // level must be greater than or equal to 5 + /// h1_1_2.set_level(2, &mut org).unwrap(); + /// assert!(h1_1.prepend(h1_1_2, &mut org).is_err()); + /// + /// h1_1_2.set_level(5, &mut org).unwrap(); + /// assert!(h1_1.prepend(h1_1_2, &mut org).is_ok()); + /// + /// // cannot prepend an attached headline + /// assert!(h1_1.prepend(h1_1_2, &mut org).is_err()); + /// ``` + pub fn prepend(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { + hdl.check_detached(org)?; + + if let Some(first) = self.first_child(org) { + hdl.check_level(first.lvl..=usize::MAX)?; + } else { + hdl.check_level(self.lvl + 1..=usize::MAX)?; + } + + self.sec_n + .unwrap_or(self.ttl_n) + .insert_after(hdl.hdl_n, &mut org.arena); + + org.debug_validate(); + + Ok(()) + } + + /// Inserts a new sibling before this headline. + /// + /// Returns an error if the given new child was already attached, or + /// the given new child didn't meet the requirements. 
+ /// + /// ```rust + /// # use orgize::{elements::Title, Headline, Org}; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// **** h1_1_1 + /// *** h1_1_3 + /// "#, + /// ); + /// + /// let h1_1_3 = org.headlines().nth(3).unwrap(); + /// + /// let mut h1_1_2 = Headline::new( + /// Title { + /// raw: "h1_1_2".into(), + /// ..Default::default() + /// }, + /// &mut org, + /// ); + /// + /// // level must be greater than or equal to 3, but smaller than or equal to 4 + /// h1_1_2.set_level(2, &mut org).unwrap(); + /// assert!(h1_1_3.insert_before(h1_1_2, &mut org).is_err()); + /// h1_1_2.set_level(5, &mut org).unwrap(); + /// assert!(h1_1_3.insert_before(h1_1_2, &mut org).is_err()); + /// + /// h1_1_2.set_level(4, &mut org).unwrap(); + /// assert!(h1_1_3.insert_before(h1_1_2, &mut org).is_ok()); + /// + /// let mut writer = Vec::new(); + /// org.write_org(&mut writer).unwrap(); + /// assert_eq!( + /// String::from_utf8(writer).unwrap(), + /// r#" + /// * h1 + /// ** h1_1 + /// **** h1_1_1 + /// **** h1_1_2 + /// *** h1_1_3 + /// "#, + /// ); + /// + /// // cannot insert an attached headline + /// assert!(h1_1_3.insert_before(h1_1_2, &mut org).is_err()); + /// ``` + pub fn insert_before(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { + hdl.check_detached(org)?; + + if let Some(previous) = self.previous(org) { + hdl.check_level(self.lvl..=previous.lvl)?; + } else { + hdl.check_level(self.lvl..=usize::MAX)?; + } + + self.hdl_n.insert_before(hdl.hdl_n, &mut org.arena); + + org.debug_validate(); + + Ok(()) + } + + /// Inserts a new sibling after this headline. + /// + /// Returns an error if the given new child was already attached, or + /// the given new child didn't meet the requirements. 
+ /// + /// ```rust + /// # use orgize::{elements::Title, Headline, Org}; + /// # + /// let mut org = Org::parse( + /// r#" + /// * h1 + /// ** h1_1 + /// **** h1_1_1 + /// *** h1_1_3 + /// "#, + /// ); + /// + /// let h1_1_1 = org.headlines().nth(2).unwrap(); + /// + /// let mut h1_1_2 = Headline::new( + /// Title { + /// raw: "h1_1_2".into(), + /// ..Default::default() + /// }, + /// &mut org, + /// ); + /// + /// // level must be greater than or equal to 3, but smaller than or equal to 4 + /// h1_1_2.set_level(2, &mut org).unwrap(); + /// assert!(h1_1_1.insert_after(h1_1_2, &mut org).is_err()); + /// h1_1_2.set_level(5, &mut org).unwrap(); + /// assert!(h1_1_1.insert_after(h1_1_2, &mut org).is_err()); + /// + /// h1_1_2.set_level(4, &mut org).unwrap(); + /// assert!(h1_1_1.insert_after(h1_1_2, &mut org).is_ok()); + /// + /// let mut writer = Vec::new(); + /// org.write_org(&mut writer).unwrap(); + /// assert_eq!( + /// String::from_utf8(writer).unwrap(), + /// r#" + /// * h1 + /// ** h1_1 + /// **** h1_1_1 + /// **** h1_1_2 + /// *** h1_1_3 + /// "#, + /// ); + /// + /// // cannot insert an attached headline + /// assert!(h1_1_1.insert_after(h1_1_2, &mut org).is_err()); + /// ``` + pub fn insert_after(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { + hdl.check_detached(org)?; + + if let Some(next) = self.next(org) { + hdl.check_level(next.lvl..=self.lvl)?; + } else if let Some(parent) = self.parent(org) { + hdl.check_level(parent.lvl + 1..=self.lvl)?; + } else { + hdl.check_level(1..=self.lvl)?; + } + + self.hdl_n.insert_after(hdl.hdl_n, &mut org.arena); + + org.debug_validate(); + + Ok(()) + } + + fn check_detached(self, org: &Org) -> ValidationResult<()> { + if !self.is_detached(org) { + Err(ValidationError::ExpectedDetached { at: self.hdl_n }) + } else { + Ok(()) + } + } + + fn check_level(self, range: RangeInclusive) -> ValidationResult<()> { + if !range.contains(&self.lvl) { + Err(ValidationError::HeadlineLevelMismatch { + range, + at: 
self.hdl_n, + }) + } else { + Ok(()) + } + } +} + +impl Org<'_> { + /// Returns the `Document`. + pub fn document(&self) -> Document { + Document::from_org(self) + } + + /// Returns an iterator of `Headline`s. + pub fn headlines(&self) -> impl Iterator + '_ { + self.root + .descendants(&self.arena) + .skip(1) + .filter_map(move |node| match self[node] { + Element::Headline { level } => Some(Headline::from_node(node, level, self)), + _ => None, + }) + } +} diff --git a/src/lib.rs b/src/lib.rs index c1fd96a..f8e81f0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,23 +1,242 @@ -#![doc = include_str!("../README.md")] +//! A Rust library for parsing orgmode files. +//! +//! [Live demo](https://orgize.herokuapp.com/) +//! +//! # Parse +//! +//! To parse a orgmode string, simply invoking the [`Org::parse`] function: +//! +//! [`Org::parse`]: struct.Org.html#method.parse +//! +//! ```rust +//! use orgize::Org; +//! +//! Org::parse("* DONE Title :tag:"); +//! ``` +//! +//! or [`Org::parse_custom`]: +//! +//! [`Org::parse_custom`]: struct.Org.html#method.parse_custom +//! +//! ```rust +//! use orgize::{Org, ParseConfig}; +//! +//! Org::parse_custom( +//! "* TASK Title 1", +//! &ParseConfig { +//! // custom todo keywords +//! todo_keywords: (vec!["TASK".to_string()], vec![]), +//! ..Default::default() +//! }, +//! ); +//! ``` +//! +//! # Iter +//! +//! [`Org::iter`] function will returns an iterator of [`Event`]s, which is +//! a simple wrapper of [`Element`]. +//! +//! [`Org::iter`]: struct.Org.html#method.iter +//! [`Event`]: enum.Event.html +//! [`Element`]: elements/enum.Element.html +//! +//! ```rust +//! use orgize::Org; +//! +//! for event in Org::parse("* DONE Title :tag:").iter() { +//! // handling the event +//! } +//! ``` +//! +//! **Note**: whether an element is container or not, it will appears twice in one loop. +//! One as [`Event::Start(element)`], one as [`Event::End(element)`]. +//! +//! [`Event::Start(element)`]: enum.Event.html#variant.Start +//! 
[`Event::End(element)`]: enum.Event.html#variant.End +//! +//! # Render html +//! +//! You can call the [`Org::write_html`] function to generate html directly, which +//! uses the [`DefaultHtmlHandler`] internally: +//! +//! [`Org::write_html`]: struct.Org.html#method.write_html +//! [`DefaultHtmlHandler`]: export/struct.DefaultHtmlHandler.html +//! +//! ```rust +//! use orgize::Org; +//! +//! let mut writer = Vec::new(); +//! Org::parse("* title\n*section*").write_html(&mut writer).unwrap(); +//! +//! assert_eq!( +//! String::from_utf8(writer).unwrap(), +//! "

    <main><h1>title</h1><section><p><b>section</b></p></section></main>

    " +//! ); +//! ``` +//! +//! # Render html with custom `HtmlHandler` +//! +//! To customize html rendering, simply implement the [`HtmlHandler`] trait and pass +//! it to the [`Org::write_html_custom`] function. +//! +//! [`HtmlHandler`]: export/trait.HtmlHandler.html +//! [`Org::write_html_custom`]: struct.Org.html#method.write_html_custom +//! +//! The following code demonstrates how to add an id to every headline and return +//! your own error type while rendering. +//! +//! ```rust +//! use std::convert::From; +//! use std::io::{Error as IOError, Write}; +//! use std::string::FromUtf8Error; +//! +//! use orgize::export::{DefaultHtmlHandler, HtmlHandler}; +//! use orgize::{Element, Org}; +//! use slugify::slugify; +//! +//! #[derive(Debug)] +//! enum MyError { +//! IO(IOError), +//! Heading, +//! Utf8(FromUtf8Error), +//! } +//! +//! // From trait is required for custom error type +//! impl From<IOError> for MyError { +//! fn from(err: IOError) -> Self { +//! MyError::IO(err) +//! } +//! } +//! +//! impl From<FromUtf8Error> for MyError { +//! fn from(err: FromUtf8Error) -> Self { +//! MyError::Utf8(err) +//! } +//! } +//! +//! #[derive(Default)] +//! struct MyHtmlHandler(DefaultHtmlHandler); +//! +//! impl HtmlHandler<MyError> for MyHtmlHandler { +//! fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { +//! if let Element::Title(title) = element { +//! if title.level > 6 { +//! return Err(MyError::Heading); +//! } else { +//! write!( +//! w, +//! "<h{0}><a id=\"{1}\" href=\"#{1}\">", +//! title.level, +//! slugify!(&title.raw), +//! )?; +//! } +//! } else { +//! // fallthrough to default handler +//! self.0.start(w, element)?; +//! } +//! Ok(()) +//! } +//! +//! fn end<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { +//! if let Element::Title(title) = element { +//! write!(w, "</a></h{}>", title.level)?; +//! } else { +//! self.0.end(w, element)?; +//! } +//! Ok(()) +//! } +//! } +//! +//! fn main() -> Result<(), MyError> { +//! let mut writer = Vec::new(); +//!
let mut handler = MyHtmlHandler::default(); +//! Org::parse("* title\n*section*").write_html_custom(&mut writer, &mut handler)?; +//! +//! assert_eq!( +//! String::from_utf8(writer)?, +//! "

    <main><h1><a id=\"title\" href=\"#title\">title</a></h1>\ +//!
    <section><p><b>section</b></p></section></main>

    " +//! ); +//! +//! Ok(()) +//! } +//! ``` +//! +//! **Note**: as I mentioned above, each element will appears two times while iterating. +//! And handler will silently ignores all end events from non-container elements. +//! +//! So if you want to change how a non-container element renders, just redefine the `start` +//! function and leave the `end` function unchanged. +//! +//! # Serde +//! +//! `Org` struct have already implemented serde's `Serialize` trait. It means you can +//! serialize it into any format supported by serde, such as json: +//! +//! ```rust +//! use orgize::Org; +//! use serde_json::{json, to_string}; +//! +//! let org = Org::parse("I 'm *bold*."); +//! #[cfg(feature = "ser")] +//! println!("{}", to_string(&org).unwrap()); +//! +//! // { +//! // "type": "document", +//! // "children": [{ +//! // "type": "section", +//! // "children": [{ +//! // "type": "paragraph", +//! // "children":[{ +//! // "type": "text", +//! // "value":"I 'm " +//! // }, { +//! // "type": "bold", +//! // "children":[{ +//! // "type": "text", +//! // "value": "bold" +//! // }] +//! // }, { +//! // "type":"text", +//! // "value":"." +//! // }] +//! // }] +//! // }] +//! // } +//! ``` +//! +//! # Features +//! +//! By now, orgize provides three features: +//! +//! + `ser`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default. +//! +//! + `chrono`: adds the ability to convert `Datetime` into `chrono` structs, disabled by default. +//! +//! + `syntect`: provides [`SyntectHtmlHandler`] for highlighting code block, disabled by default. +//! +//! [`SyntectHtmlHandler`]: export/struct.SyntectHtmlHandler.html +//! +//! # License +//! +//! MIT -pub mod ast; -pub mod config; -mod entities; +mod config; +pub mod elements; pub mod export; +mod headline; mod org; -mod replace; -mod syntax; -#[cfg(test)] -mod tests; +mod parse; +mod parsers; +mod validate; -// Re-export of the rowan crate. -pub use rowan; +// Re-export of the indextree crate. 
+pub use indextree; +#[cfg(feature = "syntect")] +pub use syntect; pub use config::ParseConfig; -pub use org::Org; -pub use rowan::{TextRange, TextSize}; -pub use syntax::{ - SyntaxElement, SyntaxElementChildren, SyntaxKind, SyntaxNode, SyntaxNodeChildren, SyntaxToken, -}; - -pub(crate) use syntax::combinator::lossless_parser; +pub use elements::Element; +pub use headline::{Document, Headline}; +pub use org::{Event, Org}; +pub use validate::ValidationError; diff --git a/src/org.rs b/src/org.rs index f0e736b..37c06fc 100644 --- a/src/org.rs +++ b/src/org.rs @@ -1,106 +1,193 @@ -use rowan::ast::AstNode; -use rowan::{GreenNode, TextSize}; +use indextree::{Arena, NodeEdge, NodeId}; +use std::io::{Error, Write}; +use std::ops::{Index, IndexMut}; -use crate::ast::Document; -use crate::config::ParseConfig; -use crate::export::{HtmlExport, TraversalContext, Traverser}; -use crate::syntax::{OrgLanguage, SyntaxNode}; -use crate::SyntaxElement; +use crate::{ + config::{ParseConfig, DEFAULT_CONFIG}, + elements::{Element, Keyword}, + export::{DefaultHtmlHandler, DefaultOrgHandler, HtmlHandler, OrgHandler}, + parsers::{blank_lines_count, parse_container, Container, OwnedArena}, +}; + +pub struct Org<'a> { + pub(crate) arena: Arena>, + pub(crate) root: NodeId, +} #[derive(Debug)] -pub struct Org { - pub(crate) green: GreenNode, - pub(crate) config: ParseConfig, +pub enum Event<'a, 'b> { + Start(&'b Element<'a>), + End(&'b Element<'a>), } -impl Org { - /// Parse input string to Org element tree using default parse config - pub fn parse(input: impl AsRef) -> Org { - ParseConfig::default().parse(input) +impl<'a> Org<'a> { + /// Creates a new empty `Org` struct. + pub fn new() -> Org<'static> { + let mut arena = Arena::new(); + let root = arena.new_node(Element::Document { pre_blank: 0 }); + Org { arena, root } } - pub fn green(&self) -> &GreenNode { - &self.green + /// Parses string `text` into `Org` struct. 
+ pub fn parse(text: &'a str) -> Org<'a> { + Org::parse_custom(text, &DEFAULT_CONFIG) } - pub fn config(&self) -> &ParseConfig { - &self.config + /// Like `parse`, but accepts `String`. + pub fn parse_string(text: String) -> Org<'static> { + Org::parse_string_custom(text, &DEFAULT_CONFIG) } - /// Returns the document - pub fn document(&self) -> Document { - Document { - syntax: SyntaxNode::new_root(self.green.clone()), - } - } + /// Parses string `text` into `Org` struct with custom `ParseConfig`. + pub fn parse_custom(text: &'a str, config: &ParseConfig) -> Org<'a> { + let mut arena = Arena::new(); + let (text, pre_blank) = blank_lines_count(text); + let root = arena.new_node(Element::Document { pre_blank }); + let mut org = Org { arena, root }; - /// Returns org-mode string - pub fn to_org(&self) -> String { - self.green.to_string() - } - - /// Convert org element tree to html-format using default html handler - pub fn to_html(&self) -> String { - let mut handler = HtmlExport::default(); - self.traverse(&mut handler); - handler.finish() - } - - /// Walk through org element tree using given traverser - pub fn traverse(&self, t: &mut T) { - let mut ctx = TraversalContext::default(); - t.element( - SyntaxElement::Node(SyntaxNode::new_root(self.green.clone())), - &mut ctx, + parse_container( + &mut org.arena, + Container::Document { + content: text, + node: org.root, + }, + config, ); + + org.debug_validate(); + + org } - /// Returns the first node in org element tree in depth first order - pub fn first_node>(&self) -> Option { - fn find>(node: SyntaxNode) -> Option { - if N::can_cast(node.kind()) { - N::cast(node) - } else { - node.children().find_map(find) - } - } - find(SyntaxNode::new_root(self.green.clone())) + /// Like `parse_custom`, but accepts `String`.
+ pub fn parse_string_custom(text: String, config: &ParseConfig) -> Org<'static> { + let mut arena = Arena::new(); + let (text, pre_blank) = blank_lines_count(&text); + let root = arena.new_node(Element::Document { pre_blank }); + let mut org = Org { arena, root }; + + parse_container( + &mut OwnedArena::new(&mut org.arena), + Container::Document { + content: text, + node: org.root, + }, + config, + ); + + org.debug_validate(); + + org } - /// Returns node in given offset - /// - /// ```rust - /// use orgize::{Org, ast::Headline}; - /// - /// let org = Org::parse("\n\n* foo\n* bar"); - /// - /// assert!(org.node_at_offset::(0).is_none()); - /// - /// let hdl = org.node_at_offset::(2).unwrap(); - /// assert_eq!(hdl.title_raw(), "foo"); - /// - /// let hdl = org.node_at_offset::(9).unwrap(); - /// assert_eq!(hdl.title_raw(), "bar"); - /// - /// assert!(org.node_at_offset::(999).is_none()); - /// ``` - pub fn node_at_offset>( - &self, - offset: impl Into, - ) -> Option { - let offset = offset.into(); - fn find>( - node: SyntaxNode, - offset: TextSize, - ) -> Option { - if !node.text_range().contains(offset) { - None - } else if N::can_cast(node.kind()) { - N::cast(node) - } else { - node.children().find_map(|node| find(node, offset)) + /// Returns a reference to the underlying arena. + pub fn arena(&self) -> &Arena> { + &self.arena + } + + /// Returns a mutable reference to the underlying arena. + pub fn arena_mut(&mut self) -> &mut Arena> { + &mut self.arena + } + + /// Returns an iterator of `Event`s. + pub fn iter<'b>(&'b self) -> impl Iterator> + 'b { + self.root.traverse(&self.arena).map(move |edge| match edge { + NodeEdge::Start(node) => Event::Start(&self[node]), + NodeEdge::End(node) => Event::End(&self[node]), + }) + } + + /// Returns an iterator of `Keyword`s.
+ pub fn keywords(&self) -> impl Iterator> { + self.root + .descendants(&self.arena) + .skip(1) + .filter_map(move |node| match &self[node] { + Element::Keyword(kw) => Some(kw), + _ => None, + }) + } + + /// Writes an `Org` struct as html format. + pub fn write_html(&self, writer: W) -> Result<(), Error> + where + W: Write, + { + self.write_html_custom(writer, &mut DefaultHtmlHandler) + } + + /// Writes an `Org` struct as html format with custom `HtmlHandler`. + pub fn write_html_custom(&self, mut writer: W, handler: &mut H) -> Result<(), E> + where + W: Write, + E: From, + H: HtmlHandler, + { + for event in self.iter() { + match event { + Event::Start(element) => handler.start(&mut writer, element)?, + Event::End(element) => handler.end(&mut writer, element)?, } } - find(SyntaxNode::new_root(self.green.clone()), offset) + + Ok(()) + } + + /// Writes an `Org` struct as org format. + pub fn write_org(&self, writer: W) -> Result<(), Error> + where + W: Write, + { + self.write_org_custom(writer, &mut DefaultOrgHandler) + } + + /// Writes an `Org` struct as org format with custom `OrgHandler`. 
+ pub fn write_org_custom(&self, mut writer: W, handler: &mut H) -> Result<(), E> + where + W: Write, + E: From, + H: OrgHandler, + { + for event in self.iter() { + match event { + Event::Start(element) => handler.start(&mut writer, element)?, + Event::End(element) => handler.end(&mut writer, element)?, + } + } + + Ok(()) + } +} + +impl Default for Org<'static> { + fn default() -> Self { + Org::new() + } +} + +impl<'a> Index for Org<'a> { + type Output = Element<'a>; + + fn index(&self, node_id: NodeId) -> &Self::Output { + self.arena[node_id].get() + } +} + +impl<'a> IndexMut for Org<'a> { + fn index_mut(&mut self, node_id: NodeId) -> &mut Self::Output { + self.arena[node_id].get_mut() + } +} + +#[cfg(feature = "ser")] +use serde::{ser::Serializer, Serialize}; + +#[cfg(feature = "ser")] +impl Serialize for Org<'_> { + fn serialize(&self, serializer: S) -> Result { + use serde_indextree::Node; + + serializer.serialize_newtype_struct("Org", &Node::new(self.root, &self.arena)) } } diff --git a/src/parse/combinators.rs b/src/parse/combinators.rs new file mode 100644 index 0000000..f5d518e --- /dev/null +++ b/src/parse/combinators.rs @@ -0,0 +1,136 @@ +//! 
Parsers combinators + +use memchr::memchr; +use nom::{ + bytes::complete::take_while1, + combinator::verify, + error::{make_error, ErrorKind}, + Err, IResult, +}; + +// read until the first line_ending, if line_ending is not present, return the input directly +pub fn line(input: &str) -> IResult<&str, &str, ()> { + if let Some(i) = memchr(b'\n', input.as_bytes()) { + if i > 0 && input.as_bytes()[i - 1] == b'\r' { + Ok((&input[i + 1..], &input[0..i - 1])) + } else { + Ok((&input[i + 1..], &input[0..i])) + } + } else { + Ok(("", input)) + } +} + +pub fn lines_till(predicate: F) -> impl Fn(&str) -> IResult<&str, &str, ()> +where + F: Fn(&str) -> bool, +{ + move |i| { + let mut input = i; + + loop { + // TODO: better error kind + if input.is_empty() { + return Err(Err::Error(make_error(input, ErrorKind::Many0))); + } + + let (input_, line_) = line(input)?; + + debug_assert_ne!(input, input_); + + if predicate(line_) { + let offset = i.len() - input.len(); + return Ok((input_, &i[0..offset])); + } + + input = input_; + } + } +} + +pub fn lines_while(predicate: F) -> impl Fn(&str) -> IResult<&str, &str, ()> +where + F: Fn(&str) -> bool, +{ + move |i| { + let mut input = i; + + loop { + // unlike lines_till, line_while won't return error + if input.is_empty() { + return Ok(("", i)); + } + + let (input_, line_) = line(input)?; + + debug_assert_ne!(input, input_); + + if !predicate(line_) { + let offset = i.len() - input.len(); + return Ok((input, &i[0..offset])); + } + + input = input_; + } + } +} + +#[test] +fn test_lines_while() { + assert_eq!(lines_while(|line| line == "foo")("foo"), Ok(("", "foo"))); + assert_eq!(lines_while(|line| line == "foo")("bar"), Ok(("bar", ""))); + assert_eq!( + lines_while(|line| line == "foo")("foo\n\n"), + Ok(("\n", "foo\n")) + ); + assert_eq!( + lines_while(|line| line.trim().is_empty())("\n\n\n"), + Ok(("", "\n\n\n")) + ); +} + +pub fn eol(input: &str) -> IResult<&str, &str, ()> { + verify(line, |s: &str| { + 
s.as_bytes().iter().all(u8::is_ascii_whitespace) + })(input) +} + +pub fn one_word(input: &str) -> IResult<&str, &str, ()> { + take_while1(|c: char| !c.is_ascii_whitespace())(input) +} + +pub fn blank_lines_count(input: &str) -> IResult<&str, usize, ()> { + let mut count = 0; + let mut input = input; + + loop { + if input.is_empty() { + return Ok(("", count)); + } + + let (input_, line_) = line(input)?; + + debug_assert_ne!(input, input_); + + if !line_.chars().all(char::is_whitespace) { + return Ok((input, count)); + } + + count += 1; + + input = input_; + } +} + +#[test] +fn test_blank_lines_count() { + assert_eq!(blank_lines_count("foo"), Ok(("foo", 0))); + assert_eq!(blank_lines_count(" foo"), Ok((" foo", 0))); + assert_eq!(blank_lines_count(" \t\nfoo\n"), Ok(("foo\n", 1))); + assert_eq!(blank_lines_count("\n \r\n\nfoo\n"), Ok(("foo\n", 3))); + assert_eq!( + blank_lines_count("\r\n \n \r\n foo\n"), + Ok((" foo\n", 3)) + ); + assert_eq!(blank_lines_count("\r\n \n \r\n \n"), Ok(("", 4))); +} diff --git a/src/parse/mod.rs b/src/parse/mod.rs new file mode 100644 index 0000000..0c49327 --- /dev/null +++ b/src/parse/mod.rs @@ -0,0 +1 @@ +pub mod combinators; diff --git a/src/parsers.rs b/src/parsers.rs new file mode 100644 index 0000000..d277057 --- /dev/null +++ b/src/parsers.rs @@ -0,0 +1,657 @@ +use std::iter::once; +use std::marker::PhantomData; + +use indextree::{Arena, NodeId}; +use jetscii::{bytes, BytesConst}; +use memchr::{memchr, memchr_iter}; +use nom::bytes::complete::take_while1; + +use crate::config::ParseConfig; +use crate::elements::{ + block::RawBlock, emphasis::Emphasis, keyword::RawKeyword, radio_target::parse_radio_target, + Clock, Comment, Cookie, Drawer, DynBlock, Element, FixedWidth, FnDef, FnRef, InlineCall, + InlineSrc, Link, List, ListItem, Macros, Rule, Snippet, Table, TableCell, TableRow, Target, + Timestamp, Title, +}; +use crate::parse::combinators::lines_while; + +pub trait ElementArena<'a> { + fn append(&mut self, element: T, parent: 
NodeId) -> NodeId + where + T: Into>; + fn insert_before_last_child(&mut self, element: T, parent: NodeId) -> NodeId + where + T: Into>; + fn set(&mut self, node: NodeId, element: T) + where + T: Into>; +} + +pub type BorrowedArena<'a> = Arena>; + +impl<'a> ElementArena<'a> for BorrowedArena<'a> { + fn append(&mut self, element: T, parent: NodeId) -> NodeId + where + T: Into>, + { + let node = self.new_node(element.into()); + parent.append(node, self); + node + } + + fn insert_before_last_child(&mut self, element: T, parent: NodeId) -> NodeId + where + T: Into>, + { + if let Some(child) = self[parent].last_child() { + let node = self.new_node(element.into()); + child.insert_before(node, self); + node + } else { + self.append(element, parent) + } + } + + fn set(&mut self, node: NodeId, element: T) + where + T: Into>, + { + *self[node].get_mut() = element.into(); + } +} + +pub struct OwnedArena<'a, 'b, 'c> { + arena: &'b mut Arena>, + phantom: PhantomData<&'a ()>, +} + +impl<'a, 'b, 'c> OwnedArena<'a, 'b, 'c> { + pub fn new(arena: &'b mut Arena>) -> OwnedArena<'a, 'b, 'c> { + OwnedArena { + arena, + phantom: PhantomData, + } + } +} + +impl<'a> ElementArena<'a> for OwnedArena<'a, '_, '_> { + fn append(&mut self, element: T, parent: NodeId) -> NodeId + where + T: Into>, + { + self.arena.append(element.into().into_owned(), parent) + } + + fn insert_before_last_child(&mut self, element: T, parent: NodeId) -> NodeId + where + T: Into>, + { + self.arena + .insert_before_last_child(element.into().into_owned(), parent) + } + + fn set(&mut self, node: NodeId, element: T) + where + T: Into>, + { + self.arena.set(node, element.into().into_owned()); + } +} + +#[derive(Debug)] +pub enum Container<'a> { + // Block, List Item + Block { content: &'a str, node: NodeId }, + // Paragraph, Inline Markup + Inline { content: &'a str, node: NodeId }, + // Headline + Headline { content: &'a str, node: NodeId }, + // Document + Document { content: &'a str, node: NodeId }, +} + +pub fn 
parse_container<'a, T: ElementArena<'a>>( + arena: &mut T, + container: Container<'a>, + config: &ParseConfig, +) { + let containers = &mut vec![container]; + + while let Some(container) = containers.pop() { + match container { + Container::Document { content, node } => { + parse_section_and_headlines(arena, content, node, containers); + } + Container::Headline { content, node } => { + parse_headline_content(arena, content, node, containers, config); + } + Container::Block { content, node } => { + parse_blocks(arena, content, node, containers); + } + Container::Inline { content, node } => { + parse_inlines(arena, content, node, containers); + } + } + } +} + +pub fn parse_headline_content<'a, T: ElementArena<'a>>( + arena: &mut T, + content: &'a str, + parent: NodeId, + containers: &mut Vec>, + config: &ParseConfig, +) { + let (tail, (title, content)) = Title::parse(content, config).unwrap(); + let node = arena.append(title, parent); + containers.push(Container::Inline { content, node }); + parse_section_and_headlines(arena, tail, parent, containers); +} + +pub fn parse_section_and_headlines<'a, T: ElementArena<'a>>( + arena: &mut T, + content: &'a str, + parent: NodeId, + containers: &mut Vec>, +) { + let content = blank_lines_count(content).0; + + if content.is_empty() { + return; + } + + let mut last_end = 0; + for i in memchr_iter(b'\n', content.as_bytes()).chain(once(content.len())) { + if let Some((mut tail, (headline_content, level))) = parse_headline(&content[last_end..]) { + if last_end != 0 { + let node = arena.append(Element::Section, parent); + let content = &content[0..last_end]; + containers.push(Container::Block { content, node }); + } + + let node = arena.append(Element::Headline { level }, parent); + containers.push(Container::Headline { + content: headline_content, + node, + }); + + while let Some((new_tail, (content, level))) = parse_headline(tail) { + debug_assert_ne!(tail, new_tail); + let node = arena.append(Element::Headline { level }, 
parent); + containers.push(Container::Headline { content, node }); + tail = new_tail; + } + return; + } + last_end = i + 1; + } + + let node = arena.append(Element::Section, parent); + containers.push(Container::Block { content, node }); +} + +pub fn parse_blocks<'a, T: ElementArena<'a>>( + arena: &mut T, + content: &'a str, + parent: NodeId, + containers: &mut Vec>, +) { + let mut tail = blank_lines_count(content).0; + + if let Some(new_tail) = parse_block(content, arena, parent, containers) { + tail = blank_lines_count(new_tail).0; + } + + let mut text = tail; + let mut pos = 0; + + while !tail.is_empty() { + let i = memchr(b'\n', tail.as_bytes()) + .map(|i| i + 1) + .unwrap_or_else(|| tail.len()); + if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) { + let (tail_, blank) = blank_lines_count(&tail[i..]); + debug_assert_ne!(tail, tail_); + tail = tail_; + + let node = arena.append( + Element::Paragraph { + // including the current line (&tail[0..i]) + post_blank: blank + 1, + }, + parent, + ); + + containers.push(Container::Inline { + content: &text[0..pos].trim_end(), + node, + }); + + pos = 0; + text = tail; + } else if let Some(new_tail) = parse_block(tail, arena, parent, containers) { + if pos != 0 { + let node = + arena.insert_before_last_child(Element::Paragraph { post_blank: 0 }, parent); + + containers.push(Container::Inline { + content: &text[0..pos].trim_end(), + node, + }); + + pos = 0; + } + debug_assert_ne!(tail, blank_lines_count(new_tail).0); + tail = blank_lines_count(new_tail).0; + text = tail; + } else { + debug_assert_ne!(tail, &tail[i..]); + tail = &tail[i..]; + pos += i; + } + } + + if !text.is_empty() { + let node = arena.append(Element::Paragraph { post_blank: 0 }, parent); + + containers.push(Container::Inline { + content: &text[0..pos].trim_end(), + node, + }); + } +} + +pub fn parse_block<'a, T: ElementArena<'a>>( + contents: &'a str, + arena: &mut T, + parent: NodeId, + containers: &mut Vec>, +) -> Option<&'a str> { + match 
contents + .as_bytes() + .iter() + .find(|c| !c.is_ascii_whitespace())? + { + b'[' => { + let (tail, (fn_def, content)) = FnDef::parse(contents)?; + let node = arena.append(fn_def, parent); + containers.push(Container::Block { content, node }); + Some(tail) + } + b'0'..=b'9' | b'*' => { + let tail = parse_list(arena, contents, parent, containers)?; + Some(tail) + } + b'C' => { + let (tail, clock) = Clock::parse(contents)?; + arena.append(clock, parent); + Some(tail) + } + b'\'' => { + // TODO: LaTeX environment + None + } + b'-' => { + if let Some((tail, rule)) = Rule::parse(contents) { + arena.append(rule, parent); + Some(tail) + } else { + let tail = parse_list(arena, contents, parent, containers)?; + Some(tail) + } + } + b':' => { + if let Some((tail, (drawer, content))) = Drawer::parse(contents) { + let node = arena.append(drawer, parent); + containers.push(Container::Block { content, node }); + Some(tail) + } else { + let (tail, fixed_width) = FixedWidth::parse(contents)?; + arena.append(fixed_width, parent); + Some(tail) + } + } + b'|' => { + let tail = parse_org_table(arena, contents, containers, parent); + Some(tail) + } + b'+' => { + if let Some((tail, table)) = Table::parse_table_el(contents) { + arena.append(table, parent); + Some(tail) + } else { + let tail = parse_list(arena, contents, parent, containers)?; + Some(tail) + } + } + b'#' => { + if let Some((tail, block)) = RawBlock::parse(contents) { + let (element, content) = block.into_element(); + // avoid use after free + let is_block_container = match element { + Element::CenterBlock(_) + | Element::QuoteBlock(_) + | Element::VerseBlock(_) + | Element::SpecialBlock(_) => true, + _ => false, + }; + let node = arena.append(element, parent); + if is_block_container { + containers.push(Container::Block { content, node }); + } + Some(tail) + } else if let Some((tail, (dyn_block, content))) = DynBlock::parse(contents) { + let node = arena.append(dyn_block, parent); + containers.push(Container::Block { 
content, node }); + Some(tail) + } else if let Some((tail, keyword)) = RawKeyword::parse(contents) { + arena.append(keyword.into_element(), parent); + Some(tail) + } else { + let (tail, comment) = Comment::parse(contents)?; + arena.append(comment, parent); + Some(tail) + } + } + _ => None, + } +} + +struct InlinePositions<'a> { + bytes: &'a [u8], + pos: usize, + next: Option, +} + +impl InlinePositions<'_> { + fn new(bytes: &[u8]) -> InlinePositions { + InlinePositions { + bytes, + pos: 0, + next: Some(0), + } + } +} + +impl Iterator for InlinePositions<'_> { + type Item = usize; + + fn next(&mut self) -> Option { + lazy_static::lazy_static! { + static ref PRE_BYTES: BytesConst = + bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n'); + } + + self.next.take().or_else(|| { + PRE_BYTES.find(&self.bytes[self.pos..]).map(|i| { + self.pos += i + 1; + + match self.bytes[self.pos - 1] { + b'{' => { + self.next = Some(self.pos); + self.pos - 1 + } + b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos, + _ => self.pos - 1, + } + }) + }) + } +} + +pub fn parse_inlines<'a, T: ElementArena<'a>>( + arena: &mut T, + content: &'a str, + parent: NodeId, + containers: &mut Vec>, +) { + let mut tail = content; + + if let Some(tail_) = parse_inline(tail, arena, containers, parent) { + tail = tail_; + } + + while let Some((tail_, i)) = InlinePositions::new(tail.as_bytes()) + .filter_map(|i| parse_inline(&tail[i..], arena, containers, parent).map(|tail| (tail, i))) + .next() + { + if i != 0 { + arena.insert_before_last_child( + Element::Text { + value: tail[0..i].into(), + }, + parent, + ); + } + tail = tail_; + } + + if !tail.is_empty() { + arena.append(Element::Text { value: tail.into() }, parent); + } +} + +pub fn parse_inline<'a, T: ElementArena<'a>>( + contents: &'a str, + arena: &mut T, + containers: &mut Vec>, + parent: NodeId, +) -> Option<&'a str> { + if contents.len() < 3 { + return None; + } + + let byte = contents.as_bytes()[0]; + + match byte { + b'@' => { + let 
(tail, snippet) = Snippet::parse(contents)?; + arena.append(snippet, parent); + Some(tail) + } + b'{' => { + let (tail, macros) = Macros::parse(contents)?; + arena.append(macros, parent); + Some(tail) + } + b'<' => { + if let Some((tail, _content)) = parse_radio_target(contents) { + arena.append(Element::RadioTarget, parent); + Some(tail) + } else if let Some((tail, target)) = Target::parse(contents) { + arena.append(target, parent); + Some(tail) + } else if let Some((tail, timestamp)) = Timestamp::parse_active(contents) { + arena.append(timestamp, parent); + Some(tail) + } else { + let (tail, timestamp) = Timestamp::parse_diary(contents)?; + arena.append(timestamp, parent); + Some(tail) + } + } + b'[' => { + if let Some((tail, fn_ref)) = FnRef::parse(contents) { + arena.append(fn_ref, parent); + Some(tail) + } else if let Some((tail, link)) = Link::parse(contents) { + arena.append(link, parent); + Some(tail) + } else if let Some((tail, cookie)) = Cookie::parse(contents) { + arena.append(cookie, parent); + Some(tail) + } else { + let (tail, timestamp) = Timestamp::parse_inactive(contents)?; + arena.append(timestamp, parent); + Some(tail) + } + } + b'*' | b'+' | b'/' | b'_' | b'=' | b'~' => { + let (tail, emphasis) = Emphasis::parse(contents, byte)?; + let (element, content) = emphasis.into_element(); + let is_inline_container = match element { + Element::Bold | Element::Strike | Element::Italic | Element::Underline => true, + _ => false, + }; + let node = arena.append(element, parent); + if is_inline_container { + containers.push(Container::Inline { content, node }); + } + Some(tail) + } + b's' => { + let (tail, inline_src) = InlineSrc::parse(contents)?; + arena.append(inline_src, parent); + Some(tail) + } + b'c' => { + let (tail, inline_call) = InlineCall::parse(contents)?; + arena.append(inline_call, parent); + Some(tail) + } + _ => None, + } +} + +pub fn parse_list<'a, T: ElementArena<'a>>( + arena: &mut T, + contents: &'a str, + parent: NodeId, + containers: 
&mut Vec>, +) -> Option<&'a str> { + let (mut tail, (first_item, content)) = ListItem::parse(contents)?; + let first_item_indent = first_item.indent; + let first_item_ordered = first_item.ordered; + + let parent = arena.append(Element::Document { pre_blank: 0 }, parent); // placeholder + + let node = arena.append(first_item, parent); + containers.push(Container::Block { content, node }); + + while let Some((tail_, (item, content))) = ListItem::parse(tail) { + if item.indent == first_item_indent { + let node = arena.append(item, parent); + containers.push(Container::Block { content, node }); + debug_assert_ne!(tail, tail_); + tail = tail_; + } else { + break; + } + } + + let (tail, post_blank) = blank_lines_count(tail); + + arena.set( + parent, + List { + indent: first_item_indent, + ordered: first_item_ordered, + post_blank, + }, + ); + + Some(tail) +} + +pub fn parse_org_table<'a, T: ElementArena<'a>>( + arena: &mut T, + contents: &'a str, + containers: &mut Vec>, + parent: NodeId, +) -> &'a str { + let (tail, contents) = + lines_while(|line| line.trim_start().starts_with('|'))(contents).unwrap_or((contents, "")); + let (tail, post_blank) = blank_lines_count(tail); + + let mut iter = contents.trim_end().lines().peekable(); + + let mut lines = vec![]; + + let mut has_header = false; + + // TODO: merge contiguous rules + + if let Some(line) = iter.next() { + let line = line.trim_start(); + if !line.starts_with("|-") { + lines.push(line); + } + } + + while let Some(line) = iter.next() { + let line = line.trim_start(); + if iter.peek().is_none() && line.starts_with("|-") { + break; + } else if line.starts_with("|-") { + has_header = true; + } + lines.push(line); + } + + let parent = arena.append( + Table::Org { + tblfm: None, + post_blank, + has_header, + }, + parent, + ); + + for line in lines { + if line.starts_with("|-") { + if has_header { + arena.append(Element::TableRow(TableRow::HeaderRule), parent); + has_header = false; + } else { + 
arena.append(Element::TableRow(TableRow::BodyRule), parent); + } + } else { + if has_header { + let parent = arena.append(Element::TableRow(TableRow::Header), parent); + for content in line.split_terminator('|').skip(1) { + let node = arena.append(Element::TableCell(TableCell::Header), parent); + containers.push(Container::Inline { + content: content.trim(), + node, + }); + } + } else { + let parent = arena.append(Element::TableRow(TableRow::Body), parent); + for content in line.split_terminator('|').skip(1) { + let node = arena.append(Element::TableCell(TableCell::Body), parent); + containers.push(Container::Inline { + content: content.trim(), + node, + }); + } + } + } + } + + tail +} + +pub fn blank_lines_count(input: &str) -> (&str, usize) { + crate::parse::combinators::blank_lines_count(input).unwrap_or((input, 0)) +} + +pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> { + let (input_, level) = parse_headline_level(input)?; + let (input_, content) = lines_while(move |line| { + parse_headline_level(line) + .map(|(_, l)| l > level) + .unwrap_or(true) + })(input_) + .unwrap_or((input_, "")); + Some((input_, (&input[0..level + content.len()], level))) +} + +pub fn parse_headline_level(input: &str) -> Option<(&str, usize)> { + let (input, stars) = take_while1::<_, _, ()>(|c: char| c == '*')(input).ok()?; + + if input.starts_with(' ') || input.starts_with('\n') || input.is_empty() { + Some((input, stars.len())) + } else { + None + } +} diff --git a/src/replace.rs b/src/replace.rs deleted file mode 100644 index aa63c95..0000000 --- a/src/replace.rs +++ /dev/null @@ -1,326 +0,0 @@ -use rowan::{ - ast::{support, AstNode}, - SyntaxNode, TextRange, TextSize, TokenAtOffset, -}; - -use crate::ast::Headline; -use crate::syntax::{ - combinator::line_starts_iter, document::document_node, headline::headline_node, OrgLanguage, -}; -use crate::Org; - -#[derive(Debug)] -enum RangeShape { - InsideHeadline { headline: Headline, level: usize }, - ExactHeadline { 
headline: Headline, level: usize }, - Other, -} - -impl RangeShape { - pub fn new(mut node: SyntaxNode, range: TextRange) -> Self { - let mut result = RangeShape::Other; - - 'l: loop { - for headline in support::children::(&node) { - let level = headline.level(); - let start = headline.syntax.text_range().start(); - let end = headline.syntax.text_range().end(); - - if headline.syntax.text_range() == range { - result = RangeShape::ExactHeadline { headline, level }; - break 'l; - } - - if TextRange::new(start + TextSize::from(level as u32 + 1), end) - .contains_range(range) - { - node = headline.syntax.clone(); - result = RangeShape::InsideHeadline { headline, level }; - continue 'l; - } - } - break; - } - - result - } -} - -#[derive(Debug, PartialEq)] -enum ReplaceWithShape { - IncludeHeadline { level: usize }, - ExactHeadline { level: usize }, - Other, -} - -impl ReplaceWithShape { - fn new(text: &str) -> Self { - let mut result = ReplaceWithShape::Other; - - for start in line_starts_iter(text) { - let level = text[start..].bytes().take_while(|&c| c == b'*').count(); - - if level == 0 { - continue; - } - - if !matches!(text[start..].as_bytes().get(level), Some(b' ')) { - continue; - } - - match result { - ReplaceWithShape::IncludeHeadline { level: l } => { - if level < l { - result = ReplaceWithShape::IncludeHeadline { level } - } - } - ReplaceWithShape::ExactHeadline { level: l } => { - if level <= l { - result = ReplaceWithShape::IncludeHeadline { level } - } - } - ReplaceWithShape::Other => { - if start == 0 { - result = ReplaceWithShape::ExactHeadline { level } - } else { - result = ReplaceWithShape::IncludeHeadline { level } - } - } - } - } - - result - } -} - -impl Org { - /// Replace specified range with given text, and reparse the syntax tree with current config - /// - /// This method optimizes parsing by analyzing the selected range and given text, and reducing - /// the amount of data processed by parser. 
- /// - /// ```rust - /// use orgize::{Org, ast::Headline, TextRange, TextSize}; - /// - /// let mut org = Org::parse("** hello"); - /// let hdl = org.first_node::().unwrap(); - /// assert_eq!(hdl.level(), 2); - /// - /// // replace '**' with '*****' - /// org.replace_range(TextRange::new(0.into(), 2.into()), "*****"); - /// // since the syntax tree is changed, we have to query again - /// let hdl = org.first_node::().unwrap(); - /// assert_eq!(hdl.level(), 5); - /// ``` - pub fn replace_range(&mut self, range: TextRange, replace_with: impl AsRef) { - let replace_with = replace_with.as_ref(); - match ( - RangeShape::new(self.document().syntax, range), - ReplaceWithShape::new(replace_with), - ) { - ( - RangeShape::ExactHeadline { headline, level }, - ReplaceWithShape::IncludeHeadline { level: new_level }, - ) - | ( - RangeShape::InsideHeadline { headline, level }, - ReplaceWithShape::IncludeHeadline { level: new_level }, - ) if level < new_level => self.replace_headline(headline, range, replace_with), - - ( - RangeShape::ExactHeadline { headline, level }, - ReplaceWithShape::ExactHeadline { level: new_level }, - ) if level <= new_level - // non-last headline must ends with a newline - && (headline.end() == self.document().end() - || replace_with.ends_with(&['\n', '\r'])) => - { - self.replace_headline(headline, range, replace_with) - } - - ( - RangeShape::InsideHeadline { headline, level }, - ReplaceWithShape::ExactHeadline { level: new_level }, - ) if level <= new_level && follows_newline(headline.syntax(), range.start()) => { - self.replace_headline(headline, range, replace_with) - } - - _ => self.full_parse(range, replace_with), - } - } - - fn full_parse(&mut self, range: TextRange, replace_with: &str) { - if self.document().syntax().text_range() == range { - let input = (replace_with, &self.config).into(); - self.green = document_node(input).unwrap().1.into_node().unwrap(); - } else { - let start: usize = range.start().into(); - let end: usize = 
range.end().into(); - let mut text = self.green.to_string(); - text.replace_range(start..end, replace_with); - let input = (text.as_ref(), &self.config).into(); - self.green = document_node(input).unwrap().1.into_node().unwrap(); - } - } - - fn replace_headline(&mut self, headline: Headline, range: TextRange, replace_with: &str) { - if headline.syntax().text_range() == range { - let input = (replace_with, &self.config).into(); - - self.green = headline - .syntax - .replace_with(headline_node(input).unwrap().1.into_node().unwrap()); - } else { - let offset: usize = headline.syntax.text_range().start().into(); - let start: usize = range.start().into(); - let end: usize = range.end().into(); - - let mut text = headline.syntax.to_string(); - text.replace_range((start - offset)..(end - offset), replace_with); - - let input = (text.as_ref(), &self.config).into(); - - self.green = headline - .syntax - .replace_with(headline_node(input).unwrap().1.into_node().unwrap()); - } - } -} - -fn follows_newline(syntax: &SyntaxNode, offset: TextSize) -> bool { - match syntax.token_at_offset(offset) { - TokenAtOffset::None => false, - TokenAtOffset::Single(t) => { - let offset: usize = (offset - t.text_range().start()).into(); - t.text()[offset..].ends_with('\n') || t.text()[offset..].ends_with('\r') - } - TokenAtOffset::Between(t, _) => t.text().ends_with('\n') || t.text().ends_with('\r'), - } -} - -#[test] -fn replace() { - assert!(follows_newline( - Org::parse("\n*a*").document().syntax(), - TextSize::new(1) - )); - assert!(follows_newline( - Org::parse(" \na").document().syntax(), - TextSize::new(1) - )); - assert!(follows_newline( - Org::parse(" \ra").document().syntax(), - TextSize::new(1) - )); - assert!(!follows_newline( - Org::parse(" *a*").document().syntax(), - TextSize::new(1) - )); - assert!(!follows_newline( - Org::parse(" a").document().syntax(), - TextSize::new(1) - )); - - assert_eq!(ReplaceWithShape::new(""), ReplaceWithShape::Other); - 
assert_eq!(ReplaceWithShape::new(" ** a"), ReplaceWithShape::Other); - assert_eq!( - ReplaceWithShape::new("\n** a"), - ReplaceWithShape::IncludeHeadline { level: 2 } - ); - assert_eq!( - ReplaceWithShape::new("** a"), - ReplaceWithShape::ExactHeadline { level: 2 } - ); - assert_eq!( - ReplaceWithShape::new("** a\n* 1"), - ReplaceWithShape::IncludeHeadline { level: 1 } - ); - assert_eq!( - ReplaceWithShape::new("* a\n** 1"), - ReplaceWithShape::ExactHeadline { level: 1 } - ); - assert_eq!( - ReplaceWithShape::new("** a\n** 1"), - ReplaceWithShape::IncludeHeadline { level: 2 } - ); - - assert!(matches!( - RangeShape::new( - Org::parse("** abc\n** b").document().syntax, - TextRange::new(0.into(), 7.into()) - ), - RangeShape::ExactHeadline { level: 2, .. } - )); - assert!(matches!( - RangeShape::new( - Org::parse("** abc\n** b").document().syntax, - TextRange::new(3.into(), 7.into()) - ), - RangeShape::InsideHeadline { level: 2, .. } - )); - assert!(matches!( - RangeShape::new( - Org::parse("** abc\n** b").document().syntax, - TextRange::new(2.into(), 7.into()) - ), - RangeShape::Other - )); - assert!(matches!( - RangeShape::new( - Org::parse("* abc\n** b").document().syntax, - TextRange::new(4.into(), 7.into()) - ), - RangeShape::InsideHeadline { level: 1, .. } - )); - - macro_rules! t { - ($input:literal, $replace:literal) => { - let start = $input.find('|').unwrap(); - let end = $input.rfind('|').unwrap(); - - let input = format!( - "{}{}{}", - &$input[0..start], - &$input[start + 1..end], - &$input[end + 1..] 
- ); - let output = format!("{}{}{}", &$input[0..start], $replace, &$input[end + 1..]); - - let mut org = Org::parse(input); - org.replace_range( - TextRange::new((start as u32).into(), (end as u32 - 1).into()), - $replace, - ); - - debug_assert_eq!( - format!("{:#?}", org.document().syntax), - format!("{:#?}", Org::parse(output).document().syntax), - ); - }; - } - - t!("||", ""); - t!("||", "** abc"); - t!("*** abc |edf|", "fde"); - t!("*|** abc edf|", "fde"); - t!("* abc \n|** edf|", "** abc"); - t!("* ab|c \n*| edf", "** abc"); - - t!("* abc \n|** edf|", "** abc"); - t!("* abc \n|** edf|", "** eee\n** eee"); - t!("* abc \n|** edf|", "*** abc"); - t!("* abc \n*|* edf|", "*** abc"); - t!("* abc \n**| edf|", "*** abc"); - t!("* abc \n**| |edf", "*** abc"); - t!("* abc \n** |edf|", "*** abc"); - t!("* abc \n** |edf|", "\n*** abc"); - t!("* abc \n** |edf|", "\n** abc"); - t!("* abc \n** |edf|", "\n* abc"); - t!("* abc \n** \n|edf|", "* abc"); - t!("* abc \n** \n|edf|", "* abc\n* abc"); - t!("* abc \n** |edf|", "* abc"); - t!("* abc \n** |edf|", "* abc\n* abc"); - t!("* abc \n|* edf\n|* gh", "* hg"); - t!("* abc \n|* edf\n|* gh", "* hg\n"); - t!("* abc \n* edf\n|* gh|", "* hg"); -} diff --git a/src/syntax/block.rs b/src/syntax/block.rs deleted file mode 100644 index 4f5ec75..0000000 --- a/src/syntax/block.rs +++ /dev/null @@ -1,305 +0,0 @@ -use nom::{ - branch::alt, - bytes::complete::{tag, tag_no_case, take_while, take_while1}, - character::complete::{alpha1, space0, space1}, - combinator::{cond, opt}, - sequence::{separated_pair, tuple}, - IResult, InputTake, -}; - -use super::{ - combinator::{ - blank_lines, eol_or_eof, line_starts_iter, node, token, trim_line_end, GreenElement, - NodeBuilder, - }, - element::element_nodes, - input::Input, - keyword::affiliated_keyword_nodes, - SyntaxKind::*, -}; - -fn block_node_base(input: Input) -> IResult { - let (input, affiliated_keywords) = affiliated_keyword_nodes(input)?; - let (input, (block_begin, name)) = 
block_begin_node(input)?; - let (input, pre_blank) = blank_lines(input)?; - - let kind = match name { - s if s.eq_ignore_ascii_case("COMMENT") => COMMENT_BLOCK, - s if s.eq_ignore_ascii_case("EXAMPLE") => EXAMPLE_BLOCK, - s if s.eq_ignore_ascii_case("EXPORT") => EXPORT_BLOCK, - s if s.eq_ignore_ascii_case("SRC") => SOURCE_BLOCK, - s if s.eq_ignore_ascii_case("CENTER") => CENTER_BLOCK, - s if s.eq_ignore_ascii_case("QUOTE") => QUOTE_BLOCK, - s if s.eq_ignore_ascii_case("VERSE") => VERSE_BLOCK, - _ => SPECIAL_BLOCK, - }; - - for (input, contents) in line_starts_iter(&input).map(|i| input.take_split(i)) { - if let Ok((input, block_end)) = block_end_node(input, name) { - let (input, post_blank) = blank_lines(input)?; - - let mut children = vec![]; - children.extend(affiliated_keywords); - children.push(block_begin); - children.extend(pre_blank); - if kind.is_greater_element() { - children.push(node(BLOCK_CONTENT, element_nodes(contents)?)); - } else { - children.push(node(BLOCK_CONTENT, comma_quoted_text_nodes(contents))); - } - children.push(block_end); - children.extend(post_blank); - return Ok((input, node(kind, children))); - } - } - - Err(nom::Err::Error(())) -} - -fn block_begin_node(input: Input) -> IResult { - let (input, (ws1, begin, name)) = tuple((space0, tag_no_case("#+BEGIN_"), alpha1))(input)?; - - let mut b = NodeBuilder::new(); - b.ws(ws1); - b.text(begin); - b.text(name); - - if name.eq_ignore_ascii_case("SRC") { - let (input, language) = opt(tuple(( - space1, - take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'), - )))(input)?; - let (input, switches) = opt(tuple((space1, source_block_switches)))(input)?; - let (input, ws1) = space0(input)?; - let (input, (parameters, ws2, nl)) = trim_line_end(input)?; - - if let Some((ws, language)) = language { - b.ws(ws); - b.token(SRC_BLOCK_LANGUAGE, language); - } - if let Some((ws, switches)) = switches { - b.ws(ws); - b.token(SRC_BLOCK_SWITCHES, switches); - } - b.ws(ws1); - if 
!parameters.is_empty() { - b.token(SRC_BLOCK_PARAMETERS, parameters); - } - b.ws(ws2); - b.nl(nl); - Ok((input, (b.finish(BLOCK_BEGIN), name.as_str()))) - } else if name.eq_ignore_ascii_case("EXPORT") { - let (input, ty) = opt(tuple(( - space1, - take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'), - )))(input)?; - let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?; - let (input, nl) = eol_or_eof(input)?; - - if let Some((ws, ty)) = ty { - b.ws(ws); - b.token(EXPORT_BLOCK_TYPE, ty); - } - b.text(data); - b.nl(nl); - Ok((input, (b.finish(BLOCK_BEGIN), name.as_str()))) - } else { - let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?; - let (input, nl) = eol_or_eof(input)?; - - b.text(data); - b.nl(nl); - Ok((input, (b.finish(BLOCK_BEGIN), name.as_str()))) - } -} - -fn source_block_switches(input: Input) -> IResult { - let mut i = input; - - while !i.is_empty() { - match tuple::<_, _, (), _>(( - cond(i.len() != input.len(), space1), - alt(( - separated_pair( - alt((tag("-l"), tag("-n"))), - space1, - take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'), - ), - tuple((tag("+"), alpha1)), - tuple((tag("-"), alpha1)), - )), - ))(i) - { - Ok((i_, _)) => i = i_, - _ => break, - } - } - - let len = input.len() - i.len(); - - if len == 0 { - Err(nom::Err::Error(())) - } else { - Ok(input.take_split(len)) - } -} - -fn block_end_node<'a>(input: Input<'a>, name: &str) -> IResult, GreenElement, ()> { - let (input, (ws, end, name, ws_, nl)) = - tuple((space0, tag_no_case("#+END_"), tag(name), space0, eol_or_eof))(input)?; - - let mut b = NodeBuilder::new(); - b.ws(ws); - b.text(end); - b.text(name); - b.ws(ws_); - b.nl(nl); - - Ok((input, b.finish(BLOCK_END))) -} - -fn comma_quoted_text_nodes(input: Input) -> Vec { - let mut nodes = vec![]; - - let s = input.as_str(); - - let mut start = 0; - for i in line_starts_iter(s) { - // line must start with either ",*" or ",#+" - if s.get(i..i + 2) != 
Some(",*") && s.get(i..i + 3) != Some(",#+") { - continue; - } - - let text = &s[start..i]; - if !text.is_empty() { - nodes.push(token(TEXT, text)); - } - - nodes.push(token(COMMA, ",")); - start = i + 1; - } - - if !s[start..].is_empty() { - nodes.push(token(TEXT, &s[start..])); - } - - nodes -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn block_node(input: Input) -> IResult { - crate::lossless_parser!(block_node_base, input) -} - -#[test] -fn test_parse() { - use crate::ast::{ExampleBlock, SourceBlock}; - use crate::tests::to_ast; - - let to_src_block = to_ast::(block_node); - let to_example_block = to_ast::(block_node); - - insta::assert_debug_snapshot!( - to_example_block( -r#"#+BEGIN_EXAMPLE -,* headline -,#+block -text - #+END_EXAMPLE"# - ).syntax, - @r###" - EXAMPLE_BLOCK@0..59 - BLOCK_BEGIN@0..16 - TEXT@0..8 "#+BEGIN_" - TEXT@8..15 "EXAMPLE" - NEW_LINE@15..16 "\n" - BLOCK_CONTENT@16..42 - COMMA@16..17 "," - TEXT@17..28 "* headline\n" - COMMA@28..29 "," - TEXT@29..42 "#+block\ntext\n" - BLOCK_END@42..59 - WHITESPACE@42..46 " " - TEXT@46..52 "#+END_" - TEXT@52..59 "EXAMPLE" - "### - ); - - insta::assert_debug_snapshot!( - to_src_block( -r#"#+BEGIN_SRC - - - #+END_SRC"# - ).syntax, - @r###" - SOURCE_BLOCK@0..27 - BLOCK_BEGIN@0..12 - TEXT@0..8 "#+BEGIN_" - TEXT@8..11 "SRC" - NEW_LINE@11..12 "\n" - BLANK_LINE@12..13 "\n" - BLANK_LINE@13..14 "\n" - BLOCK_CONTENT@14..14 - BLOCK_END@14..27 - WHITESPACE@14..18 " " - TEXT@18..24 "#+END_" - TEXT@24..27 "SRC" - "### - ); - - insta::assert_debug_snapshot!( - to_src_block( -r#"#+begin_src - #+end_src"# - ).syntax, - @r###" - SOURCE_BLOCK@0..25 - BLOCK_BEGIN@0..12 - TEXT@0..8 "#+begin_" - TEXT@8..11 "src" - NEW_LINE@11..12 "\n" - BLOCK_CONTENT@12..12 - BLOCK_END@12..25 - WHITESPACE@12..16 " " - TEXT@16..22 "#+end_" - TEXT@22..25 "src" - "### - ); - - insta::assert_debug_snapshot!( - to_src_block( -r#"#+BEGIN_SRC javascript -n 20 -r :var n=0, 
l=2 :foo=bar -alert('Hello World!'); - #+END_SRC - - "#).syntax, - @r###" - SOURCE_BLOCK@0..100 - BLOCK_BEGIN@0..58 - TEXT@0..8 "#+BEGIN_" - TEXT@8..11 "SRC" - WHITESPACE@11..12 " " - SRC_BLOCK_LANGUAGE@12..22 "javascript" - WHITESPACE@22..24 " " - SRC_BLOCK_SWITCHES@24..32 "-n 20 -r" - WHITESPACE@32..34 " " - SRC_BLOCK_PARAMETERS@34..57 ":var n=0, l=2 :foo=bar" - NEW_LINE@57..58 "\n" - BLOCK_CONTENT@58..81 - TEXT@58..81 "alert('Hello World!');\n" - BLOCK_END@81..95 - WHITESPACE@81..85 " " - TEXT@85..91 "#+END_" - TEXT@91..94 "SRC" - NEW_LINE@94..95 "\n" - BLANK_LINE@95..96 "\n" - BLANK_LINE@96..100 " " - "### - ); - - // TODO: more testing -} diff --git a/src/syntax/clock.rs b/src/syntax/clock.rs deleted file mode 100644 index 92b0557..0000000 --- a/src/syntax/clock.rs +++ /dev/null @@ -1,134 +0,0 @@ -use nom::{ - branch::alt, - bytes::complete::tag, - character::complete::{digit1, space0}, - combinator::{map, opt, recognize}, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{ - blank_lines, colon_token, double_arrow_token, eol_or_eof, GreenElement, NodeBuilder, - }, - input::Input, - timestamp::{timestamp_active_node, timestamp_inactive_node}, - SyntaxKind, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn clock_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - space0, - tag("CLOCK:"), - space0, - alt((timestamp_inactive_node, timestamp_active_node)), - opt(tuple(( - space0, - double_arrow_token, - space0, - recognize(tuple((digit1, colon_token, digit1))), - ))), - space0, - eol_or_eof, - blank_lines, - )), - |(ws, clock, ws_, timestamp, duration, ws__, nl, post_blank)| { - let mut b = NodeBuilder::new(); - - b.ws(ws); - b.text(clock); - b.ws(ws_); - b.push(timestamp); - if let Some((ws, double_arrow, ws_, time)) = duration { - b.ws(ws); - b.push(double_arrow); - b.ws(ws_); - b.text(time); - } - b.ws(ws__); - b.nl(nl); - b.children.extend(post_blank); 
- b.finish(SyntaxKind::CLOCK) - }, - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::ast::Clock; - use crate::tests::to_ast; - - let to_clock = to_ast::(clock_node); - - insta::assert_debug_snapshot!( - to_clock("CLOCK: [2003-09-16 Tue 09:39]").syntax, - @r###" - CLOCK@0..29 - TEXT@0..6 "CLOCK:" - WHITESPACE@6..7 " " - TIMESTAMP_INACTIVE@7..29 - L_BRACKET@7..8 "[" - TIMESTAMP_YEAR@8..12 "2003" - MINUS@12..13 "-" - TIMESTAMP_MONTH@13..15 "09" - MINUS@15..16 "-" - TIMESTAMP_DAY@16..18 "16" - WHITESPACE@18..19 " " - TIMESTAMP_DAYNAME@19..22 "Tue" - WHITESPACE@22..23 " " - TIMESTAMP_HOUR@23..25 "09" - COLON@25..26 ":" - TIMESTAMP_MINUTE@26..28 "39" - R_BRACKET@28..29 "]" - "### - ); - - insta::assert_debug_snapshot!( - to_clock("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00\n\n").syntax, - @r###" - CLOCK@0..64 - TEXT@0..6 "CLOCK:" - WHITESPACE@6..7 " " - TIMESTAMP_INACTIVE@7..53 - L_BRACKET@7..8 "[" - TIMESTAMP_YEAR@8..12 "2003" - MINUS@12..13 "-" - TIMESTAMP_MONTH@13..15 "09" - MINUS@15..16 "-" - TIMESTAMP_DAY@16..18 "16" - WHITESPACE@18..19 " " - TIMESTAMP_DAYNAME@19..22 "Tue" - WHITESPACE@22..23 " " - TIMESTAMP_HOUR@23..25 "09" - COLON@25..26 ":" - TIMESTAMP_MINUTE@26..28 "39" - R_BRACKET@28..29 "]" - MINUS2@29..31 "--" - L_BRACKET@31..32 "[" - TIMESTAMP_YEAR@32..36 "2003" - MINUS@36..37 "-" - TIMESTAMP_MONTH@37..39 "09" - MINUS@39..40 "-" - TIMESTAMP_DAY@40..42 "16" - WHITESPACE@42..43 " " - TIMESTAMP_DAYNAME@43..46 "Tue" - WHITESPACE@46..47 " " - TIMESTAMP_HOUR@47..49 "10" - COLON@49..50 ":" - TIMESTAMP_MINUTE@50..52 "39" - R_BRACKET@52..53 "]" - WHITESPACE@53..54 " " - DOUBLE_ARROW@54..56 "=>" - WHITESPACE@56..58 " " - TEXT@58..62 "1:00" - NEW_LINE@62..63 "\n" - BLANK_LINE@63..64 "\n" - "### - ); -} diff --git a/src/syntax/cloze.rs b/src/syntax/cloze.rs deleted file mode 100644 index 20645a4..0000000 --- a/src/syntax/cloze.rs +++ /dev/null @@ -1,162 +0,0 @@ -use nom::{bytes::complete::take_until, 
combinator::opt, sequence::tuple, IResult, InputTake}; - -use crate::syntax::{ - combinator::{at_token, l_curly2_token, l_curly_token, r_curly_token}, - object::standard_object_nodes, -}; - -use super::{ - combinator::{GreenElement, NodeBuilder}, - input::Input, - SyntaxKind, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn cloze_node(input: Input) -> IResult { - crate::lossless_parser!(cloze_node_base, input) -} - -fn cloze_node_base(input: Input) -> IResult { - let (input, l_curly2) = l_curly2_token(input)?; - - let mut inside_latex = false; - let mut text_end = 0; - for (index, byte) in input.bytes().enumerate() { - match byte { - b'}' if !inside_latex => { - text_end = index; - break; - } - b'$' => { - inside_latex = !inside_latex; - } - _ => {} - } - } - - if text_end == 0 { - return Err(nom::Err::Error(())); - } - - let (input, text) = input.take_split(text_end); - - let (input, r_curly) = r_curly_token(input)?; - - let (input, hint) = opt(tuple((l_curly_token, take_until("}"), r_curly_token)))(input)?; - - let (input, id) = opt(tuple((at_token, take_until("}"))))(input)?; - - let (input, r_curly_) = r_curly_token(input)?; - - let mut b = NodeBuilder::new(); - - b.push(l_curly2); - b.children.extend(standard_object_nodes(text)); - b.push(r_curly); - - if let Some((l_curly, hint, r_curly)) = hint { - b.push(l_curly); - b.token(SyntaxKind::TEXT, hint); - b.push(r_curly); - } - - if let Some((at, id)) = id { - b.push(at); - b.token(SyntaxKind::TEXT, id); - } - - b.push(r_curly_); - - Ok((input, b.finish(SyntaxKind::CLOZE))) -} - -#[test] -fn parse() { - use crate::ast::Cloze; - use crate::config::ParseConfig; - use crate::tests::to_ast; - - let to_cloze = to_ast::(cloze_node); - - insta::assert_debug_snapshot!( - to_cloze("{{text}}").syntax, - @r###" - CLOZE@0..8 - L_CURLY2@0..2 "{{" - TEXT@2..6 "text" - R_CURLY@6..7 "}" - R_CURLY@7..8 "}" - "### - ); - - 
insta::assert_debug_snapshot!( - to_cloze("{{text}@id}").syntax, - @r###" - CLOZE@0..11 - L_CURLY2@0..2 "{{" - TEXT@2..6 "text" - R_CURLY@6..7 "}" - AT@7..8 "@" - TEXT@8..10 "id" - R_CURLY@10..11 "}" - "### - ); - - insta::assert_debug_snapshot!( - to_cloze("{{text}{hint}}").syntax, - @r###" - CLOZE@0..14 - L_CURLY2@0..2 "{{" - TEXT@2..6 "text" - R_CURLY@6..7 "}" - L_CURLY@7..8 "{" - TEXT@8..12 "hint" - R_CURLY@12..13 "}" - R_CURLY@13..14 "}" - "### - ); - - insta::assert_debug_snapshot!( - to_cloze("{{text}{hint}@id}").syntax, - @r###" - CLOZE@0..17 - L_CURLY2@0..2 "{{" - TEXT@2..6 "text" - R_CURLY@6..7 "}" - L_CURLY@7..8 "{" - TEXT@8..12 "hint" - R_CURLY@12..13 "}" - AT@13..14 "@" - TEXT@14..16 "id" - R_CURLY@16..17 "}" - "### - ); - - insta::assert_debug_snapshot!( - to_cloze("{{$\\frac{a}{b}$}{fractions}}").syntax, - @r###" - CLOZE@0..28 - L_CURLY2@0..2 "{{" - LATEX_FRAGMENT@2..15 - DOLLAR@2..3 "$" - TEXT@3..14 "\\frac{a}{b}" - DOLLAR@14..15 "$" - R_CURLY@15..16 "}" - L_CURLY@16..17 "{" - TEXT@17..26 "fractions" - R_CURLY@26..27 "}" - R_CURLY@27..28 "}" - "### - ); - - let config = &ParseConfig::default(); - - assert!(cloze_node(("{{}}", config).into()).is_err()); - assert!(cloze_node(("{{text}", config).into()).is_err()); - assert!(cloze_node(("{text}}", config).into()).is_err()); - assert!(cloze_node(("{{text}{}", config).into()).is_err()); - assert!(cloze_node(("{{text}a}", config).into()).is_err()); -} diff --git a/src/syntax/combinator.rs b/src/syntax/combinator.rs deleted file mode 100644 index bd69797..0000000 --- a/src/syntax/combinator.rs +++ /dev/null @@ -1,327 +0,0 @@ -use memchr::{memchr2, memchr2_iter, Memchr2}; -use nom::{bytes::complete::tag, IResult, InputTake, Slice}; -use rowan::{GreenNode, GreenToken, Language, NodeOrToken}; -use std::iter::once; - -use super::{input::Input, OrgLanguage, SyntaxKind, SyntaxKind::*}; - -pub type GreenElement = NodeOrToken; - -#[inline] -pub fn token(kind: SyntaxKind, input: &str) -> GreenElement { - 
GreenElement::Token(GreenToken::new(OrgLanguage::kind_to_raw(kind), input)) -} - -#[inline] -pub fn node(kind: SyntaxKind, children: I) -> GreenElement -where - I: IntoIterator, - I::IntoIter: ExactSizeIterator, -{ - GreenElement::Node(GreenNode::new(OrgLanguage::kind_to_raw(kind), children)) -} - -macro_rules! token_parser { - ($name:ident, $token:literal, $kind:ident) => { - #[doc = "Recognizes `"] - #[doc = $token] - #[doc = "` and returns GreenToken"] - pub fn $name(input: Input) -> IResult { - let (i, o) = tag($token)(input)?; - Ok((i, token($kind, o.as_str()))) - } - }; -} - -token_parser!(l_bracket_token, "[", L_BRACKET); -token_parser!(r_bracket_token, "]", R_BRACKET); -token_parser!(l_bracket2_token, "[[", L_BRACKET2); -token_parser!(r_bracket2_token, "]]", R_BRACKET2); -token_parser!(l_parens_token, "(", L_PARENS); -token_parser!(r_parens_token, ")", R_PARENS); -token_parser!(l_angle_token, "<", L_ANGLE); -token_parser!(r_angle_token, ">", R_ANGLE); -token_parser!(l_curly_token, "{", L_CURLY); -#[cfg(feature = "syntax-org-fc")] -token_parser!(l_curly2_token, "{{", L_CURLY2); -token_parser!(r_curly_token, "}", R_CURLY); -token_parser!(l_curly3_token, "{{{", L_CURLY3); -token_parser!(r_curly3_token, "}}}", R_CURLY3); -token_parser!(l_angle2_token, "<<", L_ANGLE2); -token_parser!(r_angle2_token, ">>", R_ANGLE2); -token_parser!(l_angle3_token, "<<<", L_ANGLE3); -token_parser!(r_angle3_token, ">>>", R_ANGLE3); -token_parser!(at_token, "@", AT); -token_parser!(at2_token, "@@", AT2); -token_parser!(minus2_token, "--", MINUS2); -// token_parser!(percent_token, "%", PERCENT); -token_parser!(percent2_token, "%%", PERCENT2); -// token_parser!(slash_token, "/", SLASH); -token_parser!(backslash_token, "\\", BACKSLASH); -token_parser!(underscore_token, "_", UNDERSCORE); -// token_parser!(star_token, "*", STAR); -// token_parser!(plus_token, "+", PLUS); -token_parser!(minus_token, "-", MINUS); -token_parser!(colon_token, ":", COLON); -token_parser!(colon2_token, "::", 
COLON2); -token_parser!(pipe_token, "|", PIPE); -token_parser!(dollar_token, "$", DOLLAR); -token_parser!(dollar2_token, "$$", DOLLAR2); -// token_parser!(equal_token, "=", EQUAL); -// token_parser!(tilde_token, "~", TILDE); -token_parser!(hash_plus_token, "#+", HASH_PLUS); -token_parser!(caret_token, "^", CARET); -token_parser!(hash_token, "#", HASH); -token_parser!(double_arrow_token, "=>", DOUBLE_ARROW); - -macro_rules! lossless_parser { - ($parser:expr, $input:expr) => {{ - let i_ = $input; - let (i, o) = $parser($input)?; - cfg_if::cfg_if! { - if #[cfg(feature = "tracing")] { - tracing::trace!(consumed = o.to_string()); - } - } - debug_assert_eq!( - &i_.as_str()[0..(i_.len() - i.len())], - &o.to_string(), - stringify!("parser must be lossless") - ); - Ok((i, o)) - }}; -} - -pub(crate) use lossless_parser; - -/// Takes all blank lines -pub fn blank_lines(input: Input) -> IResult, ()> { - if input.is_empty() { - return Ok((input, vec![])); - } - - let mut lines = vec![]; - let mut start = 0; - let bytes = input.as_bytes(); - - for index in line_ends_iter(input.as_str()) { - if start != index && bytes[start..index].iter().all(|b| b.is_ascii_whitespace()) { - lines.push(token(BLANK_LINE, &input.as_str()[start..index])); - start = index; - } else { - break; - } - } - - Ok((input.slice(start..), lines)) -} - -#[test] -fn test_blank_lines() { - use crate::config::ParseConfig; - let config = &ParseConfig::default(); - let (input, output) = blank_lines(("", config).into()).unwrap(); - assert_eq!(input.as_str(), ""); - assert_eq!(output, vec![]); - - let (input, output) = blank_lines(("\n", config).into()).unwrap(); - assert_eq!(input.as_str(), ""); - assert_eq!(output.len(), 1); - assert_eq!(output[0].to_string(), "\n"); - - let (input, output) = blank_lines((" t", config).into()).unwrap(); - assert_eq!(input.as_str(), " t"); - assert_eq!(output, vec![]); - - let (input, output) = blank_lines((" \r\n\n\t\t\r\n \n ", config).into()).unwrap(); - 
assert_eq!(input.as_str(), ""); - assert_eq!(output.len(), 5); - assert_eq!(output[0].to_string(), " \r\n"); - assert_eq!(output[1].to_string(), "\n"); - assert_eq!(output[2].to_string(), "\t\t\r\n"); - assert_eq!(output[3].to_string(), " \n"); - assert_eq!(output[4].to_string(), " "); - - let (input, output) = - blank_lines(("\r\n\n\t\t\r\n \n\r \r t\n ", config).into()).unwrap(); - assert_eq!(input.as_str(), " t\n "); - assert_eq!(output.len(), 6); - assert_eq!(output[0].to_string(), "\r\n"); - assert_eq!(output[1].to_string(), "\n"); - assert_eq!(output[2].to_string(), "\t\t\r\n"); - assert_eq!(output[3].to_string(), " \n"); - assert_eq!(output[4].to_string(), "\r"); - assert_eq!(output[5].to_string(), " \r"); -} - -/// Returns 1. anything before trailing whitespace, 2. whitespace itself, 3. line feeding -pub fn trim_line_end(input: Input) -> IResult { - let bytes = input.as_bytes(); - - let (input, contents, nl) = match memchr2(b'\r', b'\n', bytes) { - Some(i) if bytes[i] == b'\r' && matches!(bytes.get(i + 1), Some(b'\n')) => ( - input.slice(i + 2..), - input.slice(0..i), - input.slice(i..i + 2), - ), - Some(i) => ( - input.slice(i + 1..), - input.slice(0..i), - input.slice(i..i + 1), - ), - _ => (input.of(""), input, input.of("")), - }; - - let (contents, ws) = match contents.bytes().rposition(|u| !u.is_ascii_whitespace()) { - Some(i) => (contents.slice(0..i + 1), contents.slice(i + 1..)), - None => (contents.of(""), contents), - }; - - Ok((input, (contents, ws, nl))) -} - -#[test] -fn test_trim_line_end() { - use crate::config::ParseConfig; - let config = &ParseConfig::default(); - let (input, output) = trim_line_end(("", config).into()).unwrap(); - assert_eq!(input.as_str(), ""); - assert_eq!(output.0.as_str(), ""); - assert_eq!(output.1.as_str(), ""); - assert_eq!(output.2.as_str(), ""); - - let (input, output) = trim_line_end(("* hello, world :abc:", config).into()).unwrap(); - assert_eq!(input.as_str(), ""); - assert_eq!(output.0.as_str(), "* hello, world 
:abc:"); - assert_eq!(output.1.as_str(), ""); - assert_eq!(output.2.as_str(), ""); - - let (input, output) = - trim_line_end(("* hello, world :abc: \r\nrest\n", config).into()).unwrap(); - assert_eq!(input.as_str(), "rest\n"); - assert_eq!(output.0.as_str(), "* hello, world :abc:"); - assert_eq!(output.1.as_str(), " "); - assert_eq!(output.2.as_str(), "\r\n"); - - let (input, output) = trim_line_end((" \rr", config).into()).unwrap(); - assert_eq!(input.as_str(), "r"); - assert_eq!(output.0.as_str(), ""); - assert_eq!(output.1.as_str(), " "); - assert_eq!(output.2.as_str(), "\r"); -} - -/// Recognizes a line ending \r, \n, \r\n or end of file -pub fn eol_or_eof(input: Input) -> IResult { - let mut bytes = input.bytes(); - - let count = match bytes.next() { - Some(b'\n') => 1, - Some(b'\r') => { - if matches!(bytes.next(), Some(b'\n')) { - 2 - } else { - 1 - } - } - None => 0, - _ => return Err(nom::Err::Error(())), - }; - - Ok(input.take_split(count)) -} - -struct LineStart<'a> { - bytes: &'a [u8], - iter: Memchr2<'a>, -} - -impl<'a> LineStart<'a> { - fn new(input: &'a str) -> Self { - let bytes = input.as_bytes(); - LineStart { - bytes, - iter: memchr2_iter(b'\r', b'\n', bytes), - } - } -} - -impl<'a> Iterator for LineStart<'a> { - type Item = usize; - - fn next(&mut self) -> Option { - let i = self.iter.next()?; - if self.bytes[i] == b'\r' && self.bytes.get(i + 1) == Some(&b'\n') { - let ii = self.iter.next(); - debug_assert_eq!(i + 1, ii.unwrap()); - Some(i + 2) - } else { - Some(i + 1) - } - } -} - -/// Returns an iterator of positions of line start, including zero -pub fn line_starts_iter(s: &str) -> impl Iterator + '_ { - once(0).chain(LineStart::new(s)) -} - -/// Returns an iterator of positions of line end, including eof -pub fn line_ends_iter(s: &str) -> impl Iterator + '_ { - LineStart::new(s).chain(once(s.len())) -} - -pub struct NodeBuilder { - pub children: Vec, -} - -impl NodeBuilder { - pub fn new() -> NodeBuilder { - NodeBuilder { children: vec![] } 
- } - - pub fn ws(&mut self, i: Input) { - if !i.is_empty() { - debug_assert!(i.bytes().all(|c| c.is_ascii_whitespace())); - self.children.push(i.ws_token()) - } - } - - pub fn nl(&mut self, i: Input) { - if !i.is_empty() { - debug_assert!( - i.s == "\n" || i.s == "\r\n" || i.s == "\r", - "{:?} should be a new line", - i.s - ); - self.children.push(i.nl_token()) - } - } - - pub fn text(&mut self, i: Input) { - if !i.is_empty() { - self.children.push(i.text_token()) - } - } - - pub fn token(&mut self, kind: SyntaxKind, i: Input) { - self.children.push(i.token(kind)) - } - - pub fn push(&mut self, elem: GreenElement) { - self.children.push(elem) - } - - pub fn push_opt(&mut self, elem: Option) { - if let Some(elem) = elem { - self.children.push(elem) - } - } - - pub fn len(&self) -> usize { - self.children.len() - } - - pub fn finish(self, kind: SyntaxKind) -> GreenElement { - GreenElement::Node(GreenNode::new(kind.into(), self.children)) - } -} diff --git a/src/syntax/comment.rs b/src/syntax/comment.rs deleted file mode 100644 index 33c7805..0000000 --- a/src/syntax/comment.rs +++ /dev/null @@ -1,115 +0,0 @@ -use nom::{ - bytes::complete::{tag, take_while}, - character::complete::{space0, space1}, - combinator::{iterator, opt}, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder}, - input::Input, - SyntaxKind, -}; - -fn comment_node_base(input: Input) -> IResult { - let mut b = NodeBuilder::new(); - - let mut iter = iterator( - input, - opt(tuple(( - space0, - tag("#"), - opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))), - eol_or_eof, - ))), - ); - - for (idx, option) in iter.enumerate() { - match option { - Some((ws, common, content, eol)) => { - b.ws(ws); - b.token(SyntaxKind::HASH, common); - if let Some((ws, text)) = content { - b.ws(ws); - b.text(text); - } - b.text(eol); - } - _ if idx == 0 => return Err(nom::Err::Error(())), - _ => break, - } - } - - let (input, _) = iter.finish()?; - 
- let (input, post_blank) = blank_lines(input)?; - - b.children.extend(post_blank); - - Ok((input, b.finish(SyntaxKind::COMMENT))) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn comment_node(input: Input) -> IResult { - crate::lossless_parser!(comment_node_base, input) -} - -#[test] -fn parse() { - use crate::{ - syntax::{comment::comment_node, input::Input, SyntaxNode}, - ParseConfig, - }; - - let t = |input: &str| { - SyntaxNode::new_root( - comment_node(Input { - s: input, - c: &ParseConfig::default(), - }) - .unwrap() - .1 - .into_node() - .unwrap(), - ) - }; - - insta::assert_debug_snapshot!( - t("#"), - @r###" - COMMENT@0..1 - HASH@0..1 "#" - "### - ); - - insta::assert_debug_snapshot!( - t("#\n # a\n #\n\n"), - @r###" - COMMENT@0..12 - HASH@0..1 "#" - TEXT@1..2 "\n" - WHITESPACE@2..4 " " - HASH@4..5 "#" - WHITESPACE@5..6 " " - TEXT@6..7 "a" - TEXT@7..8 "\n" - WHITESPACE@8..9 " " - HASH@9..10 "#" - TEXT@10..11 "\n" - BLANK_LINE@11..12 "\n" - "### - ); - - insta::assert_debug_snapshot!( - t("#\na\n #\n\n"), - @r###" - COMMENT@0..2 - HASH@0..1 "#" - TEXT@1..2 "\n" - "### - ); -} diff --git a/src/syntax/cookie.rs b/src/syntax/cookie.rs deleted file mode 100644 index f54cb0e..0000000 --- a/src/syntax/cookie.rs +++ /dev/null @@ -1,147 +0,0 @@ -use nom::{ - branch::alt, - bytes::complete::tag, - character::complete::digit0, - combinator::map, - sequence::{pair, separated_pair, tuple}, - IResult, -}; - -use super::{ - combinator::{l_bracket_token, node, r_bracket_token, token, GreenElement}, - input::Input, - SyntaxKind::*, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn cookie_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - l_bracket_token, - alt(( - separated_pair(digit0, tag("/"), digit0), - pair(digit0, tag("%")), - )), - r_bracket_token, - )), - |(l_bracket, value, r_bracket)| { 
- let mut children = vec![l_bracket]; - - children.push(token(TEXT, value.0.as_str())); - match value.1.as_str() { - "%" => { - children.push(token(PERCENT, value.1.as_str())); - } - _ => { - children.push(token(SLASH, "/")); - children.push(token(TEXT, value.1.as_str())); - } - } - children.push(r_bracket); - - node(COOKIE, children) - }, - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::ast::Cookie; - use crate::tests::to_ast; - use crate::ParseConfig; - - let to_cookie = to_ast::(cookie_node); - - insta::assert_debug_snapshot!( - to_cookie("[1/10]").syntax, - @r###" - COOKIE@0..6 - L_BRACKET@0..1 "[" - TEXT@1..2 "1" - SLASH@2..3 "/" - TEXT@3..5 "10" - R_BRACKET@5..6 "]" - "### - ); - - insta::assert_debug_snapshot!( - to_cookie("[1/1000]").syntax, - @r###" - COOKIE@0..8 - L_BRACKET@0..1 "[" - TEXT@1..2 "1" - SLASH@2..3 "/" - TEXT@3..7 "1000" - R_BRACKET@7..8 "]" - "### - ); - - insta::assert_debug_snapshot!( - to_cookie("[10%]").syntax, - @r###" - COOKIE@0..5 - L_BRACKET@0..1 "[" - TEXT@1..3 "10" - PERCENT@3..4 "%" - R_BRACKET@4..5 "]" - "### - ); - - insta::assert_debug_snapshot!( - to_cookie("[%]").syntax, - @r###" - COOKIE@0..3 - L_BRACKET@0..1 "[" - TEXT@1..1 "" - PERCENT@1..2 "%" - R_BRACKET@2..3 "]" - "### - ); - - insta::assert_debug_snapshot!( - to_cookie("[/]").syntax, - @r###" - COOKIE@0..3 - L_BRACKET@0..1 "[" - TEXT@1..1 "" - SLASH@1..2 "/" - TEXT@2..2 "" - R_BRACKET@2..3 "]" - "### - ); - - insta::assert_debug_snapshot!( - to_cookie("[100/]").syntax, - @r###" - COOKIE@0..6 - L_BRACKET@0..1 "[" - TEXT@1..4 "100" - SLASH@4..5 "/" - TEXT@5..5 "" - R_BRACKET@5..6 "]" - "### - ); - - insta::assert_debug_snapshot!( - to_cookie("[/100]").syntax, - @r###" - COOKIE@0..6 - L_BRACKET@0..1 "[" - TEXT@1..1 "" - SLASH@1..2 "/" - TEXT@2..5 "100" - R_BRACKET@5..6 "]" - "### - ); - - let config = &ParseConfig::default(); - - assert!(cookie_node(("[10% ]", config).into()).is_err()); - assert!(cookie_node(("[1//100]", 
config).into()).is_err()); - assert!(cookie_node(("[1\\100]", config).into()).is_err()); - assert!(cookie_node(("[10%%]", config).into()).is_err()); -} diff --git a/src/syntax/document.rs b/src/syntax/document.rs deleted file mode 100644 index b32fc08..0000000 --- a/src/syntax/document.rs +++ /dev/null @@ -1,139 +0,0 @@ -use nom::{combinator::opt, IResult}; - -use super::{ - combinator::{blank_lines, node, GreenElement}, - drawer::property_drawer_node, - headline::{headline_node, section_node}, - input::Input, - SyntaxKind::*, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn document_node(input: Input) -> IResult { - crate::lossless_parser!(document_node_base, input) -} - -fn document_node_base(input: Input) -> IResult { - if input.is_empty() { - return Ok((input, node(DOCUMENT, []))); - } - - let mut children = vec![]; - - let (input, property_drawer) = opt(property_drawer_node)(input)?; - if let Some(property_drawer) = property_drawer { - children.push(property_drawer); - } - - let (input, pre_blank) = blank_lines(input)?; - - children.extend(pre_blank); - - if input.is_empty() { - return Ok((input, node(DOCUMENT, children))); - } - - let (input, section) = opt(section_node)(input)?; - if let Some(section) = section { - children.push(section); - } - - let mut i = input; - while !i.is_empty() { - let (input, headline) = headline_node(i)?; - debug_assert!(i.len() > input.len(), "{} > {}", i.len(), input.len(),); - i = input; - children.push(headline); - } - - Ok((i, node(DOCUMENT, children))) -} - -#[test] -fn parse() { - use crate::ast::Document; - use crate::tests::to_ast; - - let to_document = to_ast::(document_node); - - insta::assert_debug_snapshot!( - to_document("").syntax, - @r###" - DOCUMENT@0..0 - "### - ); - - insta::assert_debug_snapshot!( - to_document("\n \n\n").syntax, - @r###" - DOCUMENT@0..5 - BLANK_LINE@0..1 "\n" - BLANK_LINE@1..4 " \n" - BLANK_LINE@4..5 "\n" - "### 
- ); - - insta::assert_debug_snapshot!( - to_document("section").syntax, - @r###" - DOCUMENT@0..7 - SECTION@0..7 - PARAGRAPH@0..7 - TEXT@0..7 "section" - "### - ); - - insta::assert_debug_snapshot!( - to_document("\n* section").syntax, - @r###" - DOCUMENT@0..10 - BLANK_LINE@0..1 "\n" - HEADLINE@1..10 - HEADLINE_STARS@1..2 "*" - WHITESPACE@2..3 " " - HEADLINE_TITLE@3..10 - TEXT@3..10 "section" - "### - ); - - insta::assert_debug_snapshot!( - to_document("\n** heading 2\n* heading 1").syntax, - @r###" - DOCUMENT@0..25 - BLANK_LINE@0..1 "\n" - HEADLINE@1..14 - HEADLINE_STARS@1..3 "**" - WHITESPACE@3..4 " " - HEADLINE_TITLE@4..13 - TEXT@4..13 "heading 2" - NEW_LINE@13..14 "\n" - HEADLINE@14..25 - HEADLINE_STARS@14..15 "*" - WHITESPACE@15..16 " " - HEADLINE_TITLE@16..25 - TEXT@16..25 "heading 1" - "### - ); - - insta::assert_debug_snapshot!( - to_document("section\n** heading 2\n*heading 1").syntax, - @r###" - DOCUMENT@0..31 - SECTION@0..8 - PARAGRAPH@0..8 - TEXT@0..8 "section\n" - HEADLINE@8..31 - HEADLINE_STARS@8..10 "**" - WHITESPACE@10..11 " " - HEADLINE_TITLE@11..20 - TEXT@11..20 "heading 2" - NEW_LINE@20..21 "\n" - SECTION@21..31 - PARAGRAPH@21..31 - TEXT@21..31 "*heading 1" - "### - ); -} diff --git a/src/syntax/drawer.rs b/src/syntax/drawer.rs deleted file mode 100644 index a44cbb0..0000000 --- a/src/syntax/drawer.rs +++ /dev/null @@ -1,275 +0,0 @@ -use nom::{ - bytes::complete::{tag_no_case, take_while1}, - character::complete::{space0, space1}, - combinator::{iterator, map, verify}, - sequence::tuple, - IResult, InputTake, -}; - -use super::{ - combinator::{ - blank_lines, colon_token, eol_or_eof, line_starts_iter, node, trim_line_end, GreenElement, - NodeBuilder, - }, - element::element_nodes, - input::Input, - SyntaxKind::*, -}; - -fn drawer_begin_node(input: Input) -> IResult { - let mut b = NodeBuilder::new(); - - let (input, (ws, colon, name, colon_, ws_, nl)) = tuple(( - space0, - colon_token, - take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' 
|| c == '_'), - colon_token, - space0, - eol_or_eof, - ))(input)?; - - b.ws(ws); - b.push(colon); - b.text(name); - b.push(colon_); - b.ws(ws_); - b.nl(nl); - - Ok((input, (b.finish(DRAWER_BEGIN), name.as_str()))) -} - -fn drawer_end_node(input: Input) -> IResult { - let (input, (ws, colon, end, colon_, ws_, nl)) = tuple(( - space0, - colon_token, - tag_no_case("END"), - colon_token, - space0, - eol_or_eof, - ))(input)?; - - let mut b = NodeBuilder::new(); - b.ws(ws); - b.push(colon); - b.text(end); - b.push(colon_); - b.ws(ws_); - b.nl(nl); - - Ok((input, b.finish(DRAWER_END))) -} - -fn drawer_node_base(input: Input) -> IResult { - let (input, (begin, _)) = drawer_begin_node(input)?; - - let (input, pre_blank) = blank_lines(input)?; - - for (input, contents) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) { - if let Ok((input, end)) = drawer_end_node(input) { - let (input, post_blank) = blank_lines(input)?; - let mut children = vec![begin]; - children.extend(pre_blank); - if !contents.is_empty() { - children.push(node(DRAWER_CONTENT, element_nodes(contents)?)); - } else { - children.push(node(DRAWER_CONTENT, [])); - } - children.push(end); - children.extend(post_blank); - - return Ok((input, node(DRAWER, children))); - } - } - - Err(nom::Err::Error(())) -} - -fn property_drawer_node_base(input: Input) -> IResult { - let (input, (begin, name)) = drawer_begin_node(input)?; - - if !name.eq_ignore_ascii_case("properties") { - return Err(nom::Err::Error(())); - } - - let mut children = vec![begin]; - - let mut it = iterator(input, node_property_node); - children.extend(&mut it); - let (input, _) = it.finish()?; - let (input, end) = drawer_end_node(input)?; - let (input, post_blank) = blank_lines(input)?; - - children.push(end); - children.extend(post_blank); - - Ok((input, node(PROPERTY_DRAWER, children))) -} - -fn node_property_node(input: Input) -> IResult { - let (input, ws1) = space0(input)?; - let (input, colon1) = colon_token(input)?; - let 
(input, (colon2, name)) = map( - verify( - take_while1(|c| c != ' ' && c != '\t' && c != '\n' && c != '\r'), - |i: &Input| i.ends_with(':'), - ), - |input: Input| input.take_split(input.len() - 1), - )(input)?; - let (input, ws2) = space1(input)?; - let (input, (value, ws3, nl)) = trim_line_end(input)?; - - let mut b = NodeBuilder::new(); - - b.ws(ws1); - b.push(colon1); - - if name.ends_with('+') { - let (plus, name) = name.take_split(name.len() - 1); - b.text(name); - b.token(PLUS, plus); - } else { - b.text(name); - } - - b.token(COLON, colon2); - b.ws(ws2); - b.text(value); - b.ws(ws3); - b.nl(nl); - - Ok((input, b.finish(NODE_PROPERTY))) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn property_drawer_node(input: Input) -> IResult { - debug_assert!(!input.is_empty()); - crate::lossless_parser!(property_drawer_node_base, input) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn drawer_node(input: Input) -> IResult { - crate::lossless_parser!(drawer_node_base, input) -} - -#[test] -fn parse() { - use crate::{ - ast::{Drawer, PropertyDrawer}, - tests::to_ast, - ParseConfig, - }; - - let to_drawer = to_ast::(drawer_node); - let to_property_drawer = to_ast::(property_drawer_node); - - insta::assert_debug_snapshot!( - to_drawer( - r#":DRAWER: - :CUSTOM_ID: id - :END:"# - ).syntax, - @r###" - DRAWER@0..33 - DRAWER_BEGIN@0..9 - COLON@0..1 ":" - TEXT@1..7 "DRAWER" - COLON@7..8 ":" - NEW_LINE@8..9 "\n" - DRAWER_CONTENT@9..26 - PARAGRAPH@9..26 - TEXT@9..18 " :CUSTOM" - SUBSCRIPT@18..21 - UNDERSCORE@18..19 "_" - TEXT@19..21 "ID" - TEXT@21..26 ": id\n" - DRAWER_END@26..33 - WHITESPACE@26..28 " " - COLON@28..29 ":" - TEXT@29..32 "END" - COLON@32..33 ":" - "### - ); - - insta::assert_debug_snapshot!( - to_drawer( - r#":DRAWER: - - :END: - -"# - ).syntax, - @r###" - DRAWER@0..19 - DRAWER_BEGIN@0..9 - COLON@0..1 ":" - 
TEXT@1..7 "DRAWER" - COLON@7..8 ":" - NEW_LINE@8..9 "\n" - BLANK_LINE@9..10 "\n" - DRAWER_CONTENT@10..10 - DRAWER_END@10..18 - WHITESPACE@10..12 " " - COLON@12..13 ":" - TEXT@13..16 "END" - COLON@16..17 ":" - NEW_LINE@17..18 "\n" - BLANK_LINE@18..19 "\n" - "### - ); - - // https://github.com/PoiScript/orgize/issues/70#issuecomment-2099671563 - insta::assert_debug_snapshot!( - to_property_drawer(r#":PROPERTIES: -:header-args:clojure: :session *clojure-1* -:NAME: VALUE -:NAME+: VALUE -:END:"#).syntax, - @r###" - PROPERTY_DRAWER@0..91 - DRAWER_BEGIN@0..13 - COLON@0..1 ":" - TEXT@1..11 "PROPERTIES" - COLON@11..12 ":" - NEW_LINE@12..13 "\n" - NODE_PROPERTY@13..59 - COLON@13..14 ":" - TEXT@14..33 "header-args:clojure" - COLON@33..34 ":" - WHITESPACE@34..38 " " - TEXT@38..58 ":session *clojure-1*" - NEW_LINE@58..59 "\n" - NODE_PROPERTY@59..72 - COLON@59..60 ":" - TEXT@60..64 "NAME" - COLON@64..65 ":" - WHITESPACE@65..66 " " - TEXT@66..71 "VALUE" - NEW_LINE@71..72 "\n" - NODE_PROPERTY@72..86 - COLON@72..73 ":" - TEXT@73..77 "NAME" - PLUS@77..78 "+" - COLON@78..79 ":" - WHITESPACE@79..80 " " - TEXT@80..85 "VALUE" - NEW_LINE@85..86 "\n" - DRAWER_END@86..91 - COLON@86..87 ":" - TEXT@87..90 "END" - COLON@90..91 ":" - "### - ); - - let config = &ParseConfig::default(); - - // https://github.com/PoiScript/orgize/issues/9 - assert!(drawer_node((":SPAGHETTI:\n", config).into()).is_err()); - - assert!(property_drawer_node((":PROPERTIES:\n:NAME:VALUE\n:END:", config).into()).is_err()); -} diff --git a/src/syntax/dyn_block.rs b/src/syntax/dyn_block.rs deleted file mode 100644 index b2b4a09..0000000 --- a/src/syntax/dyn_block.rs +++ /dev/null @@ -1,107 +0,0 @@ -use nom::{ - bytes::complete::tag_no_case, - character::complete::{alpha1, space0, space1}, - sequence::tuple, - IResult, InputTake, -}; - -use super::{ - combinator::{ - blank_lines, eol_or_eof, line_starts_iter, node, trim_line_end, GreenElement, NodeBuilder, - }, - input::Input, - SyntaxKind::*, -}; - -fn 
dyn_block_node_base(input: Input) -> IResult { - let (input, begin) = dyn_block_begin_node(input)?; - let (input, pre_blank) = blank_lines(input)?; - - for (input, contents) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) { - if let Ok((input, end)) = dyn_block_end_node(input) { - let (input, post_blank) = blank_lines(input)?; - let mut children = vec![begin]; - children.extend(pre_blank); - children.push(contents.text_token()); - children.push(end); - children.extend(post_blank); - - return Ok((input, node(DYN_BLOCK, children))); - } - } - - Err(nom::Err::Error(())) -} - -fn dyn_block_begin_node(input: Input) -> IResult { - let (input, (ws, begin, ws_, name, (args, ws__, nl))) = tuple(( - space0, - tag_no_case("#+BEGIN:"), - space1, - alpha1, - trim_line_end, - ))(input)?; - - let mut b = NodeBuilder::new(); - b.ws(ws); - b.text(begin); - b.ws(ws_); - b.text(name); - b.text(args); - b.ws(ws__); - b.nl(nl); - - Ok((input, b.finish(DYN_BLOCK_BEGIN))) -} - -fn dyn_block_end_node(input: Input) -> IResult { - let (input, (ws, end, ws_, nl)) = - tuple((space0, tag_no_case("#+END:"), space0, eol_or_eof))(input)?; - - let mut b = NodeBuilder::new(); - b.ws(ws); - b.text(end); - b.ws(ws_); - b.nl(nl); - - Ok((input, b.finish(DYN_BLOCK_END))) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn dyn_block_node(input: Input) -> IResult { - crate::lossless_parser!(dyn_block_node_base, input) -} - -#[test] -fn parse() { - use crate::{ast::DynBlock, tests::to_ast}; - - let to_dyn_block = to_ast::(dyn_block_node); - - insta::assert_debug_snapshot!( - to_dyn_block( - r#"#+BEGIN: clocktable :scope file - -CONTENTS -#+END: - "#).syntax, - @r###" - DYN_BLOCK@0..53 - DYN_BLOCK_BEGIN@0..32 - TEXT@0..8 "#+BEGIN:" - WHITESPACE@8..9 " " - TEXT@9..19 "clocktable" - TEXT@19..31 " :scope file" - NEW_LINE@31..32 "\n" - BLANK_LINE@32..33 "\n" - TEXT@33..42 "CONTENTS\n" - DYN_BLOCK_END@42..49 - 
TEXT@42..48 "#+END:" - NEW_LINE@48..49 "\n" - BLANK_LINE@49..53 " " - "### - ); -} diff --git a/src/syntax/element.rs b/src/syntax/element.rs deleted file mode 100644 index aa4b88a..0000000 --- a/src/syntax/element.rs +++ /dev/null @@ -1,339 +0,0 @@ -use std::iter::once; - -use memchr::memchr2_iter; -use nom::{IResult, InputTake}; - -use super::{ - block::block_node, - clock::clock_node, - combinator::GreenElement, - comment::comment_node, - drawer::drawer_node, - dyn_block::dyn_block_node, - fixed_width::fixed_width_node, - fn_def::fn_def_node, - input::Input, - keyword::{affiliated_keyword_nodes, keyword_node}, - latex_environment::latex_environment_node, - list::list_node, - paragraph::{paragraph_node, paragraph_nodes}, - rule::rule_node, - table::{org_table_node, table_el_node}, -}; - -/// Recognizes multiple org-mode elements -/// -/// input must not contains blank line in the beginning -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn element_nodes(input: Input) -> Result, nom::Err<()>> { - debug_assert!(!input.is_empty()); - // TODO: - // debug_assert!( - // blank_lines(input).unwrap().1.is_empty(), - // "input must not starts with blank lines: {:?}", - // input.s - // ); - - let mut i = input; - let mut nodes = vec![]; - - 'l: while !i.is_empty() { - for (input, head) in ElementPositions::new(i) { - if let Ok((input, element)) = element_node(input) { - if !head.is_empty() { - nodes.extend(paragraph_nodes(head)?); - } - nodes.push(element); - debug_assert!(input.len() < i.len(), "{} < {}", input.len(), i.len()); - i = input; - continue 'l; - } - } - nodes.extend(paragraph_nodes(i)?); - break; - } - - debug_assert_eq!( - input.as_str(), - nodes.iter().fold(String::new(), |s, n| s + &n.to_string()), - "parser must be lossless" - ); - - Ok(nodes) -} - -/// Recognizes an org-mode element expect paragraph -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", 
skip(input), fields(input = input.s)) -)] -pub fn element_node(input: Input) -> IResult { - // skip affiliated keyword first - let (i, nodes) = affiliated_keyword_nodes(input)?; - - let has_affiliated_keyword = !nodes.is_empty(); - - // find first non-whitespace character - let byte = i.bytes().find(|&b| b != b' ' && b != b'\t'); - - debug_assert!( - !(has_affiliated_keyword && matches!(byte, None | Some(b'\n') | Some(b'\r'))), - "affiliated_keyword must not followed by blank lines: {:?}", - input.s - ); - - let result = match byte { - Some(b'[') => fn_def_node(input), - Some(b'0'..=b'9') | Some(b'*') => list_node(input), - // clock doesn't have affiliated keywords - Some(b'C') if !has_affiliated_keyword => clock_node(input), - Some(b'-') => rule_node(input).or_else(|_| list_node(input)), - Some(b':') => drawer_node(input).or_else(|_| fixed_width_node(input)), - Some(b'|') => org_table_node(input), - Some(b'+') => table_el_node(input).or_else(|_| list_node(input)), - Some(b'#') => block_node(input) - .or_else(|_| keyword_node(input)) - .or_else(|_| dyn_block_node(input)) - .or_else(|_| comment_node(input)), - Some(b'\\') => latex_environment_node(input), - _ => Err(nom::Err::Error(())), - }; - - if has_affiliated_keyword { - result.or_else(|_| paragraph_node(input)) - } else { - result - } -} - -struct ElementPositions<'a> { - input: Input<'a>, - pos: usize, -} - -impl<'a> ElementPositions<'a> { - fn new(input: Input<'a>) -> Self { - ElementPositions { input, pos: 0 } - } -} - -impl<'a> Iterator for ElementPositions<'a> { - type Item = (Input<'a>, Input<'a>); - - fn next(&mut self) -> Option { - if self.pos >= self.input.s.len() { - return None; - } - - let bytes = &self.input.as_bytes()[self.pos..]; - - let mut iter = once(0).chain(memchr2_iter(b'\r', b'\n', bytes).map(|i| i + 1)); - - while let Some(i) = iter.next() { - let b = *bytes[i..].iter().find(|&&b| b != b' ' && b != b'\t')?; - - if matches!( - b, - b'[' | b'0'..=b'9' | b'*' | b'C' | b'-' | b':' | b'|' | 
b'+' | b'#' | b'\\' - ) { - let previous = self.pos; - self.pos = iter - .next() - .map_or_else(|| self.input.s.len(), |i| i + self.pos); - - debug_assert!( - previous < self.pos && self.pos <= self.input.s.len(), - "{} < {} < {}", - previous, - self.pos, - self.input.s.len() - ); - - let (input, head) = self.input.take_split(i + previous); - - return Some((input, head)); - } - } - - None - } -} - -#[test] -fn positions() { - let config = crate::ParseConfig::default(); - let s = "+\n\n C\n \r\n-\n\t\t[\n: \r\n"; - let vec = ElementPositions::new((s, &config).into()).collect::>(); - assert_eq!(vec.len(), 5); - assert_eq!(vec[0].0.s, "+\n\n C\n \r\n-\n\t\t[\n: \r\n"); - assert_eq!(vec[1].0.s, " C\n \r\n-\n\t\t[\n: \r\n"); - assert_eq!(vec[2].0.s, "-\n\t\t[\n: \r\n"); - assert_eq!(vec[3].0.s, "\t\t[\n: \r\n"); - assert_eq!(vec[4].0.s, ": \r\n"); -} - -#[test] -fn parse() { - use crate::syntax::{SyntaxKind, SyntaxNode}; - use crate::{syntax::combinator::node, ParseConfig}; - - let t = |input: &str| { - let config = &ParseConfig::default(); - let children = element_nodes((input, config).into()).unwrap(); - SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap()) - }; - - // paragraph stops at blank lines - insta::assert_debug_snapshot!( - t(r#"a - -b"#), - @r###" - SECTION@0..4 - PARAGRAPH@0..3 - TEXT@0..2 "a\n" - BLANK_LINE@2..3 "\n" - PARAGRAPH@3..4 - TEXT@3..4 "b" - "### - ); - - // paragraph followed by special element - insta::assert_debug_snapshot!( - t("Table:\n|cell"), - @r###" - SECTION@0..12 - PARAGRAPH@0..7 - TEXT@0..7 "Table:\n" - ORG_TABLE@7..12 - ORG_TABLE_STANDARD_ROW@7..12 - PIPE@7..8 "|" - ORG_TABLE_CELL@8..12 - TEXT@8..12 "cell" - "### - ); -} - -#[test] -fn affiliated_keywords() { - use crate::syntax::{SyntaxKind, SyntaxNode}; - use crate::{syntax::combinator::node, ParseConfig}; - - let t = |input: &str| { - let config = &ParseConfig::default(); - let children = element_nodes((input, config).into()).unwrap(); - 
SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap()) - }; - - // affiliated keywords + paragraph - insta::assert_debug_snapshot!( - t("#+ATTR_HTML: :width 300px\n[[./img/a.jpg]]"), - @r###" - SECTION@0..41 - PARAGRAPH@0..41 - AFFILIATED_KEYWORD@0..26 - HASH_PLUS@0..2 "#+" - TEXT@2..11 "ATTR_HTML" - COLON@11..12 ":" - TEXT@12..25 " :width 300px" - NEW_LINE@25..26 "\n" - LINK@26..41 - L_BRACKET2@26..28 "[[" - LINK_PATH@28..39 "./img/a.jpg" - R_BRACKET2@39..41 "]]" - "### - ); - - // affiliated keywords + blank lines, fallback to normal keyword - insta::assert_debug_snapshot!( - t("#+ATTR_HTML: :width 300px\n#+CAPTION: abc\n\n[[./img/a.jpg]]"), - @r###" - SECTION@0..57 - KEYWORD@0..26 - HASH_PLUS@0..2 "#+" - TEXT@2..11 "ATTR_HTML" - COLON@11..12 ":" - TEXT@12..25 " :width 300px" - NEW_LINE@25..26 "\n" - KEYWORD@26..42 - HASH_PLUS@26..28 "#+" - TEXT@28..35 "CAPTION" - COLON@35..36 ":" - TEXT@36..40 " abc" - NEW_LINE@40..41 "\n" - BLANK_LINE@41..42 "\n" - PARAGRAPH@42..57 - LINK@42..57 - L_BRACKET2@42..44 "[[" - LINK_PATH@44..55 "./img/a.jpg" - R_BRACKET2@55..57 "]]" - "### - ); - - // affiliated keywords + special element - insta::assert_debug_snapshot!( - t("#+CAPTION: a footnote def\n[fn:WORD] https://orgmode.org"), - @r###" - SECTION@0..55 - FN_DEF@0..55 - AFFILIATED_KEYWORD@0..26 - HASH_PLUS@0..2 "#+" - TEXT@2..9 "CAPTION" - COLON@9..10 ":" - TEXT@10..25 " a footnote def" - NEW_LINE@25..26 "\n" - L_BRACKET@26..27 "[" - TEXT@27..29 "fn" - COLON@29..30 ":" - TEXT@30..34 "WORD" - R_BRACKET@34..35 "]" - TEXT@35..55 " https://orgmode.org" - "### - ); - - // affiliated keywords + clock - insta::assert_debug_snapshot!( - t("#+CAPTION: a footnote def\nCLOCK: [2003-09-16 Tue 09:39]"), - @r###" - SECTION@0..55 - PARAGRAPH@0..55 - AFFILIATED_KEYWORD@0..26 - HASH_PLUS@0..2 "#+" - TEXT@2..9 "CAPTION" - COLON@9..10 ":" - TEXT@10..25 " a footnote def" - NEW_LINE@25..26 "\n" - TEXT@26..33 "CLOCK: " - TIMESTAMP_INACTIVE@33..55 - L_BRACKET@33..34 "[" - 
TIMESTAMP_YEAR@34..38 "2003" - MINUS@38..39 "-" - TIMESTAMP_MONTH@39..41 "09" - MINUS@41..42 "-" - TIMESTAMP_DAY@42..44 "16" - WHITESPACE@44..45 " " - TIMESTAMP_DAYNAME@45..48 "Tue" - WHITESPACE@48..49 " " - TIMESTAMP_HOUR@49..51 "09" - COLON@51..52 ":" - TIMESTAMP_MINUTE@52..54 "39" - R_BRACKET@54..55 "]" - "### - ); - - // affiliated keywords + eof - insta::assert_debug_snapshot!( - t("#+CAPTION: Longer caption."), - @r###" - SECTION@0..26 - KEYWORD@0..26 - HASH_PLUS@0..2 "#+" - TEXT@2..9 "CAPTION" - COLON@9..10 ":" - TEXT@10..26 " Longer caption." - "### - ); -} diff --git a/src/syntax/emphasis.rs b/src/syntax/emphasis.rs deleted file mode 100644 index d3fb710..0000000 --- a/src/syntax/emphasis.rs +++ /dev/null @@ -1,186 +0,0 @@ -use bytecount::count; -use memchr::memchr_iter; -use nom::{combinator::map, IResult, Slice}; - -use super::{ - combinator::{node, token, GreenElement}, - input::Input, - object::standard_object_nodes, - SyntaxKind::*, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn bold_node(input: Input) -> IResult { - let mut parser = map(emphasis(b'*'), |contents| { - let mut children = vec![token(STAR, "*")]; - children.extend(standard_object_nodes(contents)); - children.push(token(STAR, "*")); - node(BOLD, children) - }); - crate::lossless_parser!(parser, input) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn code_node(input: Input) -> IResult { - let mut parser = map(emphasis(b'~'), |contents| { - node( - CODE, - [token(TILDE, "~"), contents.text_token(), token(TILDE, "~")], - ) - }); - crate::lossless_parser!(parser, input) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn strike_node(input: Input) -> IResult { - let mut parser = map(emphasis(b'+'), |contents| { - let mut children = vec![token(PLUS, "+")]; 
- children.extend(standard_object_nodes(contents)); - children.push(token(PLUS, "+")); - node(STRIKE, children) - }); - crate::lossless_parser!(parser, input) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn verbatim_node(input: Input) -> IResult { - let mut parser = map(emphasis(b'='), |contents| { - node( - VERBATIM, - [token(EQUAL, "="), contents.text_token(), token(EQUAL, "=")], - ) - }); - crate::lossless_parser!(parser, input) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn underline_node(input: Input) -> IResult { - let mut parser = map(emphasis(b'_'), |contents| { - let mut children = vec![token(UNDERSCORE, "_")]; - children.extend(standard_object_nodes(contents)); - children.push(token(UNDERSCORE, "_")); - node(UNDERLINE, children) - }); - crate::lossless_parser!(parser, input) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn italic_node(input: Input) -> IResult { - let mut parser = map(emphasis(b'/'), |contents| { - let mut children = vec![token(SLASH, "/")]; - children.extend(standard_object_nodes(contents)); - children.push(token(SLASH, "/")); - node(ITALIC, children) - }); - crate::lossless_parser!(parser, input) -} - -fn emphasis(marker: u8) -> impl Fn(Input) -> IResult { - move |input: Input| { - let bytes = input.as_bytes(); - - if bytes.len() < 3 || bytes[0] != marker || bytes[1].is_ascii_whitespace() { - return Err(nom::Err::Error(())); - } - - for idx in memchr_iter(marker, bytes).skip(1) { - // contains at least one character - if idx == 1 { - continue; - } else if count(&bytes[1..idx], b'\n') >= 2 { - break; - } else if validate_marker(idx, input) { - return Ok((input.slice(idx + 1..), input.slice(1..idx))); - } - } - - Err(nom::Err::Error(())) - } -} - -fn validate_marker(pos: usize, text: Input) -> bool 
{ - if text.as_bytes()[pos - 1].is_ascii_whitespace() { - false - } else if let Some(post) = text.as_bytes().get(pos + 1) { - [ - b' ', b'\t', b'\r', b'\n', b'-', b'.', b',', b';', b':', b'!', b'?', b'\'', b')', b'}', - b'[', - ] - .contains(post) - } else { - true - } -} - -pub fn verify_pre(input: &str) -> bool { - if input.is_empty() { - return true; - } - matches!( - input.as_bytes()[input.len() - 1], - b'\t' | b' ' | b'-' | b'(' | b'{' | b'\\' | b'"' | b'\r' | b'\n' - ) -} - -#[test] -fn parse() { - use crate::{ast::Bold, tests::to_ast, ParseConfig}; - - let to_bold = to_ast::(bold_node); - - insta::assert_debug_snapshot!( - to_bold("*bold*").syntax, - @r###" - BOLD@0..6 - STAR@0..1 "*" - TEXT@1..5 "bold" - STAR@5..6 "*" - "### - ); - - insta::assert_debug_snapshot!( - to_bold("*bo*ld*").syntax, - @r###" - BOLD@0..7 - STAR@0..1 "*" - TEXT@1..6 "bo*ld" - STAR@6..7 "*" - "### - ); - - insta::assert_debug_snapshot!( - to_bold("*bo\nld*").syntax, - @r###" - BOLD@0..7 - STAR@0..1 "*" - TEXT@1..6 "bo\nld" - STAR@6..7 "*" - "### - ); - - let config = &ParseConfig::default(); - - assert!(bold_node(("*bold*a", config).into()).is_err()); - assert!(bold_node(("*bold *", config).into()).is_err()); - assert!(bold_node(("* bold*", config).into()).is_err()); - assert!(bold_node(("*b\nol\nd*", config).into()).is_err()); - assert!(italic_node(("*bold*", config).into()).is_err()); -} diff --git a/src/syntax/entity.rs b/src/syntax/entity.rs deleted file mode 100644 index 056126e..0000000 --- a/src/syntax/entity.rs +++ /dev/null @@ -1,120 +0,0 @@ -use nom::{ - branch::alt, - bytes::complete::{tag, take_while_m_n}, - character::complete::alphanumeric1, - combinator::opt, - IResult, -}; - -use crate::{ - entities::ENTITIES, - syntax::combinator::{backslash_token, node}, - SyntaxKind, -}; - -use super::{combinator::GreenElement, input::Input}; - -pub fn entity_node(input: Input) -> IResult { - debug_assert!(input.s.starts_with('\\')); - let mut parser = alt((template1, template2)); 
- crate::lossless_parser!(parser, input) -} - -// \NAME POST or // \NAME{} -fn template1(input: Input) -> IResult { - let (input, backslash) = backslash_token(input)?; - let (input, name) = alphanumeric1(input)?; - - if ENTITIES.iter().all(|i| i.0 != name.s) { - return Err(nom::Err::Error(())); - } - let (input, brackets) = opt(tag("{}"))(input)?; - - if let Some(brackets) = brackets { - return Ok(( - input, - node( - SyntaxKind::ENTITY, - [backslash, name.text_token(), brackets.text_token()], - ), - )); - } - - if let Some(post) = input.bytes().next() { - if post.is_ascii_alphabetic() { - return Err(nom::Err::Error(())); - } - } - - Ok(( - input, - node(SyntaxKind::ENTITY, [backslash, name.text_token()]), - )) -} - -// \_SPACES -fn template2(input: Input) -> IResult { - let (input, backslash) = backslash_token(input)?; - let (input, underscore) = tag("_")(input)?; - let (input, spaces) = take_while_m_n(1, 20, |c| c == ' ')(input)?; - Ok(( - input, - node( - SyntaxKind::ENTITY, - [ - backslash, - underscore.token(SyntaxKind::UNDERSCORE), - spaces.text_token(), - ], - ), - )) -} - -#[test] -fn parse() { - use crate::{ast::Entity, tests::to_ast, ParseConfig}; - - let to_entity = to_ast::(entity_node); - - insta::assert_debug_snapshot!( - to_entity("\\cent").syntax, - @r###" - ENTITY@0..5 - BACKSLASH@0..1 "\\" - TEXT@1..5 "cent" - "### - ); - - insta::assert_debug_snapshot!( - to_entity("\\S").syntax, - @r###" - ENTITY@0..2 - BACKSLASH@0..1 "\\" - TEXT@1..2 "S" - "### - ); - - insta::assert_debug_snapshot!( - to_entity("\\frac12{}test").syntax, - @r###" - ENTITY@0..9 - BACKSLASH@0..1 "\\" - TEXT@1..7 "frac12" - TEXT@7..9 "{}" - "### - ); - - insta::assert_debug_snapshot!( - to_entity("\\_ ").syntax, - @r###" - ENTITY@0..21 - BACKSLASH@0..1 "\\" - UNDERSCORE@1..2 "_" - TEXT@2..21 " " - "### - ); - - let c = ParseConfig::default(); - - assert!(entity_node(("\\poi", &c).into()).is_err()); -} diff --git a/src/syntax/fixed_width.rs b/src/syntax/fixed_width.rs deleted file 
mode 100644 index 9a89e93..0000000 --- a/src/syntax/fixed_width.rs +++ /dev/null @@ -1,100 +0,0 @@ -use nom::{ - bytes::complete::{tag, take_while}, - character::complete::{space0, space1}, - combinator::{iterator, opt}, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder}, - input::Input, - keyword::affiliated_keyword_nodes, - SyntaxKind, -}; - -fn fixed_width_node_base(input: Input) -> IResult { - let mut b = NodeBuilder::new(); - - let (input, keywords) = affiliated_keyword_nodes(input)?; - b.children.extend(keywords); - - let mut iter = iterator( - input, - opt(tuple(( - space0, - tag(":"), - opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))), - eol_or_eof, - ))), - ); - - for (idx, option) in iter.enumerate() { - match option { - Some((ws, common, content, eol)) => { - b.ws(ws); - b.token(SyntaxKind::COMMA, common); - if let Some((ws, text)) = content { - b.ws(ws); - b.text(text); - } - b.text(eol); - } - _ if idx == 0 => return Err(nom::Err::Error(())), - _ => break, - } - } - - let (input, _) = iter.finish()?; - - let (input, post_blank) = blank_lines(input)?; - - b.children.extend(post_blank); - - Ok((input, b.finish(SyntaxKind::FIXED_WIDTH))) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn fixed_width_node(input: Input) -> IResult { - crate::lossless_parser!(fixed_width_node_base, input) -} - -#[test] -fn parse() { - use crate::{ast::FixedWidth, tests::to_ast}; - - let to_fixed_width = to_ast::(fixed_width_node); - - insta::assert_debug_snapshot!( - to_fixed_width( - r#": A -: -: B -: C - - "# - ).syntax, - @r###" - FIXED_WIDTH@0..19 - COMMA@0..1 ":" - WHITESPACE@1..2 " " - TEXT@2..3 "A" - TEXT@3..4 "\n" - COMMA@4..5 ":" - TEXT@5..6 "\n" - COMMA@6..7 ":" - WHITESPACE@7..8 " " - TEXT@8..9 "B" - TEXT@9..10 "\n" - COMMA@10..11 ":" - WHITESPACE@11..12 " " - TEXT@12..13 "C" - TEXT@13..14 "\n" - 
BLANK_LINE@14..15 "\n" - BLANK_LINE@15..19 " " - "### - ); -} diff --git a/src/syntax/fn_def.rs b/src/syntax/fn_def.rs deleted file mode 100644 index 10346dc..0000000 --- a/src/syntax/fn_def.rs +++ /dev/null @@ -1,157 +0,0 @@ -use nom::{ - bytes::complete::{tag, take_while1}, - combinator::map, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{ - blank_lines, colon_token, l_bracket_token, r_bracket_token, trim_line_end, GreenElement, - NodeBuilder, - }, - input::Input, - keyword::affiliated_keyword_nodes, - SyntaxKind, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn fn_def_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - affiliated_keyword_nodes, - l_bracket_token, - tag("fn"), - colon_token, - take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), - r_bracket_token, - trim_line_end, - blank_lines, - )), - |( - affiliated_keywords, - l_bracket, - fn_, - colon, - label, - r_bracket, - (content, ws_, nl), - post_blank, - )| { - let mut b = NodeBuilder::new(); - b.children.extend(affiliated_keywords); - b.push(l_bracket); - b.text(fn_); - b.push(colon); - b.text(label); - b.push(r_bracket); - b.text(content); - b.ws(ws_); - b.nl(nl); - b.children.extend(post_blank); - b.finish(SyntaxKind::FN_DEF) - }, - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::ParseConfig; - use crate::{ast::FnDef, tests::to_ast}; - - let to_fn_def = to_ast::(fn_def_node); - - insta::assert_debug_snapshot!( - to_fn_def("[fn:1] https://orgmode.org").syntax, - @r###" - FN_DEF@0..26 - L_BRACKET@0..1 "[" - TEXT@1..3 "fn" - COLON@3..4 ":" - TEXT@4..5 "1" - R_BRACKET@5..6 "]" - TEXT@6..26 " https://orgmode.org" - "### - ); - - insta::assert_debug_snapshot!( - to_fn_def("[fn:word_1] https://orgmode.org").syntax, - @r###" - FN_DEF@0..31 - L_BRACKET@0..1 "[" - TEXT@1..3 "fn" - COLON@3..4 ":" - TEXT@4..10 "word_1" - R_BRACKET@10..11 
"]" - TEXT@11..31 " https://orgmode.org" - "### - ); - - insta::assert_debug_snapshot!( - to_fn_def("[fn:WORD-1] https://orgmode.org").syntax, - @r###" - FN_DEF@0..31 - L_BRACKET@0..1 "[" - TEXT@1..3 "fn" - COLON@3..4 ":" - TEXT@4..10 "WORD-1" - R_BRACKET@10..11 "]" - TEXT@11..31 " https://orgmode.org" - "### - ); - - insta::assert_debug_snapshot!( - to_fn_def("[fn:WORD]").syntax, - @r###" - FN_DEF@0..9 - L_BRACKET@0..1 "[" - TEXT@1..3 "fn" - COLON@3..4 ":" - TEXT@4..8 "WORD" - R_BRACKET@8..9 "]" - "### - ); - - insta::assert_debug_snapshot!( - to_fn_def("[fn:1] In particular, the parser requires stars at column 0 to be\n").syntax, - @r###" - FN_DEF@0..66 - L_BRACKET@0..1 "[" - TEXT@1..3 "fn" - COLON@3..4 ":" - TEXT@4..5 "1" - R_BRACKET@5..6 "]" - TEXT@6..65 " In particular, the p ..." - NEW_LINE@65..66 "\n" - "### - ); - - let config = &ParseConfig::default(); - - assert!(fn_def_node(("[fn:] https://orgmode.org", config).into()).is_err()); - assert!(fn_def_node(("[fn:wor d] https://orgmode.org", config).into()).is_err()); - assert!(fn_def_node(("[fn:WORD https://orgmode.org", config).into()).is_err()); - - insta::assert_debug_snapshot!( - to_fn_def("#+ATTR_poi: 1\n[fn:WORD-1] https://orgmode.org").syntax, - @r###" - FN_DEF@0..45 - AFFILIATED_KEYWORD@0..14 - HASH_PLUS@0..2 "#+" - TEXT@2..10 "ATTR_poi" - COLON@10..11 ":" - TEXT@11..13 " 1" - NEW_LINE@13..14 "\n" - L_BRACKET@14..15 "[" - TEXT@15..17 "fn" - COLON@17..18 ":" - TEXT@18..24 "WORD-1" - R_BRACKET@24..25 "]" - TEXT@25..45 " https://orgmode.org" - "### - ); -} diff --git a/src/syntax/fn_ref.rs b/src/syntax/fn_ref.rs deleted file mode 100644 index c190825..0000000 --- a/src/syntax/fn_ref.rs +++ /dev/null @@ -1,122 +0,0 @@ -use memchr::memchr2_iter; -use nom::{ - bytes::complete::{tag, take_while}, - combinator::opt, - sequence::tuple, - Err, IResult, InputTake, -}; - -use super::{ - combinator::{colon_token, l_bracket_token, node, r_bracket_token, GreenElement}, - input::Input, - 
object::standard_object_nodes, - SyntaxKind::*, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn fn_ref_node(input: Input) -> IResult { - crate::lossless_parser!(fn_ref_node_base, input) -} - -fn fn_ref_node_base(input: Input) -> IResult { - let (input, (l_bracket, fn_, colon, label, definition, r_bracket)) = tuple(( - l_bracket_token, - tag("fn"), - colon_token, - take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), - opt(tuple((colon_token, balanced_brackets))), - r_bracket_token, - ))(input)?; - - let mut children = vec![l_bracket, fn_.text_token(), colon, label.text_token()]; - if let Some((colon, definition)) = definition { - children.push(colon); - children.extend(standard_object_nodes(definition)); - } - children.push(r_bracket); - - Ok((input, node(FN_REF, children))) -} - -fn balanced_brackets(input: Input) -> IResult { - let mut pairs = 1; - let bytes = input.as_bytes(); - for i in memchr2_iter(b'[', b']', bytes) { - if bytes[i] == b'[' { - pairs += 1; - } else if pairs != 1 { - pairs -= 1; - } else { - return Ok(input.take_split(i)); - } - } - Err(Err::Error(())) -} - -#[test] -fn parse() { - use crate::{ast::FnRef, tests::to_ast, ParseConfig}; - - let to_fn_ref = to_ast::(fn_ref_node); - - insta::assert_debug_snapshot!( - to_fn_ref("[fn:1]").syntax, - @r###" - FN_REF@0..6 - L_BRACKET@0..1 "[" - TEXT@1..3 "fn" - COLON@3..4 ":" - TEXT@4..5 "1" - R_BRACKET@5..6 "]" - "### - ); - - insta::assert_debug_snapshot!( - to_fn_ref("[fn:1:2]").syntax, - @r###" - FN_REF@0..8 - L_BRACKET@0..1 "[" - TEXT@1..3 "fn" - COLON@3..4 ":" - TEXT@4..5 "1" - COLON@5..6 ":" - TEXT@6..7 "2" - R_BRACKET@7..8 "]" - "### - ); - - insta::assert_debug_snapshot!( - to_fn_ref("[fn::2]").syntax, - @r###" - FN_REF@0..7 - L_BRACKET@0..1 "[" - TEXT@1..3 "fn" - COLON@3..4 ":" - TEXT@4..4 "" - COLON@4..5 ":" - TEXT@5..6 "2" - R_BRACKET@6..7 "]" - "### - ); - - insta::assert_debug_snapshot!( - 
to_fn_ref("[fn::[]]").syntax, - @r###" - FN_REF@0..8 - L_BRACKET@0..1 "[" - TEXT@1..3 "fn" - COLON@3..4 ":" - TEXT@4..4 "" - COLON@4..5 ":" - TEXT@5..7 "[]" - R_BRACKET@7..8 "]" - "### - ); - - let config = &ParseConfig::default(); - - assert!(fn_ref_node(("[fn::[]", config).into()).is_err()); -} diff --git a/src/syntax/headline.rs b/src/syntax/headline.rs deleted file mode 100644 index d094d30..0000000 --- a/src/syntax/headline.rs +++ /dev/null @@ -1,369 +0,0 @@ -use memchr::memrchr_iter; -use nom::{ - bytes::complete::take_while1, - character::complete::{anychar, space0}, - combinator::{map, opt}, - sequence::tuple, - IResult, InputTake, Slice, -}; - -use super::{ - combinator::{ - hash_token, l_bracket_token, line_starts_iter, node, r_bracket_token, token, trim_line_end, - GreenElement, NodeBuilder, - }, - drawer::property_drawer_node, - element::element_nodes, - input::Input, - object::standard_object_nodes, - planning::planning_node, - SyntaxKind::*, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn headline_node(input: Input) -> IResult { - debug_assert!(!input.is_empty()); - crate::lossless_parser!(headline_node_base, input) -} - -fn headline_node_base(input: Input) -> IResult { - let (input, stars) = headline_stars(input)?; - - let mut b = NodeBuilder::new(); - - b.token(HEADLINE_STARS, stars); - - let (input, ws) = space0(input)?; - b.ws(ws); - - let (input, headline_keyword) = opt(headline_keyword_token)(input)?; - - if let Some((headline_keyword, ws)) = headline_keyword { - b.push(headline_keyword); - b.ws(ws); - } - - let (input, headline_priority) = opt(headline_priority_node)(input)?; - - if let Some((headline_priority, ws)) = headline_priority { - b.push(headline_priority); - b.ws(ws); - } - - let (input, (title_and_tags, ws_, nl)) = trim_line_end(input)?; - let (title, tags) = opt(headline_tags_node)(title_and_tags)?; - - if !title.is_empty() { - 
b.push(node(HEADLINE_TITLE, standard_object_nodes(title))); - } - b.push_opt(tags); - b.ws(ws_); - b.nl(nl); - - if input.is_empty() { - return Ok((input, b.finish(HEADLINE))); - } - - let (input, planning) = opt(planning_node)(input)?; - b.push_opt(planning); - - if input.is_empty() { - return Ok((input, b.finish(HEADLINE))); - } - - let (input, property_drawer) = opt(property_drawer_node)(input)?; - b.push_opt(property_drawer); - - if input.is_empty() { - return Ok((input, b.finish(HEADLINE))); - } - - let (input, section) = opt(section_node)(input)?; - b.push_opt(section); - - let mut i = input; - let current_level = stars.len(); - while !i.is_empty() { - let next_level = i.bytes().take_while(|&c| c == b'*').count(); - - if next_level <= current_level { - break; - } - - let (input, headline) = headline_node(i)?; - b.push(headline); - debug_assert!(i.len() > input.len(), "{} > {}", i.len(), input.len()); - i = input; - } - - Ok((i, b.finish(HEADLINE))) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn section_node(input: Input) -> IResult { - debug_assert!(!input.is_empty()); - let (input, section) = section_text(input)?; - Ok((input, node(SECTION, element_nodes(section)?))) -} - -fn section_text(input: Input) -> IResult { - for (input, section) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) { - if headline_stars(input).is_ok() { - if section.is_empty() { - return Err(nom::Err::Error(())); - } - - return Ok((input, section)); - } - } - - Ok(input.take_split(input.len())) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -fn headline_stars(input: Input) -> IResult { - let bytes = input.as_bytes(); - let level = bytes.iter().take_while(|&&c| c == b'*').count(); - - if level == 0 { - Err(nom::Err::Error(())) - } - // headline stars must be followed by space - else if matches!(bytes.get(level), 
Some(b' ')) { - Ok(input.take_split(level)) - } else { - Err(nom::Err::Error(())) - } -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -fn headline_tags_node(input: Input) -> IResult { - if !input.s.ends_with(':') { - return Err(nom::Err::Error(())); - }; - - let bytes = input.as_bytes(); - - // we're going to skip to first colon, so we start from the - // second last character - let mut i = input.len() - 1; - let mut can_not_be_ws = true; - let mut children = vec![token(COLON, ":")]; - - for ii in memrchr_iter(b':', bytes).skip(1) { - let item = &bytes[ii + 1..i]; - - if item.is_empty() { - children.push(token(COLON, ":")); - can_not_be_ws = false; - debug_assert!(i > ii, "{} > {}", i, ii); - i = ii; - } else if String::from_utf8_lossy(item) - .chars() - // https://github.com/yyr/org-mode/blob/d8494b5668ad4d4e68e83228ae8451eaa01d2220/lisp/org-element.el#L922C25-L922C32 - .all(|c| c.is_alphanumeric() || c == '_' || c == '@' || c == '#' || c == '%') - { - children.push(input.slice(ii + 1..i).text_token()); - children.push(token(COLON, ":")); - can_not_be_ws = false; - debug_assert!(i > ii, "{} > {}", i, ii); - i = ii; - } else if item.iter().all(|&c| c == b' ' || c == b'\t') && !can_not_be_ws { - children.push(input.slice(ii + 1..i).ws_token()); - children.push(token(COLON, ":")); - can_not_be_ws = true; - debug_assert!(i > ii, "{} > {}", i, ii); - i = ii; - } else { - break; - } - } - - if children.len() <= 2 { - return Err(nom::Err::Error(())); - } - - if i != 0 && bytes[i - 1] != b' ' && bytes[i - 1] != b'\t' { - return Err(nom::Err::Error(())); - } - - // we parse headline tag from right to left, - // so we need to reverse the result after it finishes - children.reverse(); - - Ok((input.slice(0..i), node(HEADLINE_TAGS, children))) -} - -fn headline_keyword_token(input: Input) -> IResult { - let (input, word) = take_while1(|c: char| !c.is_ascii_whitespace())(input)?; - let (input, ws) = 
space0(input)?; - if input.c.todo_keywords.0.iter().any(|k| k == word.s) { - Ok((input, (word.token(HEADLINE_KEYWORD_TODO), ws))) - } else if input.c.todo_keywords.1.iter().any(|k| k == word.s) { - Ok((input, (word.token(HEADLINE_KEYWORD_DONE), ws))) - } else { - Err(nom::Err::Error(())) - } -} - -fn headline_priority_node(input: Input) -> IResult { - let (input, node) = map( - tuple((l_bracket_token, hash_token, anychar, r_bracket_token)), - |(l_bracket, hash, char, r_bracket)| { - node( - HEADLINE_PRIORITY, - [l_bracket, hash, token(TEXT, &char.to_string()), r_bracket], - ) - }, - )(input)?; - - let (input, ws) = space0(input)?; - - Ok((input, (node, ws))) -} - -#[test] -fn parse() { - use crate::{ast::Headline, tests::to_ast, ParseConfig}; - - let to_headline = to_ast::(headline_node); - - insta::assert_debug_snapshot!( - to_headline("* foo").syntax, - @r###" - HEADLINE@0..5 - HEADLINE_STARS@0..1 "*" - WHITESPACE@1..2 " " - HEADLINE_TITLE@2..5 - TEXT@2..5 "foo" - "### - ); - - insta::assert_debug_snapshot!( - to_headline("* foo\n\n** bar").syntax, - @r###" - HEADLINE@0..13 - HEADLINE_STARS@0..1 "*" - WHITESPACE@1..2 " " - HEADLINE_TITLE@2..5 - TEXT@2..5 "foo" - NEW_LINE@5..6 "\n" - SECTION@6..7 - PARAGRAPH@6..7 - BLANK_LINE@6..7 "\n" - HEADLINE@7..13 - HEADLINE_STARS@7..9 "**" - WHITESPACE@9..10 " " - HEADLINE_TITLE@10..13 - TEXT@10..13 "bar" - "### - ); - - insta::assert_debug_snapshot!( - to_headline("* TODO foo\nbar\n** baz\n").syntax, - @r###" - HEADLINE@0..22 - HEADLINE_STARS@0..1 "*" - WHITESPACE@1..2 " " - HEADLINE_KEYWORD_TODO@2..6 "TODO" - WHITESPACE@6..7 " " - HEADLINE_TITLE@7..10 - TEXT@7..10 "foo" - NEW_LINE@10..11 "\n" - SECTION@11..15 - PARAGRAPH@11..15 - TEXT@11..15 "bar\n" - HEADLINE@15..22 - HEADLINE_STARS@15..17 "**" - WHITESPACE@17..18 " " - HEADLINE_TITLE@18..21 - TEXT@18..21 "baz" - NEW_LINE@21..22 "\n" - "### - ); - - insta::assert_debug_snapshot!( - to_headline("** [#A] foo\n* baz").syntax, - @r###" - HEADLINE@0..12 - HEADLINE_STARS@0..2 
"**" - WHITESPACE@2..3 " " - HEADLINE_PRIORITY@3..7 - L_BRACKET@3..4 "[" - HASH@4..5 "#" - TEXT@5..6 "A" - R_BRACKET@6..7 "]" - WHITESPACE@7..8 " " - HEADLINE_TITLE@8..11 - TEXT@8..11 "foo" - NEW_LINE@11..12 "\n" - "### - ); - - let config = &ParseConfig::default(); - - assert!(headline_node(("_ ", config).into()).is_err()); - assert!(headline_node(("*", config).into()).is_err()); - assert!(headline_node((" * ", config).into()).is_err()); - assert!(headline_node(("**", config).into()).is_err()); - assert!(headline_node(("**\n", config).into()).is_err()); - assert!(headline_node(("**\r", config).into()).is_err()); - assert!(headline_node(("**\t", config).into()).is_err()); -} - -#[test] -fn issue_15_16() { - use crate::{ast::Headline, tests::to_ast}; - - let to_headline = to_ast::(headline_node); - - assert!(to_headline("* a ::").tags().count() == 0); - assert!(to_headline("* a : :").tags().count() == 0); - assert!(to_headline("* a :(:").tags().count() == 0); - assert!(to_headline("* a :a: :").tags().count() == 0); - assert!(to_headline("* a :a :").tags().count() == 0); - assert!(to_headline("* a a:").tags().count() == 0); - assert!(to_headline("* a :a").tags().count() == 0); - - let tags = to_headline("* a \t:_:").tags(); - assert_eq!( - vec!["_".to_string()], - tags.map(|x| x.to_string()).collect::>(), - ); - - let tags = to_headline("* a \t :@:").tags(); - assert_eq!( - vec!["@".to_string()], - tags.map(|x| x.to_string()).collect::>(), - ); - - let tags = to_headline("* a :#:").tags(); - assert_eq!( - vec!["#".to_string()], - tags.map(|x| x.to_string()).collect::>(), - ); - - let tags = to_headline("* a\t :%:").tags(); - assert_eq!( - vec!["%".to_string()], - tags.map(|x| x.to_string()).collect::>(), - ); - - let tags = to_headline("* a :余: :破:").tags(); - assert_eq!( - vec!["余".to_string(), "破".to_string()], - tags.map(|x| x.to_string()).collect::>(), - ); -} diff --git a/src/syntax/inline_call.rs b/src/syntax/inline_call.rs deleted file mode 100644 index 
0e8d058..0000000 --- a/src/syntax/inline_call.rs +++ /dev/null @@ -1,130 +0,0 @@ -use nom::{ - bytes::complete::{tag, take_till}, - combinator::{map, opt}, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{ - l_bracket_token, l_parens_token, node, r_bracket_token, r_parens_token, GreenElement, - }, - input::Input, - SyntaxKind, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn inline_call_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - tag("call_"), - take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')'), - opt(tuple(( - l_bracket_token, - take_till(|c| c == ']' || c == '\n'), - r_bracket_token, - ))), - l_parens_token, - take_till(|c| c == ')' || c == '\n'), - r_parens_token, - opt(tuple(( - l_bracket_token, - take_till(|c| c == ']' || c == '\n'), - r_bracket_token, - ))), - )), - |(call, name, inside_header, l_paren, arguments, r_paren, end_header)| { - let mut children = vec![call.text_token()]; - children.push(name.text_token()); - if let Some((l_bracket, header, r_bracket)) = inside_header { - children.push(l_bracket); - children.push(header.text_token()); - children.push(r_bracket); - } - children.push(l_paren); - children.push(arguments.text_token()); - children.push(r_paren); - if let Some((l_bracket, header, r_bracket)) = end_header { - children.push(l_bracket); - children.push(header.text_token()); - children.push(r_bracket); - } - node(SyntaxKind::INLINE_CALL, children) - }, - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::{ast::InlineCall, tests::to_ast}; - - let to_inline_call = to_ast::(inline_call_node); - - let call = to_inline_call("call_square(4)"); - insta::assert_debug_snapshot!( - call.syntax, - @r###" - INLINE_CALL@0..14 - TEXT@0..5 "call_" - TEXT@5..11 "square" - L_PARENS@11..12 "(" - TEXT@12..13 "4" - R_PARENS@13..14 ")" - "### - ); - - let call = to_inline_call("call_square[:results 
output](4)"); - insta::assert_debug_snapshot!( - call.syntax, - @r###" - INLINE_CALL@0..31 - TEXT@0..5 "call_" - TEXT@5..11 "square" - L_BRACKET@11..12 "[" - TEXT@12..27 ":results output" - R_BRACKET@27..28 "]" - L_PARENS@28..29 "(" - TEXT@29..30 "4" - R_PARENS@30..31 ")" - "### - ); - - let call = to_inline_call("call_square(4)[:results html]"); - insta::assert_debug_snapshot!( - call.syntax, - @r###" - INLINE_CALL@0..29 - TEXT@0..5 "call_" - TEXT@5..11 "square" - L_PARENS@11..12 "(" - TEXT@12..13 "4" - R_PARENS@13..14 ")" - L_BRACKET@14..15 "[" - TEXT@15..28 ":results html" - R_BRACKET@28..29 "]" - "### - ); - - let call = to_inline_call("call_square[:results output](4)[:results html]"); - insta::assert_debug_snapshot!( - call.syntax, - @r###" - INLINE_CALL@0..46 - TEXT@0..5 "call_" - TEXT@5..11 "square" - L_BRACKET@11..12 "[" - TEXT@12..27 ":results output" - R_BRACKET@27..28 "]" - L_PARENS@28..29 "(" - TEXT@29..30 "4" - R_PARENS@30..31 ")" - L_BRACKET@31..32 "[" - TEXT@32..45 ":results html" - R_BRACKET@45..46 "]" - "### - ); -} diff --git a/src/syntax/inline_src.rs b/src/syntax/inline_src.rs deleted file mode 100644 index 112c01f..0000000 --- a/src/syntax/inline_src.rs +++ /dev/null @@ -1,88 +0,0 @@ -use nom::{ - bytes::complete::{tag, take_till, take_while1}, - combinator::{map, opt}, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{ - l_bracket_token, l_curly_token, node, r_bracket_token, r_curly_token, GreenElement, - }, - input::Input, - SyntaxKind, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn inline_src_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - tag("src_"), - take_while1(|c: char| !c.is_ascii_whitespace() && c != '[' && c != '{'), - opt(tuple(( - l_bracket_token, - take_till(|c| c == '\n' || c == ']'), - r_bracket_token, - ))), - l_curly_token, - take_till(|c| c == '\n' || c == '}'), - r_curly_token, - )), - |(src, lang, options, 
l_curly, body, r_curly)| { - let mut children = vec![src.text_token(), lang.text_token()]; - if let Some((l_bracket, options, r_bracket)) = options { - children.push(l_bracket); - children.push(options.text_token()); - children.push(r_bracket); - } - children.push(l_curly); - children.push(body.text_token()); - children.push(r_curly); - node(SyntaxKind::INLINE_SRC, children) - }, - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::{ast::InlineSrc, tests::to_ast, ParseConfig}; - - let to_inline_src = to_ast::(inline_src_node); - - insta::assert_debug_snapshot!( - to_inline_src("src_C{int a = 0;}").syntax, - @r###" - INLINE_SRC@0..17 - TEXT@0..4 "src_" - TEXT@4..5 "C" - L_CURLY@5..6 "{" - TEXT@6..16 "int a = 0;" - R_CURLY@16..17 "}" - "### - ); - - insta::assert_debug_snapshot!( - to_inline_src("src_xml[:exports code]{text}").syntax, - @r###" - INLINE_SRC@0..39 - TEXT@0..4 "src_" - TEXT@4..7 "xml" - L_BRACKET@7..8 "[" - TEXT@8..21 ":exports code" - R_BRACKET@21..22 "]" - L_CURLY@22..23 "{" - TEXT@23..38 "text" - R_CURLY@38..39 "}" - "### - ); - - let config = &ParseConfig::default(); - - assert!(inline_src_node(("src_xml[:exports code]{text", config).into()).is_err()); - assert!(inline_src_node(("src_[:exports code]{text}", config).into()).is_err()); - assert!(inline_src_node(("src_xml[:exports code]", config).into()).is_err()); -} diff --git a/src/syntax/input.rs b/src/syntax/input.rs deleted file mode 100644 index 307948c..0000000 --- a/src/syntax/input.rs +++ /dev/null @@ -1,242 +0,0 @@ -use nom::{ - error::{ErrorKind, ParseError}, - Compare, CompareResult, Err, FindSubstring, IResult, InputIter, InputLength, InputTake, - InputTakeAtPosition, Needed, Offset, Slice, -}; -use std::{ - ops::{Deref, Range, RangeFrom, RangeFull, RangeTo}, - str::{CharIndices, Chars}, -}; - -use super::{ - combinator::{token, GreenElement}, - SyntaxKind, -}; -use crate::config::ParseConfig; - -/// A custom Input struct -/// -/// It helps us to pass the 
`ParseConfig` all the way down to each parsers -#[derive(Clone, Copy, Debug)] -pub struct Input<'a> { - pub(crate) s: &'a str, - pub(crate) c: &'a ParseConfig, -} - -impl<'a> Input<'a> { - #[inline] - pub(crate) fn of(&self, i: &'a str) -> Input<'a> { - Input { s: i, c: self.c } - } - - #[inline] - pub fn as_str(&self) -> &'a str { - self.s - } - - #[inline] - pub fn token(&self, kind: SyntaxKind) -> GreenElement { - token(kind, self.s) - } - - #[inline] - pub fn text_token(&self) -> GreenElement { - token(SyntaxKind::TEXT, self.s) - } - - #[inline] - pub fn ws_token(&self) -> GreenElement { - token(SyntaxKind::WHITESPACE, self.s) - } - - #[inline] - pub fn nl_token(&self) -> GreenElement { - token(SyntaxKind::NEW_LINE, self.s) - } -} - -impl<'a> Deref for Input<'a> { - type Target = str; - - #[inline] - fn deref(&self) -> &'a str { - self.s - } -} - -impl<'a> From<(&'a str, &'a ParseConfig)> for Input<'a> { - fn from(value: (&'a str, &'a ParseConfig)) -> Self { - Input { - s: value.0, - c: value.1, - } - } -} - -impl<'a> Slice> for Input<'a> { - fn slice(&self, range: Range) -> Self { - self.of(self.s.slice(range)) - } -} - -impl<'a> Slice> for Input<'a> { - fn slice(&self, range: RangeTo) -> Self { - self.of(self.s.slice(range)) - } -} - -impl<'a> Slice> for Input<'a> { - fn slice(&self, range: RangeFrom) -> Self { - self.of(self.s.slice(range)) - } -} - -impl<'a> Slice for Input<'a> { - fn slice(&self, range: RangeFull) -> Self { - self.of(self.s.slice(range)) - } -} - -impl<'a, 'b> FindSubstring<&'b str> for Input<'a> { - fn find_substring(&self, substr: &str) -> Option { - self.s.find(substr) - } -} - -impl<'a, 'b> Compare<&'b str> for Input<'a> { - #[inline] - fn compare(&self, t: &'b str) -> CompareResult { - self.s.compare(t) - } - - #[inline] - fn compare_no_case(&self, t: &'b str) -> CompareResult { - self.s.compare_no_case(t) - } -} - -impl<'a> InputLength for Input<'a> { - #[inline] - fn input_len(&self) -> usize { - self.len() - } -} - -impl<'a> 
InputIter for Input<'a> { - type Item = char; - type Iter = CharIndices<'a>; - type IterElem = Chars<'a>; - #[inline] - fn iter_indices(&self) -> Self::Iter { - self.s.char_indices() - } - #[inline] - fn iter_elements(&self) -> Self::IterElem { - self.s.chars() - } - fn position

    (&self, predicate: P) -> Option - where - P: Fn(Self::Item) -> bool, - { - self.s.position(predicate) - } - #[inline] - fn slice_index(&self, count: usize) -> Result { - self.s.slice_index(count) - } -} - -impl<'a> InputTake for Input<'a> { - #[inline] - fn take(&self, count: usize) -> Self { - let s = self.s.take(count); - self.of(s) - } - #[inline] - fn take_split(&self, count: usize) -> (Self, Self) { - let (l, r) = self.s.take_split(count); - (self.of(l), self.of(r)) - } -} - -impl<'a> InputTakeAtPosition for Input<'a> { - type Item = char; - - #[inline] - fn split_at_position>(&self, predicate: P) -> IResult - where - P: Fn(Self::Item) -> bool, - { - match self.s.split_at_position::<_, (&str, ErrorKind)>(predicate) { - Ok((l, r)) => Ok((self.of(l), self.of(r))), - Err(Err::Error((i, kind))) => Err(Err::Error(E::from_error_kind(self.of(i), kind))), - Err(Err::Failure((i, kind))) => Err(Err::Failure(E::from_error_kind(self.of(i), kind))), - Err(Err::Incomplete(x)) => Err(Err::Incomplete(x)), - } - } - - #[inline] - fn split_at_position1>( - &self, - predicate: P, - e: ErrorKind, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - match self - .s - .split_at_position1::<_, (&str, ErrorKind)>(predicate, e) - { - Ok((l, r)) => Ok((self.of(l), self.of(r))), - Err(Err::Error((i, kind))) => Err(Err::Error(E::from_error_kind(self.of(i), kind))), - Err(Err::Failure((i, kind))) => Err(Err::Failure(E::from_error_kind(self.of(i), kind))), - Err(Err::Incomplete(x)) => Err(Err::Incomplete(x)), - } - } - - #[inline] - fn split_at_position_complete>( - &self, - predicate: P, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - match self - .s - .split_at_position_complete::<_, (&str, ErrorKind)>(predicate) - { - Ok((l, r)) => Ok((self.of(l), self.of(r))), - Err(Err::Error((i, kind))) => Err(Err::Error(E::from_error_kind(self.of(i), kind))), - Err(Err::Failure((i, kind))) => Err(Err::Failure(E::from_error_kind(self.of(i), kind))), - Err(Err::Incomplete(x)) => 
Err(Err::Incomplete(x)), - } - } - - #[inline] - fn split_at_position1_complete>( - &self, - predicate: P, - e: ErrorKind, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - match self - .s - .split_at_position1_complete::<_, (&str, ErrorKind)>(predicate, e) - { - Ok((l, r)) => Ok((self.of(l), self.of(r))), - Err(Err::Error((i, kind))) => Err(Err::Error(E::from_error_kind(self.of(i), kind))), - Err(Err::Failure((i, kind))) => Err(Err::Failure(E::from_error_kind(self.of(i), kind))), - Err(Err::Incomplete(x)) => Err(Err::Incomplete(x)), - } - } -} - -impl<'a> Offset for Input<'a> { - fn offset(&self, second: &Self) -> usize { - self.s.offset(second.s) - } -} diff --git a/src/syntax/keyword.rs b/src/syntax/keyword.rs deleted file mode 100644 index 327f46f..0000000 --- a/src/syntax/keyword.rs +++ /dev/null @@ -1,290 +0,0 @@ -#![allow(clippy::type_complexity)] - -use nom::{ - branch::alt, - bytes::complete::{tag, take_till, take_while1}, - character::complete::space0, - combinator::{recognize, verify}, - sequence::tuple, - IResult, InputTake, -}; - -use super::{ - combinator::{blank_lines, hash_plus_token, node, trim_line_end, GreenElement}, - input::Input, - SyntaxKind, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn keyword_node(input: Input) -> IResult { - fn f(input: Input) -> IResult { - let (input, (key, mut nodes)) = keyword_node_base(input)?; - let (input, post_blank) = blank_lines(input)?; - nodes.extend(post_blank); - Ok(( - input, - node( - if key == "CALL" { - SyntaxKind::BABEL_CALL - } else { - SyntaxKind::KEYWORD - }, - nodes, - ), - )) - } - crate::lossless_parser!(f, input) -} - -/// Return empty vector if input doesn't contain affiliated keyword, or affiliated keyword is -/// followed by blank lines. 
-#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn affiliated_keyword_nodes(input: Input) -> IResult, ()> { - let mut children = vec![]; - let mut i = input; - - while !i.is_empty() { - let Ok((input_, (key, nodes))) = keyword_node_base(i) else { - break; - }; - - let (input_, post_blank) = blank_lines(input_)?; - - // affiliated keyword can not followed by blank lines or eof - if !post_blank.is_empty() || input_.is_empty() { - return Ok((input, vec![])); - } - - if input_.c.affiliated_keywords.iter().all(|w| w != key) && !key.starts_with("ATTR_") { - break; - } - - debug_assert!(i.len() > input_.len(), "{} > {}", i.len(), input_.len()); - i = input_; - children.push(node(SyntaxKind::AFFILIATED_KEYWORD, nodes)); - } - - Ok((i, children)) -} - -pub fn tblfm_keyword_nodes(input: Input) -> IResult, ()> { - let mut children = vec![]; - let mut i = input; - - while !i.is_empty() { - let Ok((input, (key, nodes))) = keyword_node_base(i) else { - break; - }; - - if !key.eq_ignore_ascii_case("TBLFM") { - break; - } - - debug_assert!(i.len() > input.len(), "{} > {}", i.len(), input.len()); - i = input; - children.push(node(SyntaxKind::KEYWORD, nodes)); - } - - Ok((i, children)) -} - -fn keyword_node_base(input: Input) -> IResult), ()> { - let (input, (ws, hash_plus)) = tuple((space0, hash_plus_token))(input)?; - - let (input, (key, optional, colon)) = alt((key_with_optional, key))(input)?; - - let (input, (value, ws_, nl)) = trim_line_end(input)?; - - let mut children = vec![]; - if !ws.is_empty() { - children.push(ws.ws_token()); - } - children.push(hash_plus); - children.push(key.text_token()); - if let Some((l_bracket, optional, r_bracket)) = optional { - children.push(l_bracket.token(SyntaxKind::L_BRACKET)); - children.push(optional.text_token()); - children.push(r_bracket.token(SyntaxKind::R_BRACKET)); - } - children.push(colon.token(SyntaxKind::COLON)); - children.push(value.text_token()); - if 
!ws_.is_empty() { - children.push(ws_.ws_token()); - } - if !nl.is_empty() { - children.push(nl.nl_token()); - } - - Ok((input, (key.s, children))) -} - -fn key(input: Input) -> IResult, Input), ()> { - let (input, output) = verify( - recognize(tuple(( - take_till(|c: char| c.is_ascii_whitespace() || c == ':'), - take_while1(|c: char| c == ':'), - ))), - |i: &Input| i.len() >= 2, - )(input)?; - let (colon, key) = output.take_split(output.len() - 1); - Ok((input, (key, None, colon))) -} - -fn key_with_optional( - input: Input, -) -> IResult, Input), ()> { - let (input, (key, r_backer, optional, l_backer, colon)) = tuple(( - alt((tag("CAPTION"), tag("RESULTS"))), - tag("["), - take_till(|c| c == '\r' || c == '\n' || c == ']'), - tag("]"), - tag(":"), - ))(input)?; - Ok((input, (key, Some((r_backer, optional, l_backer)), colon))) -} - -#[test] -fn parse() { - use crate::{ - ast::{BabelCall, Keyword}, - tests::to_ast, - ParseConfig, - }; - - let to_keyword = to_ast::(keyword_node); - - let to_babel_call = to_ast::(keyword_node); - - to_keyword("#+KEY:"); - to_keyword("#+::"); - to_keyword("#+::"); - to_keyword("#+:: "); - to_keyword("#+:: \n"); - to_keyword("#+::\n"); - - insta::assert_debug_snapshot!( - to_keyword("#+KEY:").syntax, - @r###" - KEYWORD@0..6 - HASH_PLUS@0..2 "#+" - TEXT@2..5 "KEY" - COLON@5..6 ":" - TEXT@6..6 "" - "### - ); - - insta::assert_debug_snapshot!( - to_keyword("#+KEY: VALUE").syntax, - @r###" - KEYWORD@0..12 - HASH_PLUS@0..2 "#+" - TEXT@2..5 "KEY" - COLON@5..6 ":" - TEXT@6..12 " VALUE" - "### - ); - - insta::assert_debug_snapshot!( - to_keyword("#+K_E_Y: VALUE").syntax, - @r###" - KEYWORD@0..14 - HASH_PLUS@0..2 "#+" - TEXT@2..7 "K_E_Y" - COLON@7..8 ":" - TEXT@8..14 " VALUE" - "### - ); - - insta::assert_debug_snapshot!( - to_keyword("#+KEY:VALUE\n").syntax, - @r###" - KEYWORD@0..12 - HASH_PLUS@0..2 "#+" - TEXT@2..5 "KEY" - COLON@5..6 ":" - TEXT@6..11 "VALUE" - NEW_LINE@11..12 "\n" - "### - ); - - insta::assert_debug_snapshot!( - 
to_keyword("#+RESULTS:").syntax, - @r###" - KEYWORD@0..10 - HASH_PLUS@0..2 "#+" - TEXT@2..9 "RESULTS" - COLON@9..10 ":" - TEXT@10..10 "" - "### - ); - - insta::assert_debug_snapshot!( - to_keyword("#+ATTR_LATEX: :width 5cm\n").syntax, - @r###" - KEYWORD@0..25 - HASH_PLUS@0..2 "#+" - TEXT@2..12 "ATTR_LATEX" - COLON@12..13 ":" - TEXT@13..24 " :width 5cm" - NEW_LINE@24..25 "\n" - "### - ); - - insta::assert_debug_snapshot!( - to_babel_call("#+CALL: double(n=4)").syntax, - @r###" - BABEL_CALL@0..19 - HASH_PLUS@0..2 "#+" - TEXT@2..6 "CALL" - COLON@6..7 ":" - TEXT@7..19 " double(n=4)" - "### - ); - - insta::assert_debug_snapshot!( - to_keyword("#+ABC[OPTIONAL]: Longer value.").syntax, - @r###" - KEYWORD@0..30 - HASH_PLUS@0..2 "#+" - TEXT@2..15 "ABC[OPTIONAL]" - COLON@15..16 ":" - TEXT@16..30 " Longer value." - "### - ); - - insta::assert_debug_snapshot!( - to_keyword("#+CAPTION: value").syntax, - @r###" - KEYWORD@0..16 - HASH_PLUS@0..2 "#+" - TEXT@2..9 "CAPTION" - COLON@9..10 ":" - TEXT@10..16 " value" - "### - ); - - insta::assert_debug_snapshot!( - to_keyword("#+CAPTION[caption optional]: value").syntax, - @r###" - KEYWORD@0..34 - HASH_PLUS@0..2 "#+" - TEXT@2..9 "CAPTION" - L_BRACKET@9..10 "[" - TEXT@10..26 "caption optional" - R_BRACKET@26..27 "]" - COLON@27..28 ":" - TEXT@28..34 " value" - "### - ); - - let config = &ParseConfig::default(); - - assert!(keyword_node(("#+KE Y: VALUE", config).into()).is_err()); - assert!(keyword_node(("#+ KEY: VALUE", config).into()).is_err()); -} diff --git a/src/syntax/latex_environment.rs b/src/syntax/latex_environment.rs deleted file mode 100644 index 8e20e21..0000000 --- a/src/syntax/latex_environment.rs +++ /dev/null @@ -1,127 +0,0 @@ -use nom::{ - bytes::complete::{tag, take_while1}, - character::complete::space0, - sequence::tuple, - IResult, InputTake, -}; - -use crate::SyntaxKind; - -use super::{ - combinator::{eol_or_eof, l_curly_token, line_starts_iter, node, r_curly_token, GreenElement}, - input::Input, -}; - -#[cfg_attr( 
- feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn latex_environment_node(input: Input) -> IResult { - crate::lossless_parser!(latex_environment_node_base, input) -} - -fn latex_environment_node_base(input: Input) -> IResult { - let (input, (ws1, begin, l1, name1, r1)) = tuple(( - space0, - tag("\\begin"), - l_curly_token, - take_while1(|c: char| c.is_ascii_alphanumeric() || c == '*'), - r_curly_token, - ))(input)?; - - for (input, contents) in line_starts_iter(input.s).map(|i| input.take_split(i)) { - if let Ok((input, (ws2, end, l2, name2, r2, ws3, nl))) = tuple(( - space0, - tag("\\end"), - l_curly_token, - tag(name1.s), - r_curly_token, - space0, - eol_or_eof, - ))(input) - { - return Ok(( - input, - node( - SyntaxKind::LATEX_ENVIRONMENT, - [ - ws1.ws_token(), - begin.text_token(), - l1, - name1.text_token(), - r1, - contents.text_token(), - ws2.ws_token(), - end.text_token(), - l2, - name2.text_token(), - r2, - ws3.ws_token(), - nl.nl_token(), - ], - ), - )); - } - } - - Err(nom::Err::Error(())) -} - -#[test] -fn parse() { - use crate::ast::LatexEnvironment; - use crate::config::ParseConfig; - use crate::tests::to_ast; - - let to_latex = to_ast::(latex_environment_node); - - insta::assert_debug_snapshot!( - to_latex(r"\begin{NAME}\end{NAME}").syntax, - @r###" - LATEX_ENVIRONMENT@0..22 - WHITESPACE@0..0 "" - TEXT@0..6 "\\begin" - L_CURLY@6..7 "{" - TEXT@7..11 "NAME" - R_CURLY@11..12 "}" - TEXT@12..12 "" - WHITESPACE@12..12 "" - TEXT@12..16 "\\end" - L_CURLY@16..17 "{" - TEXT@17..21 "NAME" - R_CURLY@21..22 "}" - WHITESPACE@22..22 "" - NEW_LINE@22..22 "" - "### - ); - - insta::assert_debug_snapshot!( - to_latex( - r"\begin{align*} - 2x - 5y &= 8 \\ - 3x + 9y &= -12 - \end{align*}" - ).syntax, - @r###" - LATEX_ENVIRONMENT@0..70 - WHITESPACE@0..0 "" - TEXT@0..6 "\\begin" - L_CURLY@6..7 "{" - TEXT@7..13 "align*" - R_CURLY@13..14 "}" - TEXT@14..54 "\n 2x - 5y &= 8 \\\\\n ..." 
- WHITESPACE@54..58 " " - TEXT@58..62 "\\end" - L_CURLY@62..63 "{" - TEXT@63..69 "align*" - R_CURLY@69..70 "}" - WHITESPACE@70..70 "" - NEW_LINE@70..70 "" - "### - ); - - let c = ParseConfig::default(); - - assert!(latex_environment_node((r"\begin{equation}\end{align}", &c).into()).is_err()); - assert!(latex_environment_node((r"\begin{_}\end{_}", &c).into()).is_err()); -} diff --git a/src/syntax/latex_fragment.rs b/src/syntax/latex_fragment.rs deleted file mode 100644 index 7ea5ec3..0000000 --- a/src/syntax/latex_fragment.rs +++ /dev/null @@ -1,199 +0,0 @@ -use nom::{ - branch::alt, - bytes::complete::{take_until1, take_while1}, - character::complete::alpha1, - sequence::tuple, - IResult, InputTake, -}; - -use crate::SyntaxKind; - -use super::{ - combinator::{ - backslash_token, dollar2_token, dollar_token, l_bracket_token, l_curly_token, - l_parens_token, node, r_bracket_token, r_curly_token, r_parens_token, GreenElement, - }, - input::Input, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn latex_fragment_node(input: Input) -> IResult { - debug_assert!(input.s.starts_with(['\\', '$'])); - let mut parser = alt((template1, template2, template3, template4, template5)); - crate::lossless_parser!(parser, input) -} - -// \NAME[CONTENTS1] \NAME{CONTENTS1} -fn template1(input: Input) -> IResult { - let (input, (backslash, name)) = tuple((backslash_token, alpha1))(input)?; - let (input, (l, content, r)) = alt(( - tuple(( - l_bracket_token, - take_while1(|c| c != '{' && c != '}' && c != '[' && c != ']' && c != '\r' && c != '\n'), - r_bracket_token, - )), - tuple(( - l_curly_token, - take_while1(|c| c != '{' && c != '}' && c != '\r' && c != '\n'), - r_curly_token, - )), - ))(input)?; - Ok(( - input, - node( - SyntaxKind::LATEX_FRAGMENT, - [backslash, name.text_token(), l, content.text_token(), r], - ), - )) -} - -// \(CONTENTS\) -fn template2(input: Input) -> IResult { - let (input, 
(backslash1, l)) = tuple((backslash_token, l_parens_token))(input)?; - if let Some(i) = jetscii::Substring::new("\\)").find(input.s) { - let (input, content) = input.take_split(i); - let (input, (backslash2, r)) = tuple((backslash_token, r_parens_token))(input)?; - Ok(( - input, - node( - SyntaxKind::LATEX_FRAGMENT, - [backslash1, l, content.text_token(), backslash2, r], - ), - )) - } else { - Err(nom::Err::Error(())) - } -} - -// \[CONTENTS\] -fn template3(input: Input) -> IResult { - let (input, (backslash1, l)) = tuple((backslash_token, l_bracket_token))(input)?; - if let Some(i) = jetscii::Substring::new("\\]").find(input.s) { - let (input, content) = input.take_split(i); - let (input, (backslash2, r)) = tuple((backslash_token, r_bracket_token))(input)?; - Ok(( - input, - node( - SyntaxKind::LATEX_FRAGMENT, - [backslash1, l, content.text_token(), backslash2, r], - ), - )) - } else { - Err(nom::Err::Error(())) - } -} - -// $$CONTENTS$$ -fn template4(input: Input) -> IResult { - let (input, l) = dollar2_token(input)?; - let (input, content) = take_until1("$$")(input)?; - let (input, r) = dollar2_token(input)?; - Ok(( - input, - node(SyntaxKind::LATEX_FRAGMENT, [l, content.text_token(), r]), - )) -} - -// $CONTENTS$ -fn template5(input: Input) -> IResult { - let (input, l) = dollar_token(input)?; - let (input, content) = take_until1("$")(input)?; - let (input, r) = dollar_token(input)?; - - let b = content.as_bytes()[0]; - if matches!(b, b'\r' | b'\n' | b' ' | b'\t' | b'.' | b',' | b';' | b'$') { - return Err(nom::Err::Error(())); - } - - let b = content.as_bytes()[content.s.len() - 1]; - if matches!(b, b'\r' | b'\n' | b' ' | b'\t' | b'.' 
| b',' | b'$') { - return Err(nom::Err::Error(())); - } - - let p = input.bytes().next(); - if let Some(p) = p { - if !matches!(p, b')' | b'}' | b']' | b'\'' | b'"' | b' ' | b'\r' | b'\n') { - return Err(nom::Err::Error(())); - } - } - - Ok(( - input, - node(SyntaxKind::LATEX_FRAGMENT, [l, content.text_token(), r]), - )) -} - -#[test] -fn parse() { - use crate::{ast::LatexFragment, tests::to_ast, ParseConfig}; - - let to_fragment = to_ast::(latex_fragment_node); - - insta::assert_debug_snapshot!( - to_fragment("\\enlargethispage{2\\baselineskip}").syntax, - @r###" - LATEX_FRAGMENT@0..32 - BACKSLASH@0..1 "\\" - TEXT@1..16 "enlargethispage" - L_CURLY@16..17 "{" - TEXT@17..31 "2\\baselineskip" - R_CURLY@31..32 "}" - "### - ); - - insta::assert_debug_snapshot!( - to_fragment("\\[a\\]").syntax, - @r###" - LATEX_FRAGMENT@0..5 - BACKSLASH@0..1 "\\" - L_BRACKET@1..2 "[" - TEXT@2..3 "a" - BACKSLASH@3..4 "\\" - R_BRACKET@4..5 "]" - "### - ); - - insta::assert_debug_snapshot!( - to_fragment("\\(e^{i \\pi}\\)").syntax, - @r###" - LATEX_FRAGMENT@0..13 - BACKSLASH@0..1 "\\" - L_PARENS@1..2 "(" - TEXT@2..11 "e^{i \\pi}" - BACKSLASH@11..12 "\\" - R_PARENS@12..13 ")" - "### - ); - - insta::assert_debug_snapshot!( - to_fragment("$\\frac{1}{3}$").syntax, - @r###" - LATEX_FRAGMENT@0..13 - DOLLAR@0..1 "$" - TEXT@1..12 "\\frac{1}{3}" - DOLLAR@12..13 "$" - "### - ); - - insta::assert_debug_snapshot!( - to_fragment("$a\nb$").syntax, - @r###" - LATEX_FRAGMENT@0..5 - DOLLAR@0..1 "$" - TEXT@1..4 "a\nb" - DOLLAR@4..5 "$" - "### - ); - - let c = ParseConfig::default(); - - assert!(latex_fragment_node(("$ LaTeXxxx$", &c).into()).is_err()); - assert!(latex_fragment_node(("$LaTeXxxx $", &c).into()).is_err()); - assert!(latex_fragment_node(("$a.$", &c).into()).is_err()); - assert!(latex_fragment_node(("$a$a", &c).into()).is_err()); - assert!(latex_fragment_node(("$$b\nol\nd*", &c).into()).is_err()); - assert!(latex_fragment_node(("$b\nol\nd*", &c).into()).is_err()); -} diff --git 
a/src/syntax/line_break.rs b/src/syntax/line_break.rs deleted file mode 100644 index 95b5789..0000000 --- a/src/syntax/line_break.rs +++ /dev/null @@ -1,71 +0,0 @@ -use nom::{character::complete::space0, combinator::map, sequence::tuple, IResult}; - -use crate::{ - syntax::combinator::{backslash_token, eol_or_eof, node}, - SyntaxKind, -}; - -use super::{combinator::GreenElement, input::Input}; - -pub fn line_break_node(input: Input) -> IResult { - debug_assert!(input.s.starts_with('\\')); - let mut parser = map( - tuple((backslash_token, backslash_token, space0, eol_or_eof)), - |(b1, b2, ws, nl)| { - node( - SyntaxKind::LINE_BREAK, - [b1, b2, ws.ws_token(), nl.nl_token()], - ) - }, - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::ast::LineBreak; - use crate::tests::to_ast; - - let to_line_break = to_ast::(line_break_node); - - insta::assert_debug_snapshot!( - to_line_break("\\\\\n").syntax, - @r###" - LINE_BREAK@0..3 - BACKSLASH@0..1 "\\" - BACKSLASH@1..2 "\\" - WHITESPACE@2..2 "" - NEW_LINE@2..3 "\n" - "### - ); - insta::assert_debug_snapshot!( - to_line_break("\\\\ \n").syntax, - @r###" - LINE_BREAK@0..6 - BACKSLASH@0..1 "\\" - BACKSLASH@1..2 "\\" - WHITESPACE@2..5 " " - NEW_LINE@5..6 "\n" - "### - ); - insta::assert_debug_snapshot!( - to_line_break("\\\\\r\n").syntax, - @r###" - LINE_BREAK@0..4 - BACKSLASH@0..1 "\\" - BACKSLASH@1..2 "\\" - WHITESPACE@2..2 "" - NEW_LINE@2..4 "\r\n" - "### - ); - insta::assert_debug_snapshot!( - to_line_break("\\\\ ").syntax, - @r###" - LINE_BREAK@0..6 - BACKSLASH@0..1 "\\" - BACKSLASH@1..2 "\\" - WHITESPACE@2..6 " " - NEW_LINE@6..6 "" - "### - ); -} diff --git a/src/syntax/link.rs b/src/syntax/link.rs deleted file mode 100644 index a2aa4d9..0000000 --- a/src/syntax/link.rs +++ /dev/null @@ -1,112 +0,0 @@ -use nom::{ - bytes::complete::take_while, - combinator::{map, opt}, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{ - l_bracket2_token, l_bracket_token, node, 
r_bracket2_token, r_bracket_token, GreenElement, - }, - input::Input, - object::link_description_object_nodes, - SyntaxKind::*, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn link_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - l_bracket2_token, - take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']'), - opt(tuple(( - r_bracket_token, - l_bracket_token, - take_while(|c: char| c != '[' && c != ']'), - ))), - r_bracket2_token, - )), - |(l_bracket2, path, desc, r_bracket2)| { - let mut children = vec![l_bracket2, path.token(LINK_PATH)]; - - if let Some((r_bracket, l_bracket, desc)) = desc { - children.extend([r_bracket, l_bracket]); - children.extend(link_description_object_nodes(desc)); - } - - children.push(r_bracket2); - - node(LINK, children) - }, - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::{ast::Link, tests::to_ast, ParseConfig}; - - let to_link = to_ast::(link_node); - - let link = to_link("[[#id]]"); - insta::assert_debug_snapshot!( - link.syntax, - @r###" - LINK@0..7 - L_BRACKET2@0..2 "[[" - LINK_PATH@2..5 "#id" - R_BRACKET2@5..7 "]]" - "### - ); - - let link = to_link("[[#id][desc]]"); - insta::assert_debug_snapshot!( - link.syntax, - @r###" - LINK@0..13 - L_BRACKET2@0..2 "[[" - LINK_PATH@2..5 "#id" - R_BRACKET@5..6 "]" - L_BRACKET@6..7 "[" - TEXT@7..11 "desc" - R_BRACKET2@11..13 "]]" - "### - ); - - let link = to_link("[[file:/home/dominik/images/jupiter.jpg]]"); - insta::assert_debug_snapshot!( - link.syntax, - @r###" - LINK@0..41 - L_BRACKET2@0..2 "[[" - LINK_PATH@2..39 "file:/home/dominik/im ..." 
- R_BRACKET2@39..41 "]]" - "### - ); - - let link = to_link("[[https://orgmode.org][*bold* description]]"); - insta::assert_debug_snapshot!( - link.syntax, - @r###" - LINK@0..43 - L_BRACKET2@0..2 "[[" - LINK_PATH@2..21 "https://orgmode.org" - R_BRACKET@21..22 "]" - L_BRACKET@22..23 "[" - BOLD@23..29 - STAR@23..24 "*" - TEXT@24..28 "bold" - STAR@28..29 "*" - TEXT@29..41 " description" - R_BRACKET2@41..43 "]]" - "### - ); - - let config = &ParseConfig::default(); - - assert!(link_node(("[[#id][desc]", config).into()).is_err()); -} diff --git a/src/syntax/list.rs b/src/syntax/list.rs deleted file mode 100644 index dbbd666..0000000 --- a/src/syntax/list.rs +++ /dev/null @@ -1,617 +0,0 @@ -use memchr::{memchr, memchr2}; -use nom::{ - branch::alt, - bytes::complete::{tag, take}, - character::complete::{alphanumeric1, digit1, space0, space1}, - combinator::{cond, map, opt, recognize, verify}, - sequence::{preceded, tuple}, - IResult, InputTake, -}; - -use super::{ - combinator::{ - at_token, blank_lines, colon2_token, eol_or_eof, l_bracket_token, line_starts_iter, node, - r_bracket_token, GreenElement, - }, - element::element_node, - input::Input, - keyword::affiliated_keyword_nodes, - object::standard_object_nodes, - paragraph::paragraph_nodes, - SyntaxKind::*, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn list_node(input: Input) -> IResult { - crate::lossless_parser!(list_node_base, input) -} - -fn list_node_base(input: Input) -> IResult { - let (input, affiliated_keywords) = affiliated_keyword_nodes(input)?; - let (input, first_indent) = space0(input)?; - let (input, (ends_with_empty_blank_lines, first_item)) = list_item_node(first_indent, input)?; - - let mut children = vec![]; - children.extend(affiliated_keywords); - children.push(first_item); - - let mut input = input; - while !ends_with_empty_blank_lines && !input.is_empty() { - let (input_, indent) = space0(input)?; - - if 
indent.len() != first_indent.len() { - break; - } - - let Ok((input_, (ends_with_empty_blank_lines, list_item))) = list_item_node(indent, input_) - else { - break; - }; - - children.push(list_item); - debug_assert!( - input.len() > input_.len(), - "{} > {}", - input.len(), - input_.len(), - ); - input = input_; - - if ends_with_empty_blank_lines { - break; - } - } - - let (input, post_blank) = blank_lines(input)?; - - children.extend(post_blank); - - Ok((input, node(LIST, children))) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input, indent), fields(input = input.s)) -)] -fn list_item_node<'a>( - indent: Input<'a>, - input: Input<'a>, -) -> IResult, (bool, GreenElement), ()> { - let (input, bullet) = recognize(tuple(( - alt(( - tag("+"), - tag("*"), - tag("-"), - preceded(digit1, tag(".")), - preceded(digit1, tag(")")), - )), - alt((space1, eol_or_eof)), - )))(input)?; - - // list item cannot have an asterisk at the beginning of line - if indent.is_empty() && bullet.s.starts_with('*') { - return Err(nom::Err::Error(())); - } - - if input.is_empty() { - return Ok(( - input, - ( - false, - node( - LIST_ITEM, - [ - indent.token(LIST_ITEM_INDENT), - bullet.token(LIST_ITEM_BULLET), - ], - ), - ), - )); - } - - let is_ordered = bullet.s.starts_with(|c: char| c.is_ascii_digit()); - let (input, counter) = opt(list_item_counter)(input)?; - let (input, checkbox) = opt(list_item_checkbox)(input)?; - let (input, tag) = cond(!is_ordered, opt(list_item_tag))(input)?; - let (input, (ends_with_empty_blank_lines, content)) = - list_item_content_node(input, indent.len())?; - let (input, post_blank) = cond(!ends_with_empty_blank_lines, blank_lines)(input)?; - - let mut children = vec![ - indent.token(LIST_ITEM_INDENT), - bullet.token(LIST_ITEM_BULLET), - ]; - - if let Some((counter, ws)) = counter { - children.extend([counter, ws.ws_token()]); - } - if let Some((checkbox, ws)) = checkbox { - children.extend([checkbox, ws.ws_token()]); - } - if 
let Some(Some((tag, ws))) = tag { - children.extend([tag, ws.ws_token()]); - } - - children.push(content); - if let Some(post_blank) = post_blank { - children.extend(post_blank); - } - - Ok(( - input, - (ends_with_empty_blank_lines, node(LIST_ITEM, children)), - )) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -fn list_item_counter(input: Input) -> IResult { - let (input, node) = map( - tuple((l_bracket_token, at_token, alphanumeric1, r_bracket_token)), - |(l_bracket, at, char, r_bracket)| { - node( - LIST_ITEM_COUNTER, - [l_bracket, at, char.text_token(), r_bracket], - ) - }, - )(input)?; - - let (input, ws) = space0(input)?; - - Ok((input, (node, ws))) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -fn list_item_checkbox(input: Input) -> IResult { - let (input, node) = map( - tuple(( - l_bracket_token, - verify(take(1usize), |input: &Input| { - input.s == " " || input.s == "X" || input.s == "-" - }), - r_bracket_token, - )), - |(l_bracket, char, r_bracket)| { - node( - LIST_ITEM_CHECK_BOX, - [l_bracket, char.text_token(), r_bracket], - ) - }, - )(input)?; - - let (input, ws) = space0(input)?; - - Ok((input, (node, ws))) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -fn list_item_tag(input: Input) -> IResult { - let bytes = input.as_bytes(); - - let (input, tag) = match memchr2(b'\n', b':', bytes) { - Some(idx) if idx > 0 && bytes[idx] == b':' => input.take_split(idx), - _ => return Err(nom::Err::Error(())), - }; - let (input, ws) = space0(input)?; - let (input, colon2) = colon2_token(input)?; - - let mut children = standard_object_nodes(tag); - children.push(colon2); - - Ok((input, (node(LIST_ITEM_TAG, children), ws))) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] 
-fn list_item_content_node(input: Input, indent: usize) -> IResult { - if memchr(b'\n', input.as_bytes()).is_none() { - return Ok(( - input.of(""), - ( - false, - node( - LIST_ITEM_CONTENT, - [node(PARAGRAPH, standard_object_nodes(input))], - ), - ), - )); - }; - - let mut skip_one = true; - let mut i = input; - let mut children = vec![]; - let mut previous_blank_line: Option<(Input, Input)> = None; - 'l: while !i.is_empty() { - for (input, head) in line_starts_iter(i.as_str()) - // the first line in list item content will always be a paragraph - // so we need to skip it in the first iteration - .skip(if skip_one { 1 } else { 0 }) - .map(|idx| i.take_split(idx)) - { - match get_line_indent(input.as_str()) { - Some(next_indent) => { - if next_indent <= indent { - let (input, head) = previous_blank_line.unwrap_or((input, head)); - if !head.is_empty() { - children.extend(paragraph_nodes(head)?); - } - return Ok((input, (false, node(LIST_ITEM_CONTENT, children)))); - } - - previous_blank_line = None; - - if let Ok((input, element)) = element_node(input) { - if !head.is_empty() { - children.extend(paragraph_nodes(head)?); - } - children.push(element); - debug_assert!(input.len() < i.len(), "{} < {}", input.len(), i.len()); - i = input; - skip_one = false; - continue 'l; - } - } - _ => { - // list item ends at two consecutive empty lines - if let Some((input, head)) = previous_blank_line { - if !head.is_empty() { - children.extend(paragraph_nodes(head)?); - } - - return Ok((input, (true, node(LIST_ITEM_CONTENT, children)))); - } else { - previous_blank_line = Some((input, head)) - } - } - } - } - children.extend(paragraph_nodes(i)?); - break; - } - - Ok((input.of(""), (false, node(LIST_ITEM_CONTENT, children)))) -} - -fn get_line_indent(input: &str) -> Option { - input - .bytes() - .take_while(|b| *b != b'\n') - .position(|b| !b.is_ascii_whitespace()) -} - -#[test] -fn parse() { - use crate::{ast::List, tests::to_ast, ParseConfig}; - - let to_list = to_ast::(list_node); 
- - insta::assert_debug_snapshot!( - to_list("1)").syntax, - @r###" - LIST@0..2 - LIST_ITEM@0..2 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..2 "1)" - "### - ); - - insta::assert_debug_snapshot!( - to_list("+ ").syntax, - @r###" - LIST@0..2 - LIST_ITEM@0..2 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..2 "+ " - "### - ); - - insta::assert_debug_snapshot!( - to_list("-\n").syntax, - @r###" - LIST@0..2 - LIST_ITEM@0..2 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..2 "-\n" - "### - ); - - insta::assert_debug_snapshot!( - to_list("+ 1").syntax, - @r###" - LIST@0..3 - LIST_ITEM@0..3 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..2 "+ " - LIST_ITEM_CONTENT@2..3 - PARAGRAPH@2..3 - TEXT@2..3 "1" - "### - ); - - insta::assert_debug_snapshot!( - to_list("+ 1\n").syntax, - @r###" - LIST@0..4 - LIST_ITEM@0..4 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..2 "+ " - LIST_ITEM_CONTENT@2..4 - PARAGRAPH@2..4 - TEXT@2..4 "1\n" - "### - ); - - // list ends with two consecutive blank lines, and these blank lines - // will be the post_blank of list node - insta::assert_debug_snapshot!( - to_list("+ [@A] 1\n\n\n+ 2").syntax, - @r###" - LIST@0..11 - LIST_ITEM@0..9 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..2 "+ " - LIST_ITEM_COUNTER@2..6 - L_BRACKET@2..3 "[" - AT@3..4 "@" - TEXT@4..5 "A" - R_BRACKET@5..6 "]" - WHITESPACE@6..7 " " - LIST_ITEM_CONTENT@7..9 - PARAGRAPH@7..9 - TEXT@7..9 "1\n" - BLANK_LINE@9..10 "\n" - BLANK_LINE@10..11 "\n" - "### - ); - - // empty line between list item, the empty line will be - // the post_blank of first item - insta::assert_debug_snapshot!( - to_list("+ *TAG* :: item1\n\n+ [X] item2").syntax, - @r###" - LIST@0..29 - LIST_ITEM@0..18 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..2 "+ " - LIST_ITEM_TAG@2..10 - BOLD@2..7 - STAR@2..3 "*" - TEXT@3..6 "TAG" - STAR@6..7 "*" - TEXT@7..8 " " - COLON2@8..10 "::" - WHITESPACE@10..10 "" - LIST_ITEM_CONTENT@10..17 - PARAGRAPH@10..17 - TEXT@10..17 " item1\n" - BLANK_LINE@17..18 "\n" - 
LIST_ITEM@18..29 - LIST_ITEM_INDENT@18..18 "" - LIST_ITEM_BULLET@18..20 "+ " - LIST_ITEM_CHECK_BOX@20..23 - L_BRACKET@20..21 "[" - TEXT@21..22 "X" - R_BRACKET@22..23 "]" - WHITESPACE@23..24 " " - LIST_ITEM_CONTENT@24..29 - PARAGRAPH@24..29 - TEXT@24..29 "item2" - "### - ); - - // nested list - let list = to_list( - r#"+ item1 - + item2"#, - ); - insta::assert_debug_snapshot!( - list.syntax, - @r###" - LIST@0..17 - LIST_ITEM@0..17 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..2 "+ " - LIST_ITEM_CONTENT@2..17 - PARAGRAPH@2..8 - TEXT@2..8 "item1\n" - LIST@8..17 - LIST_ITEM@8..17 - LIST_ITEM_INDENT@8..10 " " - LIST_ITEM_BULLET@10..12 "+ " - LIST_ITEM_CONTENT@12..17 - PARAGRAPH@12..17 - TEXT@12..17 "item2" - "### - ); - - insta::assert_debug_snapshot!( - to_list("+ item1\nitem2").syntax, - @r###" - LIST@0..8 - LIST_ITEM@0..8 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..2 "+ " - LIST_ITEM_CONTENT@2..8 - PARAGRAPH@2..8 - TEXT@2..8 "item1\n" - "### - ); - - insta::assert_debug_snapshot!( - to_list("+ item1\n\n still item 1").syntax, - @r###" - LIST@0..23 - LIST_ITEM@0..23 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..2 "+ " - LIST_ITEM_CONTENT@2..23 - PARAGRAPH@2..9 - TEXT@2..8 "item1\n" - BLANK_LINE@8..9 "\n" - PARAGRAPH@9..23 - TEXT@9..23 " still item 1" - "### - ); - - let list = to_list( - r#"+ item1 - + item2 - "#, - ); - insta::assert_debug_snapshot!( - list.syntax, - @r###" - LIST@0..26 - LIST_ITEM@0..26 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..2 "+ " - LIST_ITEM_CONTENT@2..26 - PARAGRAPH@2..8 - TEXT@2..8 "item1\n" - LIST@8..26 - LIST_ITEM@8..26 - LIST_ITEM_INDENT@8..14 " " - LIST_ITEM_BULLET@14..16 "+ " - LIST_ITEM_CONTENT@16..26 - PARAGRAPH@16..26 - TEXT@16..22 "item2\n" - BLANK_LINE@22..26 " " - "### - ); - - let list = to_list( - r#"1. item1 - - - item2 - -3. item 3"#, - ); - assert!(list.is_ordered()); - insta::assert_debug_snapshot!( - list.syntax, - @r###" - LIST@0..32 - LIST_ITEM@0..23 - LIST_ITEM_INDENT@0..0 "" - LIST_ITEM_BULLET@0..3 "1. 
" - LIST_ITEM_CONTENT@3..23 - PARAGRAPH@3..10 - TEXT@3..9 "item1\n" - BLANK_LINE@9..10 "\n" - LIST@10..23 - LIST_ITEM@10..23 - LIST_ITEM_INDENT@10..14 " " - LIST_ITEM_BULLET@14..16 "- " - LIST_ITEM_CONTENT@16..22 - PARAGRAPH@16..22 - TEXT@16..22 "item2\n" - BLANK_LINE@22..23 "\n" - LIST_ITEM@23..32 - LIST_ITEM_INDENT@23..23 "" - LIST_ITEM_BULLET@23..26 "3. " - LIST_ITEM_CONTENT@26..32 - PARAGRAPH@26..32 - TEXT@26..32 "item 3" - "### - ); - - // nested list - insta::assert_debug_snapshot!( - to_list(" + item1\n\n + item2").syntax, - @r###" - LIST@0..20 - LIST_ITEM@0..11 - LIST_ITEM_INDENT@0..2 " " - LIST_ITEM_BULLET@2..4 "+ " - LIST_ITEM_CONTENT@4..10 - PARAGRAPH@4..10 - TEXT@4..10 "item1\n" - BLANK_LINE@10..11 "\n" - LIST_ITEM@11..20 - LIST_ITEM_INDENT@11..13 " " - LIST_ITEM_BULLET@13..15 "+ " - LIST_ITEM_CONTENT@15..20 - PARAGRAPH@15..20 - TEXT@15..20 "item2" - "### - ); - - insta::assert_debug_snapshot!( - to_list(" 1. item1\n 2. item2\n 3. item3").syntax, - @r###" - LIST@0..42 - LIST_ITEM@0..42 - LIST_ITEM_INDENT@0..2 " " - LIST_ITEM_BULLET@2..5 "1. " - LIST_ITEM_CONTENT@5..42 - PARAGRAPH@5..11 - TEXT@5..11 "item1\n" - LIST@11..28 - LIST_ITEM@11..28 - LIST_ITEM_INDENT@11..19 " " - LIST_ITEM_BULLET@19..22 "2. " - LIST_ITEM_CONTENT@22..28 - PARAGRAPH@22..28 - TEXT@22..28 "item2\n" - LIST@28..42 - LIST_ITEM@28..42 - LIST_ITEM_INDENT@28..34 " " - LIST_ITEM_BULLET@34..37 "3. " - LIST_ITEM_CONTENT@37..42 - PARAGRAPH@37..42 - TEXT@37..42 "item3" - "### - ); - - // Indentation of lines within other greater elements do not count - insta::assert_debug_snapshot!( - to_list(" 1. item1\n #+begin_example\nhello\n#+end_example\n").syntax, - @r###" - LIST@0..51 - LIST_ITEM@0..51 - LIST_ITEM_INDENT@0..2 " " - LIST_ITEM_BULLET@2..5 "1. 
" - LIST_ITEM_CONTENT@5..51 - PARAGRAPH@5..11 - TEXT@5..11 "item1\n" - EXAMPLE_BLOCK@11..51 - BLOCK_BEGIN@11..31 - WHITESPACE@11..15 " " - TEXT@15..23 "#+begin_" - TEXT@23..30 "example" - NEW_LINE@30..31 "\n" - BLOCK_CONTENT@31..37 - TEXT@31..37 "hello\n" - BLOCK_END@37..51 - TEXT@37..43 "#+end_" - TEXT@43..50 "example" - NEW_LINE@50..51 "\n" - "### - ); - - to_list("- "); - to_list("-\t"); - to_list("-\r"); - to_list("-\t\n"); - to_list("-\r\n"); - to_list("-"); - - let config = &ParseConfig::default(); - - assert!(list_node(("-a", config).into()).is_err()); - assert!(list_node(("*\r\n", config).into()).is_err()); - assert!(list_node(("* ", config).into()).is_err()); -} diff --git a/src/syntax/macros.rs b/src/syntax/macros.rs deleted file mode 100644 index edbbe21..0000000 --- a/src/syntax/macros.rs +++ /dev/null @@ -1,106 +0,0 @@ -use nom::{ - bytes::complete::{take_until, take_while1}, - combinator::{map, opt, verify}, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{ - l_curly3_token, l_parens_token, node, r_curly3_token, r_parens_token, GreenElement, - }, - input::Input, - SyntaxKind::*, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn macros_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - l_curly3_token, - verify( - take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), - |s: &Input| s.as_bytes()[0].is_ascii_alphabetic(), - ), - opt(tuple((l_parens_token, take_until(")}}}"), r_parens_token))), - r_curly3_token, - )), - |(l_curly3, name, argument, r_curly3)| { - let mut children = vec![]; - children.push(l_curly3); - children.push(name.text_token()); - if let Some((l_parens, argument, r_parens)) = argument { - children.extend([l_parens, argument.text_token(), r_parens]); - } - children.push(r_curly3); - node(MACROS, children) - }, - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn test() { - use 
crate::{ast::Macros, tests::to_ast, ParseConfig}; - - let to_macros = to_ast::(macros_node); - - insta::assert_debug_snapshot!( - to_macros("{{{title}}}").syntax, - @r###" - MACROS@0..11 - L_CURLY3@0..3 "{{{" - TEXT@3..8 "title" - R_CURLY3@8..11 "}}}" - "### - ); - - insta::assert_debug_snapshot!( - to_macros("{{{one_arg_macro(1)}}}").syntax, - @r###" - MACROS@0..22 - L_CURLY3@0..3 "{{{" - TEXT@3..16 "one_arg_macro" - L_PARENS@16..17 "(" - TEXT@17..18 "1" - R_PARENS@18..19 ")" - R_CURLY3@19..22 "}}}" - "### - ); - - insta::assert_debug_snapshot!( - to_macros("{{{two_arg_macro(1, 2)}}}").syntax, - @r###" - MACROS@0..25 - L_CURLY3@0..3 "{{{" - TEXT@3..16 "two_arg_macro" - L_PARENS@16..17 "(" - TEXT@17..21 "1, 2" - R_PARENS@21..22 ")" - R_CURLY3@22..25 "}}}" - "### - ); - - insta::assert_debug_snapshot!( - to_macros("{{{two_arg_macro(1\\,a, 2)}}}").syntax, - @r###" - MACROS@0..28 - L_CURLY3@0..3 "{{{" - TEXT@3..16 "two_arg_macro" - L_PARENS@16..17 "(" - TEXT@17..24 "1\\,a, 2" - R_PARENS@24..25 ")" - R_CURLY3@25..28 "}}}" - "### - ); - - let config = &ParseConfig::default(); - - assert!(macros_node(("{{{0uthor}}}", config).into()).is_err()); - assert!(macros_node(("{{{author}}", config).into()).is_err()); - assert!(macros_node(("{{{poem(}}}", config).into()).is_err()); - assert!(macros_node(("{{{poem)}}}", config).into()).is_err()); -} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs deleted file mode 100644 index 4b0a620..0000000 --- a/src/syntax/mod.rs +++ /dev/null @@ -1,322 +0,0 @@ -//! 
Org-mode elements - -pub mod block; -pub mod clock; -#[cfg(feature = "syntax-org-fc")] -pub mod cloze; -pub mod combinator; -pub mod comment; -pub mod cookie; -pub mod document; -pub mod drawer; -pub mod dyn_block; -pub mod element; -pub mod emphasis; -pub mod entity; -pub mod fixed_width; -pub mod fn_def; -pub mod fn_ref; -pub mod headline; -pub mod inline_call; -pub mod inline_src; -pub mod input; -pub mod keyword; -pub mod latex_environment; -pub mod latex_fragment; -pub mod line_break; -pub mod link; -pub mod list; -pub mod macros; -pub mod object; -pub mod paragraph; -pub mod planning; -pub mod radio_target; -pub mod rule; -pub mod snippet; -pub mod subscript_superscript; -pub mod table; -pub mod target; -pub mod timestamp; - -use rowan::Language; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct OrgLanguage; - -impl Language for OrgLanguage { - type Kind = SyntaxKind; - - fn kind_from_raw(raw: rowan::SyntaxKind) -> SyntaxKind { - // SAFETY: SyntaxKind is `repr(u16)` - unsafe { std::mem::transmute::(raw.0) } - } - - fn kind_to_raw(kind: SyntaxKind) -> rowan::SyntaxKind { - rowan::SyntaxKind(kind as u16) - } -} - -pub type SyntaxNode = rowan::SyntaxNode; -pub type SyntaxToken = rowan::SyntaxToken; -pub type SyntaxElement = rowan::SyntaxElement; -pub type SyntaxNodeChildren = rowan::SyntaxNodeChildren; -pub type SyntaxElementChildren = rowan::SyntaxElementChildren; - -#[allow(bad_style)] -#[allow(clippy::all)] -#[non_exhaustive] -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] -#[repr(u16)] -pub enum SyntaxKind { - // - // token - // - L_BRACKET, // '[' - R_BRACKET, // ']' - L_BRACKET2, // '[[' - R_BRACKET2, // ']]' - L_PARENS, // '(' - R_PARENS, // ')' - L_ANGLE, // '<' - R_ANGLE, // '>' - L_CURLY, // '{' - R_CURLY, // '}' - L_CURLY2, // '{{' - L_CURLY3, // '{{{' - R_CURLY3, // '}}}' - L_ANGLE2, // '<<' - R_ANGLE2, // '>>' - L_ANGLE3, // '<<<' - R_ANGLE3, // '>>>' - AT, // '@' - AT2, // '@@' - PERCENT, // '%' 
- PERCENT2, // '%%' - SLASH, // '/' - BACKSLASH, // '\' - DOLLAR, // '$' - DOLLAR2, // '$$' - UNDERSCORE, // '_' - STAR, // '*' - PLUS, // '+' - MINUS, // '-' - MINUS2, // '--' - COLON, // ':' - COLON2, // '::' - EQUAL, // '=' - TILDE, // '~' - HASH, // '#' - HASH_PLUS, // '#+' - DOUBLE_ARROW, // '=>' - PIPE, // '|' - COMMA, // ',' - CARET, // '^' - NEW_LINE, // '\n' or '\r\n' or '\r' - WHITESPACE, // ' ' or '\t' - BLANK_LINE, - TEXT, - - DOCUMENT, - SECTION, - PARAGRAPH, - - HEADLINE, - HEADLINE_STARS, - HEADLINE_TITLE, - HEADLINE_KEYWORD_TODO, - HEADLINE_KEYWORD_DONE, - HEADLINE_PRIORITY, - HEADLINE_TAGS, - PROPERTY_DRAWER, - NODE_PROPERTY, - PLANNING, - PLANNING_DEADLINE, - PLANNING_SCHEDULED, - PLANNING_CLOSED, - - // - // elements - // - /* table */ - ORG_TABLE, - ORG_TABLE_RULE_ROW, - ORG_TABLE_STANDARD_ROW, - ORG_TABLE_CELL, - /* list */ - LIST, - LIST_ITEM, - LIST_ITEM_INDENT, - LIST_ITEM_BULLET, - LIST_ITEM_COUNTER, - LIST_ITEM_CHECK_BOX, - LIST_ITEM_TAG, - LIST_ITEM_CONTENT, - /* drawer */ - DRAWER, - DRAWER_BEGIN, - DRAWER_END, - DRAWER_CONTENT, - KEYWORD, - BABEL_CALL, - AFFILIATED_KEYWORD, - TABLE_EL, - CLOCK, - FN_DEF, - COMMENT, - RULE, - FIXED_WIDTH, - /* dyn block */ - DYN_BLOCK, - DYN_BLOCK_BEGIN, - DYN_BLOCK_END, - /* block */ - SPECIAL_BLOCK, - QUOTE_BLOCK, - CENTER_BLOCK, - VERSE_BLOCK, - COMMENT_BLOCK, - EXAMPLE_BLOCK, - EXPORT_BLOCK, - SOURCE_BLOCK, - SOURCE_BLOCK_LANG, - BLOCK_BEGIN, - BLOCK_END, - BLOCK_CONTENT, - SRC_BLOCK_SWITCHES, - SRC_BLOCK_LANGUAGE, - SRC_BLOCK_PARAMETERS, - EXPORT_BLOCK_TYPE, - LATEX_ENVIRONMENT, - - // - // objects - // - INLINE_CALL, - INLINE_SRC, - LINK, - LINK_PATH, - LINE_BREAK, - COOKIE, - RADIO_TARGET, - FN_REF, - LATEX_FRAGMENT, - MACROS, - SNIPPET, - TARGET, - BOLD, - STRIKE, - ITALIC, - UNDERLINE, - VERBATIM, - CODE, - ENTITY, - SUPERSCRIPT, - SUBSCRIPT, - - /* timestamp */ - TIMESTAMP_ACTIVE, - TIMESTAMP_INACTIVE, - TIMESTAMP_DIARY, - // timestamp tokens - TIMESTAMP_YEAR, - TIMESTAMP_MONTH, - 
TIMESTAMP_DAY, - TIMESTAMP_HOUR, - TIMESTAMP_MINUTE, - TIMESTAMP_DAYNAME, - // for repeater or delay - TIMESTAMP_REPEATER_MARK, - TIMESTAMP_DELAY_MARK, - TIMESTAMP_VALUE, - TIMESTAMP_UNIT, - - #[cfg(feature = "syntax-org-fc")] - CLOZE, -} - -impl From for rowan::SyntaxKind { - fn from(value: SyntaxKind) -> Self { - OrgLanguage::kind_to_raw(value) - } -} - -impl SyntaxKind { - /// whether this node is [object](https://orgmode.org/worg/org-syntax.html#Objects) - pub fn is_object(&self) -> bool { - matches!( - self, - SyntaxKind::ENTITY - | SyntaxKind::LATEX_FRAGMENT - | SyntaxKind::SNIPPET - | SyntaxKind::FN_REF - | SyntaxKind::INLINE_CALL - | SyntaxKind::INLINE_SRC - | SyntaxKind::LINE_BREAK - | SyntaxKind::LINK - | SyntaxKind::MACROS - | SyntaxKind::RADIO_TARGET - | SyntaxKind::COOKIE - | SyntaxKind::SUPERSCRIPT - | SyntaxKind::SUBSCRIPT - | SyntaxKind::ORG_TABLE_CELL - | SyntaxKind::TIMESTAMP_ACTIVE - | SyntaxKind::TIMESTAMP_INACTIVE - | SyntaxKind::TIMESTAMP_DIARY - | SyntaxKind::BOLD - | SyntaxKind::ITALIC - | SyntaxKind::UNDERLINE - | SyntaxKind::VERBATIM - | SyntaxKind::CODE - | SyntaxKind::STRIKE - ) - } - - /// whether this node is [element](https://orgmode.org/worg/org-syntax.html#Elements) - pub fn is_element(&self) -> bool { - matches!(self, SyntaxKind::HEADLINE | SyntaxKind::SECTION) - || self.is_lesser_element() - || self.is_greater_element() - } - - /// whether this node is [lesser element](https://orgmode.org/worg/org-syntax.html#Lesser_Elements) - pub fn is_lesser_element(&self) -> bool { - matches!( - self, - SyntaxKind::COMMENT_BLOCK - | SyntaxKind::EXAMPLE_BLOCK - | SyntaxKind::EXPORT_BLOCK - | SyntaxKind::SOURCE_BLOCK - | SyntaxKind::VERSE_BLOCK - | SyntaxKind::CLOCK - | SyntaxKind::PLANNING - | SyntaxKind::COMMENT - | SyntaxKind::FIXED_WIDTH - | SyntaxKind::RULE - | SyntaxKind::KEYWORD - | SyntaxKind::AFFILIATED_KEYWORD - | SyntaxKind::BABEL_CALL - | SyntaxKind::LATEX_ENVIRONMENT - | SyntaxKind::NODE_PROPERTY - | SyntaxKind::PARAGRAPH - | 
SyntaxKind::ORG_TABLE_RULE_ROW - | SyntaxKind::ORG_TABLE_STANDARD_ROW - ) - } - - /// whether this node is [greater element](https://orgmode.org/worg/org-syntax.html#Greater_Elements) - pub fn is_greater_element(&self) -> bool { - matches!( - self, - SyntaxKind::CENTER_BLOCK - | SyntaxKind::QUOTE_BLOCK - | SyntaxKind::SPECIAL_BLOCK - | SyntaxKind::DRAWER - | SyntaxKind::DYN_BLOCK - | SyntaxKind::FN_DEF - | SyntaxKind::LIST_ITEM - | SyntaxKind::LIST - | SyntaxKind::PROPERTY_DRAWER - | SyntaxKind::ORG_TABLE - ) - } -} diff --git a/src/syntax/object.rs b/src/syntax/object.rs deleted file mode 100644 index 4bacba1..0000000 --- a/src/syntax/object.rs +++ /dev/null @@ -1,375 +0,0 @@ -use nom::{IResult, InputTake}; - -use super::{ - combinator::GreenElement, - cookie::cookie_node, - emphasis::{ - self, bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node, - }, - entity::entity_node, - fn_ref::fn_ref_node, - inline_call::inline_call_node, - inline_src::inline_src_node, - input::Input, - latex_fragment::latex_fragment_node, - line_break::line_break_node, - link::link_node, - macros::macros_node, - radio_target::radio_target_node, - snippet::snippet_node, - subscript_superscript::{self, subscript_node, superscript_node}, - target::target_node, - timestamp::{timestamp_active_node, timestamp_diary_node, timestamp_inactive_node}, -}; - -struct ObjectPositions<'a> { - input: Input<'a>, - pos: usize, - finder: jetscii::BytesConst, -} - -impl ObjectPositions<'_> { - fn standard(input: Input) -> ObjectPositions { - ObjectPositions { - input, - pos: 0, - finder: jetscii::bytes!( - b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */ - b'@', /* snippet */ - b'<', /* timestamp, target, radio target */ - b'[', /* link, cookie, fn_ref, timestamp */ - b'c', /* inline call */ - b's', /* inline source */ - b'\\', b'$', /* latex & entity */ - b'{', /* macros */ - b'^', /* superscript */ - b'_' /* subscript */ - ), - } - } - - fn minimal(input: Input) -> 
ObjectPositions { - ObjectPositions { - input, - pos: 0, - finder: jetscii::bytes!( - b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */ - b'\\', b'$', /* latex & entity */ - b'^', /* superscript */ - b'_' /* subscript */ - ), - } - } - - fn link_description(input: Input) -> ObjectPositions { - ObjectPositions { - input, - pos: 0, - finder: jetscii::bytes!( - b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */ - b'\\', b'$', /* latex & entity */ - b'@', /* snippet */ - b'c', /* inline call */ - b's', /* inline source */ - b'{', /* macros */ - b'[', /* cookie */ - b'^', /* superscript */ - b'_' /* subscript */ - ), - } - } -} - -impl<'a> Iterator for ObjectPositions<'a> { - type Item = (Input<'a>, Input<'a>); - - fn next(&mut self) -> Option { - if self.input.len() < 2 || self.pos >= self.input.len() { - return None; - } - - let previous = self.pos; - let i = self.finder.find(&self.input.as_bytes()[self.pos..])?; - let p = self.pos + i; - - self.pos = p + 1; - - debug_assert!( - previous < self.pos && self.pos <= self.input.s.len(), - "{} < {} < {}", - previous, - self.pos, - self.input.s.len() - ); - - // a valid object requires at least two characters - if self.input.s.len() - p < 2 { - return None; - } - - Some(self.input.take_split(p)) - } -} - -/// parse minimal sets of objects, including -/// - LaTeX fragments ('\\') -/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/') -/// - Entities ('\\') -/// - Superscripts and Subscripts -pub fn minimal_object_nodes(input: Input) -> Vec { - object_nodes( - ObjectPositions::minimal, - |i: Input, pre: Input| match &i.as_bytes()[0] { - b'*' if emphasis::verify_pre(pre.s) => bold_node(i), - b'+' if emphasis::verify_pre(pre.s) => strike_node(i), - b'/' if emphasis::verify_pre(pre.s) => italic_node(i), - b'_' if emphasis::verify_pre(pre.s) => underline_node(i), - b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i), - b'~' if emphasis::verify_pre(pre.s) => code_node(i), - b'$' 
=> latex_fragment_node(i), - b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), - b'^' if subscript_superscript::verify_pre(&pre) => superscript_node(i), - b'_' if subscript_superscript::verify_pre(&pre) => subscript_node(i), - _ => Err(nom::Err::Error(())), - }, - input, - ) -} - -/// parses standard sets of objects, including -/// -/// - Entities -/// - LaTeX Fragments -/// - Export Snippets -/// - Footnote References -/// - Inline Babel Calls -/// - Inline Source Blocks -/// - Links -/// - Macros -/// - Targets and Radio Targets -/// - Statistics Cookies -/// - Timestamps -/// - Text Markup (bold code strike verbatim underline italic) -/// - Line Breaks -/// - Subscript and Superscript -/// - Cloze (if `syntax-org-fc` is enabled) -/// -/// // todo: -/// - Citations -pub fn standard_object_nodes(input: Input) -> Vec { - object_nodes( - ObjectPositions::standard, - |i: Input, pre: Input| match &i.as_bytes()[0] { - b'*' if emphasis::verify_pre(pre.s) => bold_node(i), - b'+' if emphasis::verify_pre(pre.s) => strike_node(i), - b'/' if emphasis::verify_pre(pre.s) => italic_node(i), - b'_' if emphasis::verify_pre(pre.s) => underline_node(i), - b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i), - b'~' if emphasis::verify_pre(pre.s) => code_node(i), - b'@' => snippet_node(i), - b'{' => { - cfg_if::cfg_if! 
{ - if #[cfg(feature = "syntax-org-fc")] { - macros_node(i).or_else(|_| super::cloze::cloze_node(i)) - } else { - macros_node(i) - } - } - } - b'<' => radio_target_node(i) - .or_else(|_| target_node(i)) - .or_else(|_| timestamp_diary_node(i)) - .or_else(|_| timestamp_active_node(i)), - b'[' => cookie_node(i) - .or_else(|_| link_node(i)) - .or_else(|_| fn_ref_node(i)) - .or_else(|_| timestamp_inactive_node(i)), - // NOTE: although not specified in document, inline call and inline src follows the - // same pre tokens rule as text markup - b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i), - b's' if emphasis::verify_pre(pre.s) => inline_src_node(i), - b'$' => latex_fragment_node(i), - b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i), - b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), - b'^' if subscript_superscript::verify_pre(&pre) => superscript_node(i), - b'_' if subscript_superscript::verify_pre(&pre) => subscript_node(i), - _ => Err(nom::Err::Error(())), - }, - input, - ) -} - -pub fn link_description_object_nodes(input: Input) -> Vec { - object_nodes( - ObjectPositions::link_description, - |i: Input<'_>, pre: Input<'_>| match &i.as_bytes()[0] { - b'@' => snippet_node(i), - b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i), - b's' if emphasis::verify_pre(pre.s) => inline_src_node(i), - b'{' => macros_node(i), - b'[' => cookie_node(i), - b'*' if emphasis::verify_pre(pre.s) => bold_node(i), - b'+' if emphasis::verify_pre(pre.s) => strike_node(i), - b'/' if emphasis::verify_pre(pre.s) => italic_node(i), - b'_' if emphasis::verify_pre(pre.s) => underline_node(i), - b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i), - b'~' if emphasis::verify_pre(pre.s) => code_node(i), - b'$' => latex_fragment_node(i), - b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), - b'^' if subscript_superscript::verify_pre(&pre) => superscript_node(i), - b'_' if subscript_superscript::verify_pre(&pre) => 
subscript_node(i), - _ => Err(nom::Err::Error(())), - }, - input, - ) -} - -fn object_nodes<'a, F, P>(position: F, parse: P, input: Input<'a>) -> Vec -where - F: Fn(Input) -> ObjectPositions, - P: Fn(Input<'a>, Input<'a>) -> IResult, GreenElement, ()>, -{ - let mut i = input; - let mut nodes = vec![]; - - 'l: while !i.is_empty() { - for (input, head) in position(i) { - debug_assert!( - input.s.len() >= 2, - "object must have at least two characters: {:?}", - input.s - ); - - if let Ok((input, pre)) = parse(input, head) { - if !head.is_empty() { - nodes.push(head.text_token()) - } - nodes.push(pre); - debug_assert!(input.len() < i.len(), "{} < {}", input.len(), i.len()); - i = input; - continue 'l; - } - } - nodes.push(i.text_token()); - break; - } - - debug_assert_eq!( - input.as_str(), - nodes.iter().fold(String::new(), |s, i| s + &i.to_string()), - "parser must be lossless" - ); - - nodes -} - -#[test] -fn positions() { - let config = crate::ParseConfig::default(); - - let vec = ObjectPositions::standard(("*", &config).into()).collect::>(); - assert!(vec.is_empty()); - - let vec = ObjectPositions::standard(("*{", &config).into()).collect::>(); - assert_eq!(vec.len(), 1); - assert_eq!(vec[0].0.s, "*{"); - - // https://github.com/PoiScript/orgize/issues/69 - let vec = ObjectPositions::standard(("{3}", &config).into()).collect::>(); - assert_eq!(vec.len(), 1); - assert_eq!(vec[0].0.s, "{3}"); - - let vec = ObjectPositions::standard(("*{()}//s\nc<<", &config).into()).collect::>(); - assert_eq!(vec.len(), 7); - assert_eq!(vec[0].0.s, "*{()}//s\nc<<"); - assert_eq!(vec[1].0.s, "{()}//s\nc<<"); - assert_eq!(vec[2].0.s, "//s\nc<<"); - assert_eq!(vec[3].0.s, "/s\nc<<"); - assert_eq!(vec[4].0.s, "s\nc<<"); - assert_eq!(vec[5].0.s, "c<<"); - assert_eq!(vec[6].0.s, "<<"); -} - -#[test] -fn parse() { - use crate::{ - syntax::{combinator::node, SyntaxKind, SyntaxNode}, - ParseConfig, - }; - - let t = |input: &str| { - let config = &ParseConfig::default(); - let children = 
standard_object_nodes((input, config).into()); - SyntaxNode::new_root(node(SyntaxKind::PARAGRAPH, children).into_node().unwrap()) - }; - - insta::assert_debug_snapshot!( - t("~org-inlinetask-min-level~[fn:oiml:The default value of \n~org-inlinetask-min-level~ is =15=.]"), - @r###" - PARAGRAPH@0..93 - CODE@0..26 - TILDE@0..1 "~" - TEXT@1..25 "org-inlinetask-min-level" - TILDE@25..26 "~" - FN_REF@26..93 - L_BRACKET@26..27 "[" - TEXT@27..29 "fn" - COLON@29..30 ":" - TEXT@30..34 "oiml" - COLON@34..35 ":" - TEXT@35..57 "The default value of \n" - CODE@57..83 - TILDE@57..58 "~" - TEXT@58..82 "org-inlinetask-min-level" - TILDE@82..83 "~" - TEXT@83..87 " is " - VERBATIM@87..91 - EQUAL@87..88 "=" - TEXT@88..90 "15" - EQUAL@90..91 "=" - TEXT@91..92 "." - R_BRACKET@92..93 "]" - "### - ); - - insta::assert_debug_snapshot!( - t(r#"Org is a /plaintext markup syntax/ developed with *Emacs* in 2003. -The canonical parser is =org-element.el=, which provides a number of -functions starting with ~org-element-~."#), - @r###" - PARAGRAPH@0..175 - TEXT@0..9 "Org is a " - ITALIC@9..34 - SLASH@9..10 "/" - TEXT@10..33 "plaintext markup syntax" - SLASH@33..34 "/" - TEXT@34..50 " developed with " - BOLD@50..57 - STAR@50..51 "*" - TEXT@51..56 "Emacs" - STAR@56..57 "*" - TEXT@57..91 " in 2003.\nThe canonic ..." - VERBATIM@91..107 - EQUAL@91..92 "=" - TEXT@92..106 "org-element.el" - EQUAL@106..107 "=" - TEXT@107..160 ", which provides a nu ..." - CODE@160..174 - TILDE@160..161 "~" - TEXT@161..173 "org-element-" - TILDE@173..174 "~" - TEXT@174..175 "." 
- "### - ); - - insta::assert_debug_snapshot!( - t("a^abc"), - @r###" - PARAGRAPH@0..5 - TEXT@0..1 "a" - SUPERSCRIPT@1..5 - CARET@1..2 "^" - TEXT@2..5 "abc" - "### - ); -} diff --git a/src/syntax/paragraph.rs b/src/syntax/paragraph.rs deleted file mode 100644 index f98c22e..0000000 --- a/src/syntax/paragraph.rs +++ /dev/null @@ -1,101 +0,0 @@ -use nom::{IResult, InputTake}; - -use super::{ - combinator::{blank_lines, line_ends_iter, node, GreenElement}, - input::Input, - keyword::affiliated_keyword_nodes, - object::standard_object_nodes, - SyntaxKind, -}; - -/// Recognizes one paragraph -pub fn paragraph_node(input: Input) -> IResult { - crate::lossless_parser!(paragraph_node_base, input) -} - -/// Recognizes multiple paragraphs -pub fn paragraph_nodes(input: Input) -> Result, nom::Err<()>> { - let mut i = input; - let mut children = vec![]; - while !i.is_empty() { - let (input, node) = paragraph_node(i)?; - children.push(node); - debug_assert!(i.len() > input.len(), "{} > {}", i.len(), input.len()); - i = input; - } - Ok(children) -} - -fn paragraph_node_base(input: Input) -> IResult { - debug_assert!(!input.is_empty()); - - let (input, keywords) = affiliated_keyword_nodes(input)?; - - let mut start = 0; - for idx in line_ends_iter(input.as_str()) { - // stops at blank line - if input.s[start..idx].bytes().all(|c| c.is_ascii_whitespace()) { - break; - } - - start = idx; - } - - let (input, contents) = input.take_split(start); - let (input, post_blank) = blank_lines(input)?; - - let mut children = vec![]; - children.extend(keywords); - children.extend(standard_object_nodes(contents)); - children.extend(post_blank); - - Ok((input, node(SyntaxKind::PARAGRAPH, children))) -} - -#[test] -fn parse() { - use crate::{ast::Paragraph, tests::to_ast}; - - let to_paragraph = to_ast::(paragraph_node); - - insta::assert_debug_snapshot!( - to_paragraph(r#"a"#).syntax, - @r###" - PARAGRAPH@0..1 - TEXT@0..1 "a" - "### - ); - - insta::assert_debug_snapshot!( - to_paragraph(r#"a - 
"#).syntax, - @r###" - PARAGRAPH@0..6 - TEXT@0..2 "a\n" - BLANK_LINE@2..6 " " - "### - ); - - insta::assert_debug_snapshot!( - to_paragraph(r#"a -b -c -"#).syntax, - @r###" - PARAGRAPH@0..6 - TEXT@0..6 "a\nb\nc\n" - "### - ); - - insta::assert_debug_snapshot!( - to_paragraph(r#"a - -c -"#).syntax, - @r###" - PARAGRAPH@0..3 - TEXT@0..2 "a\n" - BLANK_LINE@2..3 "\n" - "### - ); -} diff --git a/src/syntax/planning.rs b/src/syntax/planning.rs deleted file mode 100644 index d35c270..0000000 --- a/src/syntax/planning.rs +++ /dev/null @@ -1,91 +0,0 @@ -use nom::{ - branch::alt, bytes::complete::tag, character::complete::space0, combinator::iterator, - sequence::tuple, IResult, -}; - -use super::{ - combinator::{eol_or_eof, GreenElement, NodeBuilder}, - input::Input, - timestamp::{timestamp_active_node, timestamp_inactive_node}, - SyntaxKind::*, -}; - -pub fn planning_node(input: Input) -> IResult { - debug_assert!(!input.is_empty()); - crate::lossless_parser!(planning_node_base, input) -} - -fn planning_node_base(input: Input) -> IResult { - let mut b = NodeBuilder::new(); - - let mut it = iterator( - input, - tuple(( - space0, - alt((tag("DEADLINE:"), tag("SCHEDULED:"), tag("CLOSED:"))), - space0, - alt((timestamp_active_node, timestamp_inactive_node)), - )), - ); - - let start_len = b.len(); - - it.for_each(|(ws, text, ws_, timestamp)| { - let mut b_ = NodeBuilder::new(); - b_.ws(ws); - b_.text(text); - b_.ws(ws_); - b_.push(timestamp); - b.push(b_.finish(match text.as_str() { - "DEADLINE:" => PLANNING_DEADLINE, - "SCHEDULED:" => PLANNING_SCHEDULED, - "CLOSED:" => PLANNING_CLOSED, - _ => unreachable!(), - })); - }); - - if b.len() == start_len { - return Err(nom::Err::Error(())); - } - - let (input, _) = it.finish()?; - let (input, ws) = space0(input)?; - let (input, nl) = eol_or_eof(input)?; - - b.ws(ws); - b.nl(nl); - - Ok((input, b.finish(PLANNING))) -} - -#[test] -fn prase() { - use crate::{ast::Planning, tests::to_ast, ParseConfig}; - - let to_planning = 
to_ast::(planning_node); - - insta::assert_debug_snapshot!( - to_planning("SCHEDULED: <2019-04-08 Mon>").syntax, - @r###" - PLANNING@0..27 - PLANNING_SCHEDULED@0..27 - TEXT@0..10 "SCHEDULED:" - WHITESPACE@10..11 " " - TIMESTAMP_ACTIVE@11..27 - L_ANGLE@11..12 "<" - TIMESTAMP_YEAR@12..16 "2019" - MINUS@16..17 "-" - TIMESTAMP_MONTH@17..19 "04" - MINUS@19..20 "-" - TIMESTAMP_DAY@20..22 "08" - WHITESPACE@22..23 " " - TIMESTAMP_DAYNAME@23..26 "Mon" - R_ANGLE@26..27 ">" - "### - ); - - let config = &ParseConfig::default(); - - assert!(planning_node((" ", config).into()).is_err()); - assert!(planning_node((" SCHEDULED: ", config).into()).is_err()); -} diff --git a/src/syntax/radio_target.rs b/src/syntax/radio_target.rs deleted file mode 100644 index 74fbfac..0000000 --- a/src/syntax/radio_target.rs +++ /dev/null @@ -1,83 +0,0 @@ -use nom::{ - bytes::complete::take_while, - combinator::{map, verify}, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{l_angle3_token, node, r_angle3_token, GreenElement}, - input::Input, - object::minimal_object_nodes, - SyntaxKind::*, -}; - -pub fn radio_target_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - l_angle3_token, - verify( - take_while(|c: char| c != '<' && c != '\n' && c != '>'), - |s: &Input| { - s.as_str().starts_with(|c| c != ' ') && s.as_str().ends_with(|c| c != ' ') - }, - ), - r_angle3_token, - )), - |(l_angle3, contents, r_angle3)| { - let mut children = vec![l_angle3]; - children.extend(minimal_object_nodes(contents)); - children.push(r_angle3); - node(RADIO_TARGET, children) - }, - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::{ast::RadioTarget, tests::to_ast, ParseConfig}; - - let to_radio_target = to_ast::(radio_target_node); - - insta::assert_debug_snapshot!( - to_radio_target("<<>>").syntax, - @r###" - RADIO_TARGET@0..12 - L_ANGLE3@0..3 "<<<" - TEXT@3..9 "target" - R_ANGLE3@9..12 ">>>" - "### - ); - - insta::assert_debug_snapshot!( - 
to_radio_target("<<>>").syntax, - @r###" - RADIO_TARGET@0..13 - L_ANGLE3@0..3 "<<<" - TEXT@3..10 "tar get" - R_ANGLE3@10..13 ">>>" - "### - ); - - insta::assert_debug_snapshot!( - to_radio_target("<<<\\alpha>>>").syntax, - @r###" - RADIO_TARGET@0..12 - L_ANGLE3@0..3 "<<<" - ENTITY@3..9 - BACKSLASH@3..4 "\\" - TEXT@4..9 "alpha" - R_ANGLE3@9..12 ">>>" - "### - ); - - let config = &ParseConfig::default(); - - assert!(radio_target_node(("<<>>", config).into()).is_err()); - assert!(radio_target_node(("<<< target>>>", config).into()).is_err()); - assert!(radio_target_node(("<<>>", config).into()).is_err()); - assert!(radio_target_node(("<<get>>>", config).into()).is_err()); - assert!(radio_target_node(("<<>>", config).into()).is_err()); - assert!(radio_target_node(("<<>", config).into()).is_err()); -} diff --git a/src/syntax/rule.rs b/src/syntax/rule.rs deleted file mode 100644 index cf34171..0000000 --- a/src/syntax/rule.rs +++ /dev/null @@ -1,88 +0,0 @@ -use nom::{ - bytes::complete::take_while_m_n, character::complete::space0, combinator::map, sequence::tuple, - IResult, -}; - -use super::{ - combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder}, - input::Input, - SyntaxKind::*, -}; - -pub fn rule_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - space0, - take_while_m_n(5, usize::MAX, |c| c == '-'), - space0, - eol_or_eof, - blank_lines, - )), - |(ws, dashes, ws_, nl, post_blank)| { - let mut b = NodeBuilder::new(); - b.ws(ws); - b.text(dashes); - b.ws(ws_); - b.nl(nl); - b.children.extend(post_blank); - b.finish(RULE) - }, - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::{ast::Rule, tests::to_ast, ParseConfig}; - - let to_rule = to_ast::(rule_node); - - insta::assert_debug_snapshot!( - to_rule("-----").syntax, - @r###" - RULE@0..5 - TEXT@0..5 "-----" - "### - ); - - insta::assert_debug_snapshot!( - to_rule("--------").syntax, - @r###" - RULE@0..8 - TEXT@0..8 "--------" - "### - ); - - 
insta::assert_debug_snapshot!( - to_rule("-----\n\n\n").syntax, - @r###" - RULE@0..8 - TEXT@0..5 "-----" - NEW_LINE@5..6 "\n" - BLANK_LINE@6..7 "\n" - BLANK_LINE@7..8 "\n" - "### - ); - - insta::assert_debug_snapshot!( - to_rule("----- \n").syntax, - @r###" - RULE@0..8 - TEXT@0..5 "-----" - WHITESPACE@5..7 " " - NEW_LINE@7..8 "\n" - "### - ); - - let config = &ParseConfig::default(); - - assert!(rule_node(("", config).into()).is_err()); - assert!(rule_node(("----", config).into()).is_err()); - assert!(rule_node(("None----", config).into()).is_err()); - assert!(rule_node(("None ----", config).into()).is_err()); - assert!(rule_node(("None------", config).into()).is_err()); - assert!(rule_node(("----None----", config).into()).is_err()); - assert!(rule_node(("\t\t----", config).into()).is_err()); - assert!(rule_node(("------None", config).into()).is_err()); - assert!(rule_node(("----- None", config).into()).is_err()); -} diff --git a/src/syntax/snippet.rs b/src/syntax/snippet.rs deleted file mode 100644 index 5f55f44..0000000 --- a/src/syntax/snippet.rs +++ /dev/null @@ -1,92 +0,0 @@ -use nom::{ - bytes::complete::{take_until, take_while1}, - combinator::map, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{at2_token, colon_token, node, GreenElement}, - input::Input, - SyntaxKind::*, -}; - -pub fn snippet_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - at2_token, - take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-'), - colon_token, - take_until("@@"), - at2_token, - )), - |(at2, name, colon, value, at2_)| { - node( - SNIPPET, - [at2, name.text_token(), colon, value.text_token(), at2_], - ) - }, - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::{ast::Snippet, tests::to_ast, ParseConfig}; - - let to_snippet = to_ast::(snippet_node); - - insta::assert_debug_snapshot!( - to_snippet("@@html:@@").syntax, - @r###" - SNIPPET@0..12 - AT2@0..2 "@@" - TEXT@2..6 "html" - COLON@6..7 ":" - TEXT@7..10 
"" - AT2@10..12 "@@" - "### - ); - - insta::assert_debug_snapshot!( - to_snippet("@@latex:any arbitrary LaTeX code@@").syntax, - @r###" - SNIPPET@0..34 - AT2@0..2 "@@" - TEXT@2..7 "latex" - COLON@7..8 ":" - TEXT@8..32 "any arbitrary LaTeX code" - AT2@32..34 "@@" - "### - ); - - insta::assert_debug_snapshot!( - to_snippet("@@html:@@").syntax, - @r###" - SNIPPET@0..9 - AT2@0..2 "@@" - TEXT@2..6 "html" - COLON@6..7 ":" - TEXT@7..7 "" - AT2@7..9 "@@" - "### - ); - - insta::assert_debug_snapshot!( - to_snippet("@@html:

    @

    @@").syntax, - @r###" - SNIPPET@0..17 - AT2@0..2 "@@" - TEXT@2..6 "html" - COLON@6..7 ":" - TEXT@7..15 "

    @

    " - AT2@15..17 "@@" - "### - ); - - let config = &ParseConfig::default(); - - assert!(snippet_node(("@@html:@", config).into()).is_err()); - assert!(snippet_node(("@@html@@", config).into()).is_err()); - assert!(snippet_node(("@@:@@", config).into()).is_err()); -} diff --git a/src/syntax/subscript_superscript.rs b/src/syntax/subscript_superscript.rs deleted file mode 100644 index 831ff22..0000000 --- a/src/syntax/subscript_superscript.rs +++ /dev/null @@ -1,179 +0,0 @@ -use memchr::memchr2_iter; -use nom::{ - branch::alt, - bytes::complete::{tag, take_while1}, - combinator::opt, - IResult, InputTake, -}; - -use crate::{ - syntax::{ - combinator::{caret_token, underscore_token}, - object::standard_object_nodes, - }, - SyntaxKind, -}; - -use super::{ - combinator::{l_curly_token, node, r_curly_token, GreenElement}, - input::Input, -}; - -pub fn superscript_node(input: Input) -> IResult { - let (input, caret) = caret_token(input)?; - - let mut children = vec![caret]; - - if input.c.use_sub_superscript.is_brace() { - let (input, rest) = template1(input)?; - children.extend(rest); - return Ok((input, node(SyntaxKind::SUPERSCRIPT, children))); - } - - let (input, rest) = alt((template0, template1, template2))(input)?; - children.extend(rest); - - Ok((input, node(SyntaxKind::SUPERSCRIPT, children))) -} - -pub fn subscript_node(input: Input) -> IResult { - let (input, underscore) = underscore_token(input)?; - - let mut children = vec![underscore]; - - if input.c.use_sub_superscript.is_brace() { - let (input, rest) = template1(input)?; - children.extend(rest); - return Ok((input, node(SyntaxKind::SUBSCRIPT, children))); - } - - let (input, rest) = alt((template0, template1, template2))(input)?; - children.extend(rest); - - Ok((input, node(SyntaxKind::SUBSCRIPT, children))) -} - -fn template0(input: Input) -> IResult, ()> { - let (input, star) = tag("*")(input)?; - Ok((input, vec![star.text_token()])) -} - -fn template1(input: Input) -> IResult, ()> { - let (input, l) = 
l_curly_token(input)?; - let (input, contents) = balanced_brackets(input)?; - let (input, r) = r_curly_token(input)?; - let mut children = vec![]; - children.push(l); - children.extend(standard_object_nodes(contents)); - children.push(r); - Ok((input, children)) -} - -fn template2(input: Input) -> IResult, ()> { - let (input, sign) = opt(alt((tag("+"), tag("-"))))(input)?; - - let (input, contents) = - take_while1(|c: char| c.is_alphanumeric() || c == ',' || c == '\\' || c == '.')(input)?; - - if contents.s.ends_with(|c: char| !c.is_alphanumeric()) { - return Err(nom::Err::Error(())); - } - - let mut children = vec![]; - - if let Some(s) = sign { - children.push(s.text_token()) - } - - children.push(contents.text_token()); - - Ok((input, children)) -} - -fn balanced_brackets(input: Input) -> IResult { - let mut pairs = 1; - let bytes = input.as_bytes(); - for i in memchr2_iter(b'{', b'}', bytes) { - if bytes[i] == b'{' { - pairs += 1; - } else if pairs != 1 { - pairs -= 1; - } else { - return Ok(input.take_split(i)); - } - } - Err(nom::Err::Error(())) -} - -pub fn verify_pre(i: &Input) -> bool { - if i.c.use_sub_superscript.is_nil() { - return false; - } - let s = i.s; - if s.is_empty() { - return false; - } - let last = s.as_bytes()[s.len() - 1]; - last != b' ' && last != b'\t' -} - -#[test] -fn parse() { - use crate::ast::Subscript; - use crate::config::{ParseConfig, UseSubSuperscript}; - use crate::tests::to_ast; - - let to_subscript = to_ast::(subscript_node); - - insta::assert_debug_snapshot!( - to_subscript("_*").syntax, - @r###" - SUBSCRIPT@0..2 - UNDERSCORE@0..1 "_" - TEXT@1..2 "*" - "### - ); - - insta::assert_debug_snapshot!( - to_subscript("_{*bo\nld*}").syntax, - @r###" - SUBSCRIPT@0..10 - UNDERSCORE@0..1 "_" - L_CURLY@1..2 "{" - BOLD@2..9 - STAR@2..3 "*" - TEXT@3..8 "bo\nld" - STAR@8..9 "*" - R_CURLY@9..10 "}" - "### - ); - - insta::assert_debug_snapshot!( - to_subscript("_+123").syntax, - @r###" - SUBSCRIPT@0..5 - UNDERSCORE@0..1 "_" - TEXT@1..2 "+" - 
TEXT@2..5 "123" - "### - ); - - insta::assert_debug_snapshot!( - to_subscript("_abc").syntax, - @r###" - SUBSCRIPT@0..4 - UNDERSCORE@0..1 "_" - TEXT@1..4 "abc" - "### - ); - - let with_brace = ParseConfig { - use_sub_superscript: UseSubSuperscript::Brace, - ..Default::default() - }; - - debug_assert!(subscript_node(("_*", &with_brace).into()).is_err()); - debug_assert!(subscript_node(("_abc", &with_brace).into()).is_err()); - debug_assert!(subscript_node(("_+123", &with_brace).into()).is_err()); - debug_assert!(subscript_node(("_{*bo\nld*}", &with_brace).into()).is_ok()); -} diff --git a/src/syntax/table.rs b/src/syntax/table.rs deleted file mode 100644 index eb77a40..0000000 --- a/src/syntax/table.rs +++ /dev/null @@ -1,265 +0,0 @@ -use nom::{ - bytes::complete::take_while, - character::complete::{multispace0, space0}, - combinator::iterator, - sequence::tuple, - Err, IResult, InputTake, Slice, -}; - -use super::{ - combinator::{blank_lines, line_ends_iter, node, pipe_token, GreenElement, NodeBuilder}, - input::Input, - keyword::tblfm_keyword_nodes, - object::standard_object_nodes, - SyntaxKind::*, -}; - -fn org_table_node_base(input: Input) -> IResult { - let mut children = vec![]; - - let mut start = 0; - for i in line_ends_iter(input.as_str()) { - let line = input.slice(start..i); - let trimmed = line.as_str().trim_start_matches([' ', '\t']); - - // Org tables end at the first line not starting with a vertical bar. 
- if !trimmed.starts_with('|') { - break; - } - - if trimmed.starts_with("|-") { - children.push(node(ORG_TABLE_RULE_ROW, [line.text_token()])); - } else { - children.push(table_standard_row_node(line)?); - } - - start = i; - } - - if start == 0 { - return Err(nom::Err::Error(())); - } - - let input = input.slice(start..); - - let (input, tblfm) = tblfm_keyword_nodes(input)?; - - let (input, post_blank) = blank_lines(input)?; - - children.extend(tblfm); - children.extend(post_blank); - - Ok((input, node(ORG_TABLE, children))) -} - -fn table_standard_row_node(input: Input) -> Result> { - let mut b = NodeBuilder::new(); - - let (input, ws) = space0(input)?; - - b.ws(ws); - - let mut it = iterator( - input, - tuple((pipe_token, multispace0, take_while(|c: char| c != '|'))), - ); - - it.for_each(|(pipe, ws, input)| { - b.push(pipe); - b.ws(ws); - - if input.is_empty() { - return; - } - - match input - .as_bytes() - .iter() - .rposition(|b| !b.is_ascii_whitespace()) - { - Some(idx) => { - let (ws, cell) = input.take_split(idx + 1); - b.push(node(ORG_TABLE_CELL, standard_object_nodes(cell))); - b.ws(ws); - } - _ => { - b.push(node(ORG_TABLE_CELL, standard_object_nodes(input))); - } - } - }); - let (input, _) = it.finish()?; - debug_assert!(input.is_empty()); - - Ok(b.finish(ORG_TABLE_STANDARD_ROW)) -} - -fn table_el_node_base(input: Input) -> IResult { - let mut start = 0; - for i in line_ends_iter(input.as_str()) { - let line = &input.s[start..i]; - let trimmed = line.trim(); - - if start == 0 { - // Table.el tables start at lines beginning with "+-" string and followed by plus or minus signs - if !trimmed.starts_with("+-") || trimmed.bytes().any(|c| c != b'+' && c != b'-') { - return Err(Err::Error(())); - } - } - - // Table.el tables end at the first line not starting with either a vertical line or a plus sign. 
- if !trimmed.starts_with('|') && !trimmed.starts_with('+') { - break; - } - - start = i; - } - - let (input, contents) = input.take_split(start); - let (input, post_blank) = blank_lines(input)?; - - let mut children = vec![]; - children.push(contents.text_token()); - children.extend(post_blank); - - Ok((input, node(TABLE_EL, children))) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn org_table_node(input: Input) -> IResult { - crate::lossless_parser!(org_table_node_base, input) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn table_el_node(input: Input) -> IResult { - crate::lossless_parser!(table_el_node_base, input) -} - -#[test] -fn parse_org_table() { - use crate::{ast::OrgTable, tests::to_ast}; - - let to_org_table = to_ast::(org_table_node); - - insta::assert_debug_snapshot!( - to_org_table("|").syntax, - @r###" - ORG_TABLE@0..1 - ORG_TABLE_STANDARD_ROW@0..1 - PIPE@0..1 "|" - "### - ); - - insta::assert_debug_snapshot!( - to_org_table( -r#"| -|- -|a -|- -| a | -"# - ).syntax, - @r###" - ORG_TABLE@0..20 - ORG_TABLE_STANDARD_ROW@0..2 - PIPE@0..1 "|" - WHITESPACE@1..2 "\n" - ORG_TABLE_RULE_ROW@2..5 - TEXT@2..5 "|-\n" - ORG_TABLE_STANDARD_ROW@5..8 - PIPE@5..6 "|" - ORG_TABLE_CELL@6..7 - TEXT@6..7 "a" - WHITESPACE@7..8 "\n" - ORG_TABLE_RULE_ROW@8..11 - TEXT@8..11 "|-\n" - ORG_TABLE_STANDARD_ROW@11..20 - PIPE@11..12 "|" - WHITESPACE@12..15 " " - ORG_TABLE_CELL@15..16 - TEXT@15..16 "a" - WHITESPACE@16..18 " " - PIPE@18..19 "|" - WHITESPACE@19..20 "\n" - "### - ); - - insta::assert_debug_snapshot!( - to_org_table("| a |\n#+tblfm: test").syntax, - @r###" - ORG_TABLE@0..19 - ORG_TABLE_STANDARD_ROW@0..6 - PIPE@0..1 "|" - WHITESPACE@1..2 " " - ORG_TABLE_CELL@2..3 - TEXT@2..3 "a" - WHITESPACE@3..4 " " - PIPE@4..5 "|" - WHITESPACE@5..6 "\n" - KEYWORD@6..19 - HASH_PLUS@6..8 "#+" - TEXT@8..13 "tblfm" - 
COLON@13..14 ":" - TEXT@14..19 " test" - "### - ); - - insta::assert_debug_snapshot!( - to_org_table("| a |\n#+TBLFM: test1\n#+TBLFM: test2").syntax, - @r###" - ORG_TABLE@0..35 - ORG_TABLE_STANDARD_ROW@0..6 - PIPE@0..1 "|" - WHITESPACE@1..2 " " - ORG_TABLE_CELL@2..3 - TEXT@2..3 "a" - WHITESPACE@3..4 " " - PIPE@4..5 "|" - WHITESPACE@5..6 "\n" - KEYWORD@6..21 - HASH_PLUS@6..8 "#+" - TEXT@8..13 "TBLFM" - COLON@13..14 ":" - TEXT@14..20 " test1" - NEW_LINE@20..21 "\n" - KEYWORD@21..35 - HASH_PLUS@21..23 "#+" - TEXT@23..28 "TBLFM" - COLON@28..29 ":" - TEXT@29..35 " test2" - "### - ); -} - -#[test] -fn parse_table_el() { - use crate::{ast::TableEl, tests::to_ast, ParseConfig}; - - let to_table_el = to_ast::(table_el_node); - - insta::assert_debug_snapshot!( - to_table_el( - r#" +---+ - | | - +---+ - - "# - ).syntax, - @r###" - TABLE_EL@0..37 - TEXT@0..32 " +---+\n | |\n ..." - BLANK_LINE@32..33 "\n" - BLANK_LINE@33..37 " " - "### - ); - - let config = &ParseConfig::default(); - - assert!(table_el_node(("", config).into()).is_err()); - assert!(table_el_node(("+----|---", config).into()).is_err()); -} diff --git a/src/syntax/target.rs b/src/syntax/target.rs deleted file mode 100644 index ceba339..0000000 --- a/src/syntax/target.rs +++ /dev/null @@ -1,69 +0,0 @@ -use nom::{ - bytes::complete::take_while, - combinator::{map, verify}, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{l_angle2_token, node, r_angle2_token, GreenElement}, - input::Input, - SyntaxKind::*, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn target_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - l_angle2_token, - verify( - take_while(|c: char| c != '<' && c != '\n' && c != '>'), - |s: &Input| { - s.as_str().starts_with(|c| c != ' ') && s.as_str().ends_with(|c| c != ' ') - }, - ), - r_angle2_token, - )), - |(l_angle2, target, r_angle2)| node(TARGET, [l_angle2, target.text_token(), 
r_angle2]), - ); - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::{ast::Target, tests::to_ast, ParseConfig}; - - let to_target = to_ast::(target_node); - - insta::assert_debug_snapshot!( - to_target("<>").syntax, - @r###" - TARGET@0..10 - L_ANGLE2@0..2 "<<" - TEXT@2..8 "target" - R_ANGLE2@8..10 ">>" - "### - ); - - insta::assert_debug_snapshot!( - to_target("<>").syntax, - @r###" - TARGET@0..11 - L_ANGLE2@0..2 "<<" - TEXT@2..9 "tar get" - R_ANGLE2@9..11 ">>" - "### - ); - - let config = &ParseConfig::default(); - - assert!(target_node(("<>", config).into()).is_err()); - assert!(target_node(("<< target>>", config).into()).is_err()); - assert!(target_node(("<>", config).into()).is_err()); - assert!(target_node(("<get>>", config).into()).is_err()); - assert!(target_node(("<>", config).into()).is_err()); - assert!(target_node(("<", config).into()).is_err()); -} diff --git a/src/syntax/timestamp.rs b/src/syntax/timestamp.rs deleted file mode 100644 index 31686d3..0000000 --- a/src/syntax/timestamp.rs +++ /dev/null @@ -1,357 +0,0 @@ -use nom::{ - branch::alt, - bytes::complete::{tag, take_till, take_while1, take_while_m_n}, - character::complete::{digit1, space0, space1}, - combinator::{iterator, map, opt}, - sequence::tuple, - IResult, -}; - -use super::{ - combinator::{ - colon_token, l_angle_token, l_bracket_token, l_parens_token, minus2_token, minus_token, - node, percent2_token, r_angle_token, r_bracket_token, r_parens_token, GreenElement, - NodeBuilder, - }, - input::Input, - SyntaxKind::*, -}; - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn timestamp_diary_node(input: Input) -> IResult { - let mut parser = map( - tuple(( - l_angle_token, - percent2_token, - l_parens_token, - take_till(|c| c == ')' || c == '>' || c == '\n'), - r_parens_token, - r_angle_token, - )), - |(l_angle, percent2, l_paren, value, r_paren, r_angle)| { - node( - TIMESTAMP_DIARY, - [ 
- l_angle, - percent2, - l_paren, - value.text_token(), - r_paren, - r_angle, - ], - ) - }, - ); - crate::lossless_parser!(parser, input) -} - -fn date(i: Input) -> IResult { - map( - tuple(( - take_while_m_n(4, 4, |c: char| c.is_ascii_digit()), - minus_token, - take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), - minus_token, - take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), - )), - |(year, minus, month, minus_, day)| { - [ - year.token(TIMESTAMP_YEAR), - minus, - month.token(TIMESTAMP_MONTH), - minus_, - day.token(TIMESTAMP_DAY), - ] - }, - )(i) -} - -fn dayname(i: Input) -> IResult { - map( - take_while1(|c: char| { - !c.is_ascii_whitespace() - && !c.is_ascii_digit() - && c != '+' - && c != '-' - && c != ']' - && c != '>' - && c != '.' - }), - |i: Input| i.token(TIMESTAMP_DAYNAME), - )(i) -} - -fn time(i: Input) -> IResult { - map( - tuple(( - take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), - colon_token, - take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), - )), - |(hour, colon, minute)| { - [ - hour.token(TIMESTAMP_HOUR), - colon, - minute.token(TIMESTAMP_MINUTE), - ] - }, - )(i) -} - -fn repeater_or_delay( - input: Input, -) -> IResult { - let (input, mark) = alt(( - map(alt((tag("++"), tag("+"), tag(".+"))), |i: Input| { - i.token(TIMESTAMP_REPEATER_MARK) - }), - map(alt((tag("--"), tag("-"))), |i: Input| { - i.token(TIMESTAMP_DELAY_MARK) - }), - ))(input)?; - let (input, value) = digit1(input)?; - let (input, unit) = alt((tag("h"), tag("d"), tag("w"), tag("m"), tag("y")))(input)?; - - Ok(( - input, - ( - mark, - value.token(TIMESTAMP_VALUE), - unit.token(TIMESTAMP_UNIT), - ), - )) -} - -fn timestamp_node_base( - input: Input, - l_parser: impl Fn(Input) -> IResult, - r_parser: impl Fn(Input) -> IResult, -) -> IResult, ()> { - let (input, l_angle) = l_parser(input)?; - let (input, start_date) = date(input)?; - let (input, start_dayname) = opt(tuple((space1, dayname)))(input)?; - let (input, start_time) = opt(tuple((space1, time)))(input)?; - - let 
mut b = NodeBuilder::new(); - b.push(l_angle); - b.children.extend(start_date); - - if let Some((ws, dayname)) = start_dayname { - b.push(ws.ws_token()); - b.push(dayname); - } - - if input.as_str().starts_with('-') { - let (ws, start_time) = match start_time { - Some(start_time) => start_time, - None => return Err(nom::Err::Error(())), - }; - - let (input, minus) = minus_token(input)?; - let (input, end_time) = time(input)?; - - b.ws(ws); - b.children.extend(start_time); - b.push(minus); - b.children.extend(end_time); - - let mut iter = iterator(input, tuple((space1, repeater_or_delay))); - for (ws, (mark, value, unit)) in &mut iter { - b.children.extend([ws.ws_token(), mark, value, unit]); - } - let (input, _) = iter.finish()?; - - let (input, space) = space0(input)?; - let (input, r_angle) = r_parser(input)?; - - b.ws(space); - b.push(r_angle); - - return Ok((input, b.children)); - } - - if let Some((ws, start_time)) = start_time { - b.ws(ws); - b.children.extend(start_time); - } - - let mut iter = iterator(input, tuple((space1, repeater_or_delay))); - for (ws, (mark, value, unit)) in &mut iter { - b.children.extend([ws.ws_token(), mark, value, unit]); - } - let (input, _) = iter.finish()?; - - let (input, space) = space0(input)?; - let (input, r_angle) = r_parser(input)?; - - b.ws(space); - b.push(r_angle); - - if input.as_str().starts_with("--") { - let (input, minus2) = minus2_token(input)?; - let (input, l_angle) = l_parser(input)?; - let (input, end_date) = date(input)?; - let (input, end_dayname) = opt(tuple((space1, dayname)))(input)?; - let (input, end_time) = opt(tuple((space1, time)))(input)?; - - b.children.extend([minus2, l_angle]); - b.children.extend(end_date); - if let Some((ws, dayname)) = end_dayname { - b.push(ws.ws_token()); - b.push(dayname); - } - if let Some((ws, end_time)) = end_time { - b.ws(ws); - b.children.extend(end_time); - } - let mut iter = iterator(input, tuple((space1, repeater_or_delay))); - for (ws, (mark, value, unit)) in &mut 
iter { - b.children.extend([ws.ws_token(), mark, value, unit]); - } - let (input, _) = iter.finish()?; - - let (input, space_) = space0(input)?; - let (input, r_angle) = r_parser(input)?; - - b.ws(space_); - b.push(r_angle); - - Ok((input, b.children)) - } else { - Ok((input, b.children)) - } -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn timestamp_active_node(input: Input) -> IResult { - fn parser(input: Input) -> IResult { - let (input, children) = timestamp_node_base(input, l_angle_token, r_angle_token)?; - Ok((input, node(TIMESTAMP_ACTIVE, children))) - } - crate::lossless_parser!(parser, input) -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(level = "debug", skip(input), fields(input = input.s)) -)] -pub fn timestamp_inactive_node(input: Input) -> IResult { - fn parser(input: Input) -> IResult { - let (input, children) = timestamp_node_base(input, l_bracket_token, r_bracket_token)?; - Ok((input, node(TIMESTAMP_INACTIVE, children))) - } - crate::lossless_parser!(parser, input) -} - -#[test] -fn parse() { - use crate::{ast::Timestamp, tests::to_ast}; - - let to_timestamp = to_ast::(timestamp_inactive_node); - - to_timestamp("[2003-09-16]"); - to_timestamp("[2003-09-16 09:09]"); - to_timestamp("[2003-09-16 Tue]"); - to_timestamp("[2003-09-16 Tue 09:09]"); - to_timestamp("[2003-09-16]--[2003-09-16]"); - to_timestamp("[2003-09-16 09:09]--[2003-09-16 09:09]"); - to_timestamp("[2003-09-16]--[2003-09-16 09:09]"); - to_timestamp("[2003-09-16 Tue]--[2003-09-16 Tue]"); - to_timestamp("[2003-09-16 Tue 09:09]--[2003-09-16 Tue 09:09]"); - to_timestamp("[2003-09-16 Tue 09:09-09:09]"); - to_timestamp("[2003-09-16 09:09-09:09 ]"); - to_timestamp("[2003-09-16 09:09 +1w .+1d]"); - to_timestamp("[2003-09-16 09:09]--[2003-09-16 +1w .+1d --1d ]"); - to_timestamp("[2003-09-16 Tue 09:09 +1w]--[2003-09-16 .+1d --1d ]"); - to_timestamp("[2003-09-16 09:09-10:19 +1w --1d]"); - - let ts = 
to_timestamp("[2003-09-16 Tue +1w]"); - assert!(!ts.is_range()); - insta::assert_debug_snapshot!( - ts.syntax, - @r###" - TIMESTAMP_INACTIVE@0..20 - L_BRACKET@0..1 "[" - TIMESTAMP_YEAR@1..5 "2003" - MINUS@5..6 "-" - TIMESTAMP_MONTH@6..8 "09" - MINUS@8..9 "-" - TIMESTAMP_DAY@9..11 "16" - WHITESPACE@11..12 " " - TIMESTAMP_DAYNAME@12..15 "Tue" - WHITESPACE@15..16 " " - TIMESTAMP_REPEATER_MARK@16..17 "+" - TIMESTAMP_VALUE@17..18 "1" - TIMESTAMP_UNIT@18..19 "w" - R_BRACKET@19..20 "]" - "### - ); - - let ts = to_timestamp("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]"); - assert!(ts.is_range()); - insta::assert_debug_snapshot!( - ts.syntax, - @r###" - TIMESTAMP_INACTIVE@0..46 - L_BRACKET@0..1 "[" - TIMESTAMP_YEAR@1..5 "2003" - MINUS@5..6 "-" - TIMESTAMP_MONTH@6..8 "09" - MINUS@8..9 "-" - TIMESTAMP_DAY@9..11 "16" - WHITESPACE@11..12 " " - TIMESTAMP_DAYNAME@12..15 "Tue" - WHITESPACE@15..16 " " - TIMESTAMP_HOUR@16..18 "09" - COLON@18..19 ":" - TIMESTAMP_MINUTE@19..21 "39" - R_BRACKET@21..22 "]" - MINUS2@22..24 "--" - L_BRACKET@24..25 "[" - TIMESTAMP_YEAR@25..29 "2003" - MINUS@29..30 "-" - TIMESTAMP_MONTH@30..32 "09" - MINUS@32..33 "-" - TIMESTAMP_DAY@33..35 "16" - WHITESPACE@35..36 " " - TIMESTAMP_DAYNAME@36..39 "Tue" - WHITESPACE@39..40 " " - TIMESTAMP_HOUR@40..42 "10" - COLON@42..43 ":" - TIMESTAMP_MINUTE@43..45 "39" - R_BRACKET@45..46 "]" - "### - ); - - let ts = to_timestamp("[2003-09-16 Tue 09:39-10:39]"); - assert!(ts.is_range()); - insta::assert_debug_snapshot!( - ts.syntax, - @r###" - TIMESTAMP_INACTIVE@0..28 - L_BRACKET@0..1 "[" - TIMESTAMP_YEAR@1..5 "2003" - MINUS@5..6 "-" - TIMESTAMP_MONTH@6..8 "09" - MINUS@8..9 "-" - TIMESTAMP_DAY@9..11 "16" - WHITESPACE@11..12 " " - TIMESTAMP_DAYNAME@12..15 "Tue" - WHITESPACE@15..16 " " - TIMESTAMP_HOUR@16..18 "09" - COLON@18..19 ":" - TIMESTAMP_MINUTE@19..21 "39" - MINUS@21..22 "-" - TIMESTAMP_HOUR@22..24 "10" - COLON@24..25 ":" - TIMESTAMP_MINUTE@25..27 "39" - R_BRACKET@27..28 "]" - "### - ); -} diff --git a/src/tests.rs 
b/src/tests.rs deleted file mode 100644 index 5ed57c3..0000000 --- a/src/tests.rs +++ /dev/null @@ -1,24 +0,0 @@ -//! test utils - -use nom::IResult; -use rowan::{ast::AstNode, SyntaxNode}; - -use crate::{ - syntax::{combinator::GreenElement, input::Input}, - ParseConfig, -}; - -pub fn to_ast( - parser: impl Fn(Input) -> IResult, -) -> impl Fn(&str) -> N { - move |s: &str| { - let input = Input { - s, - c: &ParseConfig::default(), - }; - let element = parser(input).unwrap().1; - let node = element.into_node().unwrap(); - let node = SyntaxNode::::new_root(node); - AstNode::cast(node).unwrap() - } -} diff --git a/src/validate.rs b/src/validate.rs new file mode 100644 index 0000000..535b268 --- /dev/null +++ b/src/validate.rs @@ -0,0 +1,217 @@ +use indextree::NodeId; +use std::ops::RangeInclusive; + +use crate::elements::{Element, Table, TableCell, TableRow}; +use crate::Org; + +/// Validation Error +#[derive(Debug)] +pub enum ValidationError { + /// Expected at least one child + ExpectedChildren { + at: NodeId, + }, + /// Expected no children + UnexpectedChildren { + at: NodeId, + }, + UnexpectedElement { + expected: &'static str, + at: NodeId, + }, + /// Expected a detached element + ExpectedDetached { + at: NodeId, + }, + /// Expected headline level in specify range + HeadlineLevelMismatch { + range: RangeInclusive, + at: NodeId, + }, +} + +impl ValidationError { + pub fn element<'a, 'b>(&self, org: &'a Org<'b>) -> &'a Element<'b> { + match self { + ValidationError::ExpectedChildren { at } + | ValidationError::UnexpectedChildren { at } + | ValidationError::UnexpectedElement { at, .. } + | ValidationError::ExpectedDetached { at } + | ValidationError::HeadlineLevelMismatch { at, .. } => &org[*at], + } + } +} + +pub type ValidationResult = Result; + +impl Org<'_> { + /// Validates an `Org` struct. + pub fn validate(&self) -> Vec { + let mut errors = Vec::new(); + + macro_rules! 
expect_element { + ($node:ident, $expect:expr, $($pattern:pat)|+) => { + match self[$node] { + $($pattern)|+ => (), + _ => errors.push(ValidationError::UnexpectedElement { + expected: $expect, + at: $node + }), + } + }; + } + + macro_rules! expect_children { + ($node:ident) => { + if self.arena[$node].first_child().is_none() { + errors.push(ValidationError::ExpectedChildren { at: $node }); + } + }; + } + + for node_id in self.root.descendants(&self.arena) { + let node = &self.arena[node_id]; + match node.get() { + Element::Document { .. } => { + let mut children = node_id.children(&self.arena); + if let Some(child) = children.next() { + expect_element!( + child, + "Headline|Section", + Element::Headline { .. } | Element::Section + ); + } + + for child in children { + expect_element!( + child, + "Headline", + Element::Headline { .. } + ); + } + } + Element::Headline { .. } => { + expect_children!(node_id); + + let mut children = node_id.children(&self.arena); + if let Some(child) = children.next() { + expect_element!(child, "Title", Element::Title(_)); + } + + if let Some(child) = children.next() { + expect_element!( + child, + "Headline|Section", + Element::Headline { .. } | Element::Section + ); + } + + for child in children { + expect_element!( + child, + "Headline", + Element::Headline { .. } + ); + } + } + Element::Title(title) => { + if !title.raw.is_empty() && node.first_child().is_none() { + errors.push(ValidationError::ExpectedChildren { at: node_id }); + } + } + Element::List(_) => { + expect_children!(node_id); + for child in node_id.children(&self.arena) { + expect_element!(child, "ListItem", Element::ListItem(_)); + } + } + Element::Table(Table::Org { .. 
}) => { + for child in node_id.children(&self.arena) { + expect_element!(child, "TableRow", Element::TableRow(_)); + } + } + Element::TableRow(TableRow::Header) => { + for child in node_id.children(&self.arena) { + expect_element!( + child, + "TableCell::Header", + Element::TableCell(TableCell::Header) + ); + } + } + Element::TableRow(TableRow::Body) => { + for child in node_id.children(&self.arena) { + expect_element!( + child, + "TableCell::Body", + Element::TableCell(TableCell::Body) + ); + } + } + Element::CommentBlock(_) + | Element::ExampleBlock(_) + | Element::ExportBlock(_) + | Element::SourceBlock(_) + | Element::BabelCall(_) + | Element::InlineSrc(_) + | Element::Code { .. } + | Element::FnRef(_) + | Element::InlineCall(_) + | Element::Link(_) + | Element::Macros(_) + | Element::RadioTarget + | Element::Snippet(_) + | Element::Target(_) + | Element::Text { .. } + | Element::Timestamp(_) + | Element::Verbatim { .. } + | Element::FnDef(_) + | Element::Clock(_) + | Element::Comment { .. } + | Element::FixedWidth { .. } + | Element::Keyword(_) + | Element::Rule(_) + | Element::Cookie(_) + | Element::TableRow(TableRow::BodyRule) + | Element::TableRow(TableRow::HeaderRule) => { + if node.first_child().is_some() { + errors.push(ValidationError::UnexpectedChildren { at: node_id }); + } + } + Element::SpecialBlock(_) + | Element::QuoteBlock(_) + | Element::CenterBlock(_) + | Element::VerseBlock(_) + | Element::Paragraph { .. } + | Element::Section + | Element::Bold + | Element::Italic + | Element::Underline + | Element::Strike + | Element::DynBlock(_) => { + expect_children!(node_id); + } + Element::ListItem(_) + | Element::Drawer(_) + | Element::TableCell(_) + | Element::Table(_) => (), + } + } + errors + } + + pub(crate) fn debug_validate(&self) { + if cfg!(debug_assertions) { + let errors = self.validate(); + if !errors.is_empty() { + eprintln!("Org validation failed. 
{} error(s) found:", errors.len()); + for err in errors { + eprintln!("{:?} at {:?}", err, err.element(self)); + } + panic!( + "Looks like there's a bug in orgize! Please report it with your org-mode content at https://github.com/PoiScript/orgize/issues." + ); + } + } + } +} diff --git a/tests/blank.rs b/tests/blank.rs new file mode 100644 index 0000000..288e26a --- /dev/null +++ b/tests/blank.rs @@ -0,0 +1,84 @@ +use orgize::Org; + +const ORG_STR: &str = r#" + +#+TITLE: org + +#+BEGIN_QUOTE + +CONTENTS + +#+END_QUOTE + +* Headline 1 +SCHEDULED: <2019-10-28 Mon> +:PROPERTIES: +:ID: headline-1 +:END: + +:LOGBOOK: + +CLOCK: [2019-10-28 Mon 08:53] + +CLOCK: [2019-10-28 Mon 08:53]--[2019-10-28 Mon 08:53] => 0:00 + +:END: + +----- + +#+CALL: VALUE + +# +# Comment +# + +#+BEGIN: NAME PARAMETERS + +CONTENTS + +#+END: + +: +: Fixed width +: + +#+BEGIN_COMMENT + +COMMENT + +#+END_COMMENT + +#+BEGIN_EXAMPLE +#+END_EXAMPLE + + 1. 1 + +2. 2 + + 3. 3 + + + 1 + + + 2 + + - 3 + + - 4 + + + 5 + + + +"#; + +#[test] +fn blank() { + let org = Org::parse(ORG_STR); + + let mut writer = Vec::new(); + org.write_org(&mut writer).unwrap(); + + // eprintln!("{}", serde_json::to_string_pretty(&org).unwrap()); + + assert_eq!(String::from_utf8(writer).unwrap(), ORG_STR); +} diff --git a/tests/html.rs b/tests/html.rs deleted file mode 100644 index ab1a60e..0000000 --- a/tests/html.rs +++ /dev/null @@ -1,176 +0,0 @@ -use orgize::Org; - -#[test] -fn emphasis() { - insta::assert_snapshot!( - Org::parse("*bold*, /italic/,\n_underlined_, =verbatim= and ~code~").to_html(), - @r###" -

    bold, italic, - underlined, verbatim and code

    - "### - ); -} - -#[test] -fn link() { - insta::assert_snapshot!( - Org::parse("Visit[[http://example.com][link1]]or[[http://example.com][link1]].").to_html(), - @r###"

    Visitlink1orlink1.

    "### - ); -} - -#[test] -fn section_and_headline() { - insta::assert_snapshot!( - Org::parse(r#" -* title 1 -section 1 -** title 2 -section 2 -* title 3 -section 3 -* title 4 -section 4 -"#).to_html(), - @r###" -

    title 1

    section 1 -

    title 2

    section 2 -

    title 3

    section 3 -

    title 4

    section 4 -

    - "### - ); -} - -#[test] -fn list() { - insta::assert_snapshot!( - Org::parse(r#" -+ 1 - -+ 2 - - - 3 - - - 4 - -+ 5 -"#).to_html(), - @r###" -
    • 1 -

    • 2 -

      • 3 -

      • 4 -

    • 5 -

    - "### - ); -} - -#[test] -fn snippet() { - insta::assert_snapshot!( - Org::parse("@@html:@@delete this@@html:@@").to_html(), - @"

    delete this

    " - ); -} - -#[test] -fn paragraphs() { - insta::assert_snapshot!( - Org::parse(r#" -* title - -paragraph 1 - -paragraph 2 - -paragraph 3 - -paragraph 4 -"#).to_html(), - @r###" -

    title

    paragraph 1 -

    paragraph 2 -

    paragraph 3 -

    paragraph 4 -

    - "### - ); -} - -#[test] -fn table() { - // don't has table header - insta::assert_snapshot!( - Org::parse(r#" -|-----+-----+-----| -| 0 | 1 | 2 | -| 4 | 5 | 6 | -|-----+-----+-----| -"#).to_html(), - @"
    012
    456
    " - ); - - // has table header - insta::assert_snapshot!( - Org::parse(r#" -| 0 | 1 | 2 | -|-----+-----+-----| -| 4 | 5 | 6 | -|-----+-----+-----| -"#).to_html(), - @"
    012
    456
    " - ); - - // has two table body - insta::assert_snapshot!( - Org::parse(r#" -| 0 | 1 | 2 | -|-----+-----+-----| -| 4 | 5 | 6 | -|-----+-----+-----| -| 7 | 8 | 9 | -"#).to_html(), - @"
    012
    456
    789
    " - ); - - // multiple row rule - insta::assert_snapshot!( - Org::parse(r#" -| 0 | 1 | 2 | -|-----+-----+-----| -|-----+-----+-----| -| 4 | 5 | 6 | -"#).to_html(), - @"
    012
    456
    " - ); - - // empty - insta::assert_snapshot!( - Org::parse(r#" -|-----+-----+-----| -|-----+-----+-----| -"#).to_html(), - @"
    " - ); - - insta::assert_snapshot!( - Org::parse(r#" -| -|- -| -|- -| -"#).to_html(), - @"
    " - ); -} - -#[test] -fn line_break() { - insta::assert_debug_snapshot!( - Org::parse("aa\\\\\nbb").to_html(), - @r###""

    aa
    bb

    ""### - ); -} diff --git a/tests/issue_10.rs b/tests/issue_10.rs new file mode 100644 index 0000000..f0570f8 --- /dev/null +++ b/tests/issue_10.rs @@ -0,0 +1,18 @@ +use orgize::Org; + +#[test] +fn can_handle_empty_emphasis() { + let cases = &[ + "* / // a", + "\"* / // a\"", + "* * ** a", + "* 2020\n** December\n*** Experiment\nType A is marked with * and type B is marked with **.\n", + "* 2020\n:DRAWER:\n* ** a\n:END:", + "* * ** :a:", + "* * ** " + ]; + + for case in cases { + let _ = Org::parse(case); + } +} diff --git a/tests/issue_11.rs b/tests/issue_11.rs new file mode 100644 index 0000000..d22e72b --- /dev/null +++ b/tests/issue_11.rs @@ -0,0 +1,21 @@ +use orgize::Org; + +#[test] +fn can_handle_empty_list_item() { + let cases = &[ + "0. ", + "* \n0. ", + " * ", + " 0. ", + "\t* ", + "- ", + "- hello\n- ", + "- \n- hello", + "- hello\n- \n- world", + "* world\n- ", + ]; + + for case in cases { + let _ = Org::parse(case); + } +} diff --git a/tests/issue_15_16.rs b/tests/issue_15_16.rs new file mode 100644 index 0000000..3d8d9fc --- /dev/null +++ b/tests/issue_15_16.rs @@ -0,0 +1,26 @@ +use orgize::Org; + +#[test] +fn bad_headline_tags() { + contains_no_tag(Org::parse("* a ::")); + + contains_no_tag(Org::parse("* a :(:")); + + contains_one_tag(Org::parse("* a \t:_:"), "_"); + + contains_one_tag(Org::parse("* a \t :@:"), "@"); + + contains_one_tag(Org::parse("* a :#:"), "#"); + + contains_one_tag(Org::parse("* a\t :%:"), "%"); + + contains_one_tag(Org::parse("* a :余:"), "余"); +} + +fn contains_no_tag(org: Org) { + assert!(org.headlines().next().unwrap().title(&org).tags.is_empty()); +} + +fn contains_one_tag(org: Org, tag: &str) { + assert_eq!(vec![tag], org.headlines().next().unwrap().title(&org).tags); +} diff --git a/tests/issue_22.rs b/tests/issue_22.rs new file mode 100644 index 0000000..c9a71ee --- /dev/null +++ b/tests/issue_22.rs @@ -0,0 +1,24 @@ +use orgize::Org; + +#[test] +fn whitespaces() { + let org = Org::parse(" "); + + assert(&org); + + let 
org = Org::parse("\t \t \n \t \t \n \t"); + + assert(&org); + + let org = Org::parse("\u{000b}\u{0085}\u{00a0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200a}\u{2028}\u{2029}\u{202f}\u{205f}\u{3000}"); + + assert(&org); +} + +fn assert(org: &Org) { + assert_eq!( + org.iter().count(), + 2, + "should contains only one element - document" + ); +} diff --git a/tests/parse.rs b/tests/parse.rs index 813316b..69fb98c 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -1,37 +1,172 @@ -const INPUT: &[&str] = &[ - // issue 10 - "* / // a", - "\"* / // a\"", - "* * ** a", - "* 2020\n** December\n*** Experiment\nType A is marked with * and type B is marked with **.\n", - "* 2020\n:DRAWER:\n* ** a\n:END:", - "* * ** :a:", - "* * ** ", - // issue 11 - "0. ", - "* \n0. ", - " * ", - " 0. ", - "\t* ", - "- ", - "- hello\n- ", - "- \n- hello", - "- hello\n- \n- world", - "* world\n- ", - // issue 22 - "\u{000b}\u{0085}\u{00a0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200a}\u{2028}\u{2029}\u{202f}\u{205f}\u{3000}", - // fuzz test - "___\n", - "\n\n\n", - "\n*", - "\r-", - "6\r\n", - "|\n\u{b}|" -]; +use orgize::Org; +use pretty_assertions::assert_eq; -#[test] -fn parse() { - for input in INPUT { - let _ = orgize::Org::parse(input); - } +macro_rules! test_suite { + ($name:ident, $content:expr, $expected:expr) => { + #[test] + fn $name() { + let mut writer = Vec::new(); + let org = Org::parse($content); + org.write_html(&mut writer).unwrap(); + let string = String::from_utf8(writer).unwrap(); + assert_eq!(string, $expected); + } + }; } + +test_suite!( + emphasis, + "*bold*, /italic/,\n_underlined_, =verbatim= and ~code~", + "

    bold, italic,\nunderlined, \ + verbatim and code

    " +); + +test_suite!( + link, + "Visit[[http://example.com][link1]]or[[http://example.com][link1]].", + r#"

    Visitlink1orlink1.

    "# +); + +test_suite!( + section_and_headline, + r#" +* title 1 +section 1 +** title 2 +section 2 +* title 3 +section 3 +* title 4 +section 4 +"#, + "

    title 1

    section 1

    \ +

    title 2

    section 2

    \ +

    title 3

    section 3

    \ +

    title 4

    section 4

    " +); + +test_suite!( + list, + r#" ++ 1 + ++ 2 + + - 3 + + - 4 + ++ 5 +"#, + "
      \ +
    • 1

    • \ +
    • 2

      • 3

      • 4

    • \ +
    • 5

    • \ +
    " +); + +test_suite!( + snippet, + "@@html:@@delete this@@html:@@", + "

    delete this

    " +); + +test_suite!( + paragraphs, + r#" +* title + +paragraph 1 + +paragraph 2 + +paragraph 3 + +paragraph 4 +"#, + "

    title

    \ +

    paragraph 1

    paragraph 2

    \ +

    paragraph 3

    paragraph 4

    \ +
    " +); + +test_suite!( + table1, + r#" +|-----+-----+-----| +| 0 | 1 | 2 | +|-----+-----+-----| +| 4 | 5 | 6 | +"#, + "
    \ + \ + \ +
    012
    456
    " +); + +test_suite!( + table2, + r#" +|-----+-----+-----| +| 0 | 1 | 2 | +| 4 | 5 | 6 | +|-----+-----+-----| +"#, + "
    \ + \ + \ +
    012
    456
    " +); + +test_suite!( + table3, + r#" +|-----+-----+-----| +|-----+-----+-----| +| 0 | 1 | 2 | +| 4 | 5 | 6 | +"#, + "
    \ + \ + \ +
    012
    456
    " +); + +test_suite!( + table4, + r#" +| 0 | 1 | 2 | +| 4 | 5 | 6 | +|-----+-----+-----| +|-----+-----+-----| +"#, + "
    \ + \ + \ +
    012
    456
    " +); + +test_suite!( + table5, + r#" +|-----+-----+-----| +|-----+-----+-----| +"#, + "
    " +); + +test_suite!( + table6, + r#" +| +|- +| +|- +| +"#, + "
    \ + \ + \ + \ +
    " +); diff --git a/wasm/.gitignore b/wasm/.gitignore deleted file mode 100644 index 56aa179..0000000 --- a/wasm/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -/lib -/node_modules -/out-tsc -/package -/pkg -*.tgz \ No newline at end of file diff --git a/wasm/Cargo.toml b/wasm/Cargo.toml deleted file mode 100644 index f2eefce..0000000 --- a/wasm/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "orgize-wasm" -publish = false -version = "0.10.0-alpha.7" -authors = ["PoiScript "] -repository = "https://github.com/PoiScript/orgize" -edition = "2021" -license = "MIT" - -[lib] -crate-type = ["cdylib", "rlib"] - -[dependencies] -orgize = { path = ".." } -wasm-bindgen = "0.2" diff --git a/wasm/README.md b/wasm/README.md deleted file mode 100644 index 673208b..0000000 --- a/wasm/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# orgize - -![npm](https://img.shields.io/npm/v/orgize) - -## Install - -```sh -npm install orgize -yarn add orgize -``` - -## Browser - -```js -import init, { Org } from "orgize"; - -init().then(() => { - const org = new Org("* Hello, /world/!"); - const html = org.html(); - console.log(html); - org.free(); -}); -``` - -## Node.js - -```js -import { Org, initSync } from "orgize"; -import { readFile } from "node:fs/promises"; - -// you can also use import.meta.resolve, but it's currently behind -// an experimental flag --experimental-import-meta-resolve -import { createRequire } from "node:module"; -const require = createRequire(import.meta.url); - -readFile(require.resolve("orgize/wasm")).then((bytes) => { - initSync(bytes); - - const org = new Org("* Hello, /world/!"); - const html = org.html(); - console.log(html); - org.free(); -}); -``` - -## Notes - -1. You must **initialize** the WebAssembly module (using either `init` or - `initSync` function) before using the `Org` class; - -2. Don't forgot to call `org.free()` to **release the memory** that - allocated by Rust; - -3. 
This npm package is primarily aim to demonstrate and power the online - demo, so it doesn't provide any customization or settings. - - If you need to, please build your own npm package by `wasm-pack`. - (or `napi` if you're only targeting node.js users) - -## License - -MIT diff --git a/wasm/build.rs b/wasm/build.rs deleted file mode 100644 index db4d2ca..0000000 --- a/wasm/build.rs +++ /dev/null @@ -1,22 +0,0 @@ -use std::process::Command; - -fn main() { - { - let output = Command::new("git") - .args(["rev-parse", "--short", "HEAD"]) - .output() - .unwrap(); - - let git_hash = String::from_utf8(output.stdout).unwrap(); - - println!("cargo:rustc-env=CARGO_GIT_HASH={}", git_hash); - } - - { - let output = Command::new("date").args(["-R"]).output().unwrap(); - - let git_hash = String::from_utf8(output.stdout).unwrap(); - - println!("cargo:rustc-env=CARGO_BUILD_TIME={}", git_hash); - } -} diff --git a/wasm/index.html b/wasm/index.html deleted file mode 100644 index b755776..0000000 --- a/wasm/index.html +++ /dev/null @@ -1,348 +0,0 @@ - - - - - - - Orgize - - - - - - - - - - - - -
    -
    -
    -
    - -
    -
    - - HTML (rendered) - - HTML - Syntax - Traverse -
    -
    -
    - -
    -
    -
    -
    -
    - - - - diff --git a/wasm/package.json b/wasm/package.json deleted file mode 100644 index 7c22f42..0000000 --- a/wasm/package.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "name": "orgize", - "version": "0.10.0-alpha.0", - "license": "MIT", - "author": "PoiScript ", - "scripts": { - "build": "rm -rf dist && wasm-pack build -t web -d dist --out-name orgize" - }, - "repository": { - "type": "git", - "url": "https://github.com/PoiScript/orgize" - }, - "module": "./dist/orgize.js", - "typings": "./dist/orgize.d.ts", - "exports": { - ".": { - "types": "./dist/orgize.d.ts", - "import": "./dist/orgize.js" - }, - "./wasm": "./dist/orgize_bg.wasm" - }, - "files": [ - "dist/orgize_bg.wasm", - "dist/orgize.js", - "dist/orgize.d.ts", - "index.html", - "README.md" - ] -} diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs deleted file mode 100644 index 79792cd..0000000 --- a/wasm/src/lib.rs +++ /dev/null @@ -1,133 +0,0 @@ -use orgize::{ - export::{from_fn, Container, Event}, - rowan::ast::AstNode, - Org as Inner, -}; -use std::fmt::Write; - -use wasm_bindgen::prelude::*; - -#[wasm_bindgen] -pub struct Org { - inner: Inner, -} - -#[wasm_bindgen] -impl Org { - #[wasm_bindgen(constructor)] - pub fn parse(input: &str) -> Self { - Org { - inner: Inner::parse(input), - } - } - - pub fn html(&self) -> String { - self.inner.to_html() - } - - pub fn org(&self) -> String { - self.inner.to_org() - } - - pub fn syntax(&self) -> String { - format!("{:#?}", self.inner.document().syntax()) - } - - pub fn update(&mut self, s: &str) { - self.inner = Inner::parse(s); - } - - pub fn traverse(&self) -> String { - let mut result = String::new(); - let mut ident = 0; - let mut handler = from_fn(|event| { - let (name, range) = match &event { - Event::Enter(container) => match container { - Container::Document(x) => ("Document", x.text_range()), - Container::Section(x) => ("Section", x.text_range()), - Container::Paragraph(x) => ("Paragraph", x.text_range()), - Container::Headline(x) => ("Headline", 
x.text_range()), - Container::OrgTable(x) => ("OrgTable", x.text_range()), - Container::OrgTableRow(x) => ("OrgTableRow", x.text_range()), - Container::OrgTableCell(x) => ("OrgTableCell", x.text_range()), - Container::TableEl(x) => ("TableEl", x.text_range()), - Container::List(x) => ("List", x.text_range()), - Container::ListItem(x) => ("ListItem", x.text_range()), - Container::Drawer(x) => ("Drawer", x.text_range()), - Container::DynBlock(x) => ("DynBlock", x.text_range()), - Container::FnDef(x) => ("FnDef", x.text_range()), - Container::Comment(x) => ("Comment", x.text_range()), - Container::FixedWidth(x) => ("FixedWidth", x.text_range()), - Container::SpecialBlock(x) => ("SpecialBlock", x.text_range()), - Container::QuoteBlock(x) => ("QuoteBlock", x.text_range()), - Container::CenterBlock(x) => ("CenterBlock", x.text_range()), - Container::VerseBlock(x) => ("VerseBlock", x.text_range()), - Container::CommentBlock(x) => ("CommentBlock", x.text_range()), - Container::ExampleBlock(x) => ("ExampleBlock", x.text_range()), - Container::ExportBlock(x) => ("ExportBlock", x.text_range()), - Container::SourceBlock(x) => ("SourceBlock", x.text_range()), - Container::Link(x) => ("Link", x.text_range()), - Container::RadioTarget(x) => ("RadioTarget", x.text_range()), - Container::FnRef(x) => ("FnRef", x.text_range()), - Container::Target(x) => ("Target", x.text_range()), - Container::Bold(x) => ("Bold", x.text_range()), - Container::Strike(x) => ("Strike", x.text_range()), - Container::Italic(x) => ("Italic", x.text_range()), - Container::Underline(x) => ("Underline", x.text_range()), - Container::Verbatim(x) => ("Verbatim", x.text_range()), - Container::Code(x) => ("Code", x.text_range()), - Container::Superscript(x) => ("Superscript", x.text_range()), - Container::Subscript(x) => ("Subscript", x.text_range()), - Container::BabelCall(x) => ("BabelCall", x.text_range()), - Container::PropertyDrawer(x) => ("PropertyDrawer", x.text_range()), - Container::AffiliatedKeyword(x) 
=> ("AffiliatedKeyword", x.text_range()), - Container::Keyword(x) => ("Keyword", x.text_range()), - _ => unreachable!(), - }, - Event::Leave(_) => { - ident -= 2; - return; - } - Event::Text(x) => ("Text", x.text_range()), - Event::Macros(x) => ("Macros", x.text_range()), - Event::Cookie(x) => ("Cookie", x.text_range()), - Event::InlineCall(x) => ("InlineCall", x.text_range()), - Event::InlineSrc(x) => ("InlineSrc", x.text_range()), - Event::Clock(x) => ("Clock", x.text_range()), - Event::LineBreak(x) => ("LineBreak", x.text_range()), - Event::Snippet(x) => ("Snippet", x.text_range()), - Event::Rule(x) => ("Rule", x.text_range()), - Event::Timestamp(x) => ("Timestamp", x.text_range()), - Event::LatexFragment(x) => ("LatexFragment", x.text_range()), - Event::LatexEnvironment(x) => ("LatexEnvironment", x.text_range()), - Event::Entity(x) => ("Entity", x.text_range()), - _ => unreachable!(), - }; - - let _ = writeln!( - &mut result, - "{:ident$}{}@{}..{}", - "", - name, - u32::from(range.start()), - u32::from(range.end()) - ); - - if let Event::Enter(_) = event { - ident += 2; - } - }); - self.inner.traverse(&mut handler); - result - } - - #[wasm_bindgen(getter, js_name = "buildTime")] - pub fn build_time() -> String { - env!("CARGO_BUILD_TIME").into() - } - - #[wasm_bindgen(getter, js_name = "gitHash")] - pub fn git_hash() -> String { - env!("CARGO_GIT_HASH").into() - } -}