diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..70f9eae --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[registries.crates-io] +protocol = "sparse" diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 33c1965..94540b8 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,16 +1,8 @@ version: 2 updates: -- package-ecosystem: cargo - directory: "/" - schedule: - interval: weekly - time: "09:00" - open-pull-requests-limit: 10 - ignore: - - dependency-name: pretty_assertions - versions: - - 0.7.1 - - dependency-name: nom - versions: - - 6.1.0 - - 6.1.1 + - package-ecosystem: cargo + directory: "/" + schedule: + interval: weekly + time: "09:00" + open-pull-requests-limit: 10 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..5746014 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,57 @@ +name: CI + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@master + + - name: Setup rust + uses: dtolnay/rust-toolchain@stable + with: + components: clippy, rustfmt + + - run: | + cargo fmt -- --check + cargo test --all-features + cargo clippy + + gh-pages: + if: github.ref == 'refs/heads/v0.10' + + permissions: + contents: read + pages: write + id-token: write + + runs-on: ubuntu-latest + + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup Pages + uses: actions/configure-pages@v3 + + - name: Install + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh + + - name: Build + run: wasm-pack build -t web -d ./dist --out-name orgize ./wasm/ + + - name: Upload artifact + uses: actions/upload-pages-artifact@v2 + with: + path: "./wasm" + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 diff --git 
a/.github/workflows/rust.yml b/.github/workflows/rust.yml deleted file mode 100644 index 7679d25..0000000 --- a/.github/workflows/rust.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: Rust - -on: - pull_request: - push: - branches: - - master - -jobs: - format: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v1 - - - name: Run rustfmt - run: cargo fmt -- --check - - test: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v1 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - profile: minimal - override: true - - - name: Cache target/ - uses: actions/cache@v1 - with: - path: target - key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }} - - - name: Run Test - run: cargo test --all-features diff --git a/.gitignore b/.gitignore index 4ca2515..081fe7a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,6 @@ **/*.rs.bk Cargo.lock -benches/*.org +.vscode .gdb_history perf.data* diff --git a/Cargo.toml b/Cargo.toml index a50f744..3fdaa8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,39 +1,53 @@ +[workspace] +resolver = "2" +members = [".", "./wasm"] + [package] name = "orgize" -version = "0.8.4" +version = "0.10.0-alpha.10" authors = ["PoiScript "] -description = "A Rust library for parsing orgmode files." repository = "https://github.com/PoiScript/orgize" -readme = "README.md" -edition = "2018" +edition = "2021" license = "MIT" -keywords = ["orgmode", "emacs", "parser"] +description = "A Rust library for parsing org-mode files." 
+readme = "README.md" +keywords = ["orgmode", "org-mode", "emacs", "parser"] [package.metadata.docs.rs] all-features = true -[badges] -travis-ci = { repository = "PoiScript/orgize" } - [features] -default = ["ser"] -ser = ["serde", "serde_indextree", "indexmap/serde-1"] +default = [] +indexmap = ["dep:indexmap"] +chrono = ["dep:chrono"] +tracing = ["dep:tracing"] +syntax-org-fc = [] [dependencies] -bytecount = "0.6.0" -chrono = { version = "0.4.11", optional = true } -indextree = "4.0.0" -jetscii = "0.4.4" -lazy_static = "1.4.0" -memchr = "2.3.3" -# we don't need to parse any float number, so lexical crate is redundant -nom = { version = "5.1.1", default-features = false, features = ["std"] } -serde = { version = "1.0.106", optional = true, features = ["derive"] } -serde_indextree = { version = "0.2.0", optional = true } -syntect = { version = "4.1.0", optional = true } -indexmap = { version = "1.3.2", features = ["serde-1"], optional = true} +bytecount = "0.6" +cfg-if = "1.0.0" +chrono = { version = "0.4", optional = true } +indexmap = { version = "2.1", optional = true } +jetscii = "0.5" +memchr = "2.5" +nom = { version = "7.1", default-features = false, features = ["std"] } +rowan = "0.15" +tracing = { version = "0.1", optional = true } [dev-dependencies] -pretty_assertions = "0.6.1" -serde_json = "1.0.51" -slugify = "0.1.0" +criterion = "0.5" +insta = "1.29" +slugify = "0.1" +tracing-subscriber = { version = "0.3", features = ["fmt"] } + +[[bench]] +name = "parse" +harness = false + +[[example]] +name = "parse" +required-features = ["tracing"] + +[profile.dev.package] +insta.opt-level = 3 +similar.opt-level = 3 diff --git a/LICENSE b/LICENSE index 0d477d3..f7cb351 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019-2020 Alex Lin (poi) +Copyright (c) 2019-2023 Alex Lin (poi) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff 
--git a/README.md b/README.md index 8074a44..bf7a1d9 100644 --- a/README.md +++ b/README.md @@ -1,210 +1,119 @@ # Orgize -[![Build Status](https://travis-ci.org/PoiScript/orgize.svg?branch=master)](https://travis-ci.org/PoiScript/orgize) [![Crates.io](https://img.shields.io/crates/v/orgize.svg)](https://crates.io/crates/orgize) -[![Document](https://docs.rs/orgize/badge.svg)](https://docs.rs/orgize) +[![Documentation](https://docs.rs/orgize/badge.svg)](https://docs.rs/orgize) +[![Build status](https://img.shields.io/github/actions/workflow/status/PoiScript/orgize/ci.yml)](https://github.com/PoiScript/orgize/actions/workflows/ci.yml) +![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg) -A Rust library for parsing orgmode files. +A Rust library for parsing org-mode files. -[Live demo](https://orgize.herokuapp.com/) +Live Demo: ## Parse -To parse a orgmode string, simply invoking the `Org::parse` function: +To parse an org-mode string, simply invoke the `Org::parse` function: + +```rust +use orgize::{Org, rowan::ast::AstNode}; + +let org = Org::parse("* DONE Title :tag:"); +assert_eq!( + format!("{:#?}", org.document().syntax()), + r#"DOCUMENT@0..18 + HEADLINE@0..18 + HEADLINE_STARS@0..1 "*" + WHITESPACE@1..2 " " + HEADLINE_KEYWORD_DONE@2..6 "DONE" + WHITESPACE@6..7 " " + HEADLINE_TITLE@7..13 + TEXT@7..13 "Title " + HEADLINE_TAGS@13..18 + COLON@13..14 ":" + TEXT@14..17 "tag" + COLON@17..18 ":" +"#); +``` + +Use `ParseConfig::parse` to specify a custom parse config: + +```rust +use orgize::{Org, ParseConfig, ast::Headline}; + +let config = ParseConfig { + // custom todo keywords + todo_keywords: (vec!["TASK".to_string()], vec![]), + ..Default::default() +}; +let org = config.parse("* TASK Title 1"); +let hdl = org.first_node::<Headline>().unwrap(); +assert_eq!(hdl.todo_keyword().unwrap(), "TASK"); +``` + +## Traverse + +Use `org.traverse(&mut traversal)` to walk through the syntax tree. 
+ + ```rust +use orgize::{ + export::{from_fn, Container, Event}, + Org, +}; + +let mut hdl_count = 0; +let mut handler = from_fn(|event| { + if matches!(event, Event::Enter(Container::Headline(_))) { + hdl_count += 1; + } +}); +Org::parse("* 1\n** 2\n*** 3\n****4").traverse(&mut handler); +assert_eq!(hdl_count, 3); +``` + +## Modify + +Use `org.replace_range(TextRange::new(start, end), "new_text")` to modify the syntax tree: + +```rust +use orgize::{Org, ParseConfig, ast::Headline, TextRange}; + +let mut org = Org::parse("hello\n* world"); + +let hdl = org.first_node::<Headline>().unwrap(); +org.replace_range(hdl.text_range(), "** WORLD!"); + +let hdl = org.first_node::<Headline>().unwrap(); +assert_eq!(hdl.level(), 2); + +org.replace_range(TextRange::up_to(hdl.start()), ""); +assert_eq!(org.to_org(), "** WORLD!"); +``` + +## Render to html + +Call the `Org::to_html` function to export the org element tree to HTML: ```rust use orgize::Org; -Org::parse("* DONE Title :tag:"); -``` - -or `Org::parse_custom`: - -``` rust -use orgize::{Org, ParseConfig}; - -Org::parse_custom( - "* TASK Title 1", - &ParseConfig { - // custom todo keywords - todo_keywords: (vec!["TASK".to_string()], vec![]), - ..Default::default() - }, -); -``` - -## Iter - -`Org::iter` function will returns an iterator of `Event`s, which is -a simple wrapper of `Element`. - -```rust -use orgize::Org; - -for event in Org::parse("* DONE Title :tag:").iter() { - // handling the event -} -``` - -**Note**: whether an element is container or not, it will appears twice in one loop. -One as `Event::Start(element)`, one as `Event::End(element)`. - -## Render html - -You can call the `Org::write_html` function to generate html directly, which -uses the `DefaultHtmlHandler` internally: - -```rust -use orgize::Org; - -let mut writer = Vec::new(); -Org::parse("* title\n*section*").write_html(&mut writer).unwrap(); - assert_eq!( - String::from_utf8(writer).unwrap(), + Org::parse("* title\n*section*").to_html(), "

title

section

" ); ``` -## Render html with custom `HtmlHandler` - -To customize html rendering, simply implementing `HtmlHandler` trait and passing -it to the `Org::wirte_html_custom` function. - -The following code demonstrates how to add a id for every headline and return -own error type while rendering. - -```rust -use std::convert::From; -use std::io::{Error as IOError, Write}; -use std::string::FromUtf8Error; - -use orgize::export::{DefaultHtmlHandler, HtmlHandler}; -use orgize::{Element, Org}; -use slugify::slugify; - -#[derive(Debug)] -enum MyError { - IO(IOError), - Heading, - Utf8(FromUtf8Error), -} - -// From trait is required for custom error type -impl From for MyError { - fn from(err: IOError) -> Self { - MyError::IO(err) - } -} - -impl From for MyError { - fn from(err: FromUtf8Error) -> Self { - MyError::Utf8(err) - } -} - -#[derive(Default)] -struct MyHtmlHandler(DefaultHtmlHandler); - -impl HtmlHandler for MyHtmlHandler { - fn start(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { - if let Element::Title(title) = element { - if title.level > 6 { - return Err(MyError::Heading); - } else { - write!( - w, - "", - title.level, - slugify!(&title.raw), - )?; - } - } else { - // fallthrough to default handler - self.0.start(w, element)?; - } - Ok(()) - } - - fn end(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { - if let Element::Title(title) = element { - write!(w, "", title.level)?; - } else { - self.0.end(w, element)?; - } - Ok(()) - } -} - -fn main() -> Result<(), MyError> { - let mut writer = Vec::new(); - let mut handler = MyHtmlHandler::default(); - Org::parse("* title\n*section*").wirte_html_custom(&mut writer, &mut handler)?; - - assert_eq!( - String::from_utf8(writer)?, - "

title

\ -

section

" - ); - - Ok(()) -} -``` - -**Note**: as I mentioned above, each element will appears two times while iterating. -And handler will silently ignores all end events from non-container elements. - -So if you want to change how a non-container element renders, just redefine the `start` -function and leave the `end` function unchanged. - -## Serde - -`Org` struct have already implemented serde's `Serialize` trait. It means you can -serialize it into any format supported by serde, such as json: - -```rust -use orgize::Org; -use serde_json::{json, to_string}; - -let org = Org::parse("I 'm *bold*."); -println!("{}", to_string(&org).unwrap()); - -// { -// "type": "document", -// "children": [{ -// "type": "section", -// "children": [{ -// "type": "paragraph", -// "children":[{ -// "type": "text", -// "value":"I 'm " -// }, { -// "type": "bold", -// "children":[{ -// "type": "text", -// "value": "bold" -// }] -// }, { -// "type":"text", -// "value":"." -// }] -// }] -// }] -// } -``` +Check out `examples/html-slugify.rs` to see how to customize the HTML export process. ## Features -By now, orgize provides four features: +- **`chrono`**: adds the ability to convert `Timestamp` into `chrono::NaiveDateTime`, disabled by default. -+ `ser`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default. +- **`indexmap`**: adds the ability to convert `PropertyDrawer` properties into `IndexMap`, disabled by default. -+ `chrono`: adds the ability to convert `Datetime` into `chrono` structs, disabled by default. +## API compatibility -+ `syntect`: provides `SyntectHtmlHandler` for highlighting code block, disabled by default. +`element.syntax()` exposes access to the internal syntax tree, along with some rowan low-level APIs. +This can be useful for intricate tasks. -+ `indexmap`: Uses `IndexMap` instead of `HashMap` for properties to preserve their order, disabled by default. 
- -## License - -MIT +However, the structure of the internal syntax tree can change between different versions of the library. +Because of this, the result of `element.syntax()` doesn't follow semantic versioning, +which means updates might break your code if it relies on this method. diff --git a/benches/.gitignore b/benches/.gitignore new file mode 100644 index 0000000..448d1fb --- /dev/null +++ b/benches/.gitignore @@ -0,0 +1 @@ +*.org \ No newline at end of file diff --git a/benches/parse.rs b/benches/parse.rs index 0666be2..4549d95 100644 --- a/benches/parse.rs +++ b/benches/parse.rs @@ -1,30 +1,39 @@ -#![feature(test)] - -extern crate test; +use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use orgize::Org; -use test::Bencher; -#[bench] -fn org_syntax(b: &mut Bencher) { - // wget https://orgmode.org/worg/sources/dev/org-syntax.org - b.iter(|| { - Org::parse(include_str!("org-syntax.org")); - }) +const INPUT: &[(&str, &str)] = &[ + ("doc.org", include_str!("./doc.org")), + ("org-faq.org", include_str!("./org-faq.org")), + ("org-hacks.org", include_str!("./org-hacks.org")), + ( + "org-release-notes.org", + include_str!("./org-release-notes.org"), + ), + ("org-syntax.org", include_str!("./org-syntax.org")), +]; + +pub fn bench_parse(c: &mut Criterion) { + let mut group = c.benchmark_group("Org::parse"); + + for (id, org) in INPUT { + group.throughput(Throughput::Bytes(org.len() as u64)); + group.bench_with_input(*id, org, |b, i| b.iter(|| Org::parse(i))); + } + + group.finish(); } -#[bench] -fn doc(b: &mut Bencher) { - // wget https://orgmode.org/worg/sources/doc.org - b.iter(|| { - Org::parse(include_str!("doc.org")); - }) +pub fn bench_to_html(c: &mut Criterion) { + let mut group = c.benchmark_group("Org::to_html"); + + for (id, org) in INPUT { + group.throughput(Throughput::Bytes(org.len() as u64)); + group.bench_with_input(*id, &Org::parse(org), |b, i| b.iter(|| i.to_html())); + } + + group.finish(); } -#[bench] -fn org_faq(b: &mut 
Bencher) { - // wget https://orgmode.org/worg/sources/org-faq.org - b.iter(|| { - Org::parse(include_str!("org-faq.org")); - }) -} +criterion_group!(benches, bench_parse, bench_to_html); +criterion_main!(benches); diff --git a/development.md b/development.md new file mode 100644 index 0000000..bcd343c --- /dev/null +++ b/development.md @@ -0,0 +1,44 @@ +## Format, test, lint + +```shell +cargo fmt -- --check +cargo test --all-features +cargo clippy --allow-dirty --allow-staged +``` + +## Update snapshot testing + +```shell +cargo install cargo-insta +cargo insta test --all-features +cargo insta review +``` + +## Fuzz testing + +```shell +cargo install cargo-fuzz +rustup default nightly +cargo fuzz run fuzz_target_1 +``` + +## Benchmark + +```shell +curl -q https://orgmode.org/worg/doc.org --output ./benches/doc.org +curl -q https://orgmode.org/worg/org-faq.org --output ./benches/org-faq.org +curl -q https://orgmode.org/worg/org-hacks.org --output ./benches/org-hacks.org +curl -q https://orgmode.org/worg/org-release-notes.org --output ./benches/org-release-notes.org +curl -q https://orgmode.org/worg/org-syntax.org --output ./benches/org-syntax.org +curl -q https://raw.githubusercontent.com/bzg/org-mode/main/doc/org-manual.org --output ./benches/org-manual.org + +cargo bench --bench parse +``` + +## Benchmark w/ flamegraph + +```shell +cargo install flamegraph +cargo flamegraph --bench parse -o baseline.svg -- --bench +# then open baseline.svg with your browser +``` diff --git a/docs/STATUS.md b/docs/STATUS.md index 6788561..d0318b8 100644 --- a/docs/STATUS.md +++ b/docs/STATUS.md @@ -4,7 +4,7 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information. - [x] Headline - [X] Objects insides headline title -- [ ] Affiliated Keywords +- [x] Affiliated Keywords ## Greater Elements - [x] Greater Blocks @@ -15,10 +15,10 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information. 
- [ ] Objects insides inlinetask title - [x] Plain Lists and Items - [x] Nested List - - [ ] Nested List Indentation - - [ ] Tag - - [ ] Counter - - [ ] Counter set + - [x] Nested List Indentation + - [x] Tag + - [x] Counter + - [x] Counter set - [X] Property Drawers - [X] Tables @@ -26,25 +26,25 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information. - [x] Babel Call - [x] Blocks - - [ ] Escape characters (`#`,`*`, etc) + - [x] Escape characters (`#`,`*`, etc) - [ ] Line numbers - [X] Clock, Diary Sexp and Planning - [x] Comments - [x] Fixed Width Areas - [x] Horizontal Rules - [x] Keywords -- [ ] LaTeX Environments +- [x] LaTeX Environments - [X] Node Properties - [x] Paragraphs - [X] Table Rows ## Objects -- [ ] Entities and LaTeX Fragments +- [x] Entities and LaTeX Fragments - [x] Export Snippets - [x] Footnote References - [x] Inline Babel Calls and Source Blocks -- [ ] Line Breaks +- [x] Line Breaks - [x] Links - [x] Regular link - [ ] Plain link @@ -53,17 +53,22 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information. - [x] Macros - [x] Targets and Radio Targets - [x] Statistics Cookies -- [ ] Subscript and Superscript +- [x] Subscript and Superscript - [X] Table Cells - [x] Timestamps - [x] Text Markup + - [x] bold + - [x] italic + - [x] underline + - [x] verbatim + - [x] code + - [x] strike-through ## Export -- [x] HTML -- [X] Org +- [x] HTML `Org::to_html` +- [X] Org `Org::to_org` - [ ] LaTeX -- [X] JSON, (via Serde) ## Extra diff --git a/docs/SYNTAX.md b/docs/SYNTAX.md deleted file mode 100644 index a2ac5b2..0000000 --- a/docs/SYNTAX.md +++ /dev/null @@ -1,942 +0,0 @@ -# Table of Contents - -1. [Headlines and Sections](#Headlines_and_Sections) -2. [Affiliated Keywords](#Affiliated_keywords) -3. [Greater Elements](#Greater_Elements) - 1. [Greater Blocks](#Greater_Blocks) - 2. [Drawers and Property Drawers](#Drawers) - 3. [Dynamic Blocks](#Dynamic_Blocks) - 4. [Footnote Definitions](#Footnote_Definitions) - 5. 
[Inlinetasks](#Inlinetasks) - 6. [Plain Lists and Items](#Plain_Lists_and_Items) - 7. [Property Drawers](#Property_Drawers) - 8. [Tables](#Tables) -4. [Elements](#Elements) - 1. [Babel Call](#Babel_Call) - 2. [Blocks](#Blocks) - 3. [Clock, Diary Sexp and Planning](#Clock,_Diary_Sexp_and_Planning) - 4. [Comments](#Comments) - 5. [Fixed Width Areas](#Fixed_Width_Areas) - 6. [Horizontal Rules](#Horizontal_Rules) - 7. [Keywords](#Keywords) - 8. [LaTeX Environments](#LaTeX_Environments) - 9. [Node Properties](#Node_Properties) - 10. [Paragraphs](#Paragraphs) - 11. [Table Rows](#Table_Rows) -5. [Objects](#Objects) - 1. [Entities and LaTeX Fragments](#Entities_and_LaTeX_Fragments) - 2. [Export Snippets](#Export_Snippets) - 3. [Footnote References](#Footnote_References) - 4. [Inline Babel Calls and Source - Blocks](#Inline_Babel_Calls_and_Source_Blocks) - 5. [Line Breaks](#Line_Breaks) - 6. [Links](#Links) - 7. [Macros](#Macros) - 8. [Targets and Radio Targets](#Targets_and_Radio_Targets) - 9. [Statistics Cookies](#Statistics_Cookies) - 10. [Subscript and Superscript](#Subscript_and_Superscript) - 11. [Table Cells](#Table_Cells) - 12. [Timestamps](#Timestamp) - 13. [Text Markup](#Emphasis_Markers) - -This document describes and comments Org syntax as it is currently read by its -parser (Org Elements) and, therefore, by the export framework. It also includes -a few comments on that syntax. - -A core concept in this syntax is that only headlines, sections, planning lines -and property drawers are context-free1, 2. Every other syntactical part only exists within -specific environments. - -Three categories are used to classify these environments: **Greater elements**, -**elements**, and **objects**, from the broadest scope to the narrowest. The -word **element** is used for both Greater and non-Greater elements, the context -should make that clear. - -The paragraph is the unit of measurement. An element defines syntactical parts -that are at the same level as a paragraph, i.e. 
which cannot contain or be -included in a paragraph. An object is a part that could be included in an -element. Greater elements are all parts that can contain an element. - -Empty lines belong to the largest element ending before them. For example, in a -list, empty lines between items belong are part of the item before them, but -empty lines at the end of a list belong to the plain list element. - -Unless specified otherwise, case is not significant. - - - -# Headlines and Sections - -A headline is defined as: - - STARS KEYWORD PRIORITY TITLE TAGS - -STARS is a string starting at column 0, containing at least one asterisk (and up -to `org-inlinetask-min-level` if `org-inlinetask` library is loaded) and ended -by a space character. The number of asterisks is used to define the level of the -headline. It's the sole compulsory part of a headline. - -KEYWORD is a TODO keyword, which has to belong to the list defined in -`org-todo-keywords-1`. Case is significant. - -PRIORITY is a priority cookie, i.e. a single letter preceded by a hash sign # -and enclosed within square brackets. - -TITLE can be made of any character but a new line. Though, it will match after -every other part have been matched. - -TAGS is made of words containing any alpha-numeric character, underscore, at -sign, hash sign or percent sign, and separated with colons. - -Examples of valid headlines include: - - * - - ** DONE - - *** Some e-mail - - **** TODO [#A] COMMENT Title :tag:a2%: - -If the first word appearing in the title is `COMMENT`, the headline will be -considered as **commented**. Case is significant. - -If its title is `org-footnote-section`, it will be considered as a **footnote -section**. Case is significant. - -If `ARCHIVE` is one of its tags, it will be considered as **archived**. Case is -significant. - -A headline contains directly one section (optionally), followed by any number of -deeper level headlines. - -A section contains directly any greater element or element. 
Only a headline can -contain a section. As an exception, text before the first headline in the -document also belongs to a section. - -As an example, consider the following document: - - An introduction. - - * A Headline - - Some text. - - ** Sub-Topic 1 - - ** Sub-Topic 2 - - *** Additional entry - -Its internal structure could be summarized as: - - (document - (section) - (headline - (section) - (headline) - (headline - (headline)))) - - - -# Affiliated Keywords - -With the exception of [inlinetasks](#Inlinetasks), -[items](#Plain_Lists_and_Items), [planning](#Clock,_Diary_Sexp_and_Planning), -[clocks](#Clock,_Diary_Sexp_and_Planning), [node properties](#Node_Properties) -and [table rows](#Table_Rows), every other element type can be assigned -attributes. - -This is done by adding specific keywords, named **affiliated keywords**, just -above the element considered, no blank line allowed. - -Affiliated keywords are built upon one of the following patterns: `#+KEY: VALUE`, -`#+KEY[OPTIONAL]: VALUE` or `#+ATTR_BACKEND: VALUE`. - -KEY is either `CAPTION`, `HEADER`, `NAME`, `PLOT` or `RESULTS` string. - -BACKEND is a string constituted of alpha-numeric characters, hyphens or -underscores. - -OPTIONAL and VALUE can contain any character but a new line. Only `CAPTION` and -`RESULTS` keywords can have an optional value. - -An affiliated keyword can appear more than once if KEY is either `CAPTION` or -`HEADER` or if its pattern is `#+ATTR_BACKEND: VALUE`. - -`CAPTION`, `AUTHOR`, `DATE` and `TITLE` keywords can contain objects in their -value and their optional value, if applicable. - - - -# Greater Elements - -Unless specified otherwise, greater elements can contain directly any other -element or greater element excepted: - -- elements of their own type, -- [node properties](#Node_Properties), which can only be found in [property - drawers](#Property_Drawers), -- [items](#Plain_Lists_and_Items), which can only be found in [plain - lists](#Plain_Lists_and_Items). 
- - - -## Greater Blocks - -Greater blocks consist in the following pattern: - - #+BEGIN_NAME PARAMETERS - CONTENTS - #+END_NAME - -NAME can contain any non-whitespace character. - -PARAMETERS can contain any character other than new line, and can be omitted. - -If NAME is `CENTER`, it will be a **center block**. If it is `QUOTE`, it will be -a **quote block**. - -If the block is neither a center block, a quote block or a [block -element](#Blocks), it will be a **special block**. - -CONTENTS can contain any element, except : a line `#+END_NAME` on its own. Also -lines beginning with STARS must be quoted by a comma. - - - -## Drawers and Property Drawers - -Pattern for drawers is: - - :NAME: - CONTENTS - :END: - -NAME can contain word-constituent characters, hyphens and underscores. - -CONTENTS can contain any element but another drawer. - - - -## Dynamic Blocks - -Pattern for dynamic blocks is: - - #+BEGIN: NAME PARAMETERS - CONTENTS - #+END: - -NAME cannot contain any whitespace character. - -PARAMETERS can contain any character and can be omitted. - - - -## Footnote Definitions - -Pattern for footnote definitions is: - - [fn:LABEL] CONTENTS - -It must start at column 0. - -LABEL is either a number or follows the pattern `fn:WORD`, where word can -contain any word-constituent character, hyphens and underscore characters. - -CONTENTS can contain any element excepted another footnote definition. It ends -at the next footnote definition, the next headline, two consecutive empty lines -or the end of buffer. - - - -## Inlinetasks - -Inlinetasks are defined by `org-inlinetask-min-level` contiguous asterisk -characters starting at column 0, followed by a whitespace character. - -Optionally, inlinetasks can be ended with a string constituted of -`org-inlinetask-min-level` contiguous asterisk characters starting at column 0, -followed by a space and the `END` string. - -Inlinetasks are recognized only after `org-inlinetask` library is loaded. 
- - - -## Plain Lists and Items - -Items are defined by a line starting with the following pattern: -`BULLET COUNTER-SET CHECK-BOX TAG`, in which only BULLET is mandatory. - -BULLET is either an asterisk, a hyphen, a plus sign character or follows either -the pattern `COUNTER.` or `COUNTER)`. In any case, BULLET is follwed by a -whitespace character or line ending. - -COUNTER can be a number or a single letter. - -COUNTER-SET follows the pattern [@COUNTER]. - -CHECK-BOX is either a single whitespace character, a `X` character or a hyphen, -enclosed within square brackets. - -TAG follows `TAG-TEXT ::` pattern, where TAG-TEXT can contain any character but -a new line. - -An item ends before the next item, the first line less or equally indented than -its starting line, or two consecutive empty lines. Indentation of lines within -other greater elements do not count, neither do inlinetasks boundaries. - -A plain list is a set of consecutive items of the same indentation. It can only -directly contain items. - -If first item in a plain list has a counter in its bullet, the plain list will -be an **ordered plain-list**. If it contains a tag, it will be a **descriptive -list**. Otherwise, it will be an **unordered list**. List types are mutually -exclusive. - -For example, consider the following excerpt of an Org document: - - 1. item 1 - 2. [X] item 2 - - some tag :: item 2.1 - -Its internal structure is as follows: - - (ordered-plain-list - (item) - (item - (descriptive-plain-list - (item)))) - - - -## Property Drawers - -Property drawers are a special type of drawer containing properties attached to -a headline. They are located right after a [headline](#Headlines_and_Sections) -and its [planning](#Clock,_Diary_Sexp_and_Planning) information. - - HEADLINE - PROPERTYDRAWER - - HEADLINE - PLANNING - PROPERTYDRAWER - -PROPERTYDRAWER follows the pattern - - :PROPERTIES: - CONTENTS - :END: - -where CONTENTS consists of zero or more [node properties](#Node_Properties). 
- - - -## Tables - -Tables start at lines beginning with either a vertical bar or the `+-` string -followed by plus or minus signs only, assuming they are not preceded with lines -of the same type. These lines can be indented. - -A table starting with a vertical bar has `org` type. Otherwise it has `table.el` -type. - -Org tables end at the first line not starting with a vertical bar. Table.el -tables end at the first line not starting with either a vertical line or a plus -sign. Such lines can be indented. - -An org table can only contain table rows. A table.el table does not contain -anything. - -One or more `#+TBLFM: FORMULAS` lines, where `FORMULAS` can contain any -character, can follow an org table. - - - -# Elements - -Elements cannot contain any other element. - -Only [keywords](#Keywords) whose name belongs to -`org-element-document-properties`, [verse blocks](#Blocks) , -[paragraphs](#Paragraphs) and [table rows](#Table_Rows) can contain objects. - - - -## Babel Call - -Pattern for babel calls is: - - #+CALL: VALUE - -VALUE is optional. It can contain any character but a new line. - - - -## Blocks - -Like [greater blocks](#Greater_Blocks), pattern for blocks is: - - #+BEGIN_NAME DATA - CONTENTS - #+END_NAME - -NAME cannot contain any whitespace character. - -1. If NAME is `COMMENT`, it will be a **comment block**. -2. If it is `EXAMPLE`, it will be an **example block**. -3. If it is `EXPORT`, it will be an **export block**. -4. If it is `SRC`, it will be a **source block**. -5. If it is `VERSE`, it will be a **verse block**. - -DATA can contain any character but a new line. It can be ommitted, unless the -block is either a **source block** or an **export block**. - -In the latter case, it should be constituted of a single word. - -In the former case, it must follow the pattern `LANGUAGE SWITCHES ARGUMENTS`, -where SWITCHES and ARGUMENTS are optional. - -LANGUAGE cannot contain any whitespace character. 
- -SWITCHES is made of any number of `SWITCH` patterns, separated by blank lines. - -A SWITCH pattern is either `-l FORMAT`, where FORMAT can contain any character -but a double quote and a new line, `-S` or `+S`, where S stands for a single -letter. - -ARGUMENTS can contain any character but a new line. - -CONTENTS can contain any character, including new lines. Though it will only -contain Org objects if the block is a verse block. Otherwise, CONTENTS will not -be parsed. - - - -## Clock, Diary Sexp and Planning - -A clock follows either of the patterns below: - - CLOCK: INACTIVE-TIMESTAMP - CLOCK: INACTIVE-TIMESTAMP-RANGE DURATION - -INACTIVE-TIMESTAMP, resp. INACTIVE-TIMESTAMP-RANGE, is an inactive, resp. -inactive range, [timestamp](#Timestamp) object. - -DURATION follows the pattern: - - => HH:MM - -HH is a number containing any number of digits. MM is a two digit numbers. - -A diary sexp is a line starting at column 0 with `%%(` string. It can then -contain any character besides a new line. - -A planning is an element with the following pattern: - - HEADLINE - PLANNING - -where HEADLINE is a [headline](#Headlines_and_Sections) element and PLANNING is -a line filled with INFO parts, where each of them follows the pattern: - - KEYWORD: TIMESTAMP - -KEYWORD is either `DEADLINE`, `SCHEDULED` or `CLOSED`. TIMESTAMP is a -[timestamp](#Timestamp) object. - -In particular, no blank line is allowed between PLANNING and HEADLINE. - - - -## Comments - -A **comment line** starts with a hash signe and a whitespace character or an end -of line. - -Comments can contain any number of consecutive comment lines. - - - -## Fixed Width Areas - -A **fixed-width line** start with a colon character and a whitespace or an end -of line. - -Fixed width areas can contain any number of consecutive fixed-width lines. - - - -## Horizontal Rules - -A horizontal rule is a line made of at least 5 consecutive hyphens. It can be -indented. 
- - - -## Keywords - -Keywords follow the syntax: - - #+KEY: VALUE - -KEY can contain any non-whitespace character, but it cannot be equal to `CALL` -or any affiliated keyword. - -VALUE can contain any character except a new line. - -If KEY belongs to `org-element-document-properties`, VALUE can contain objects. - - - -## LaTeX Environments - -Pattern for LaTeX environments is: - - \begin{NAME} CONTENTS \end{NAME} - -NAME is constituted of alpha-numeric or asterisk characters. - -CONTENTS can contain anything but the `\end{NAME}` string. - - - -## Node Properties - -Node properties can only exist in [property drawers](#Property_Drawers). Their -pattern is any of the following: - - :NAME: VALUE - - :NAME+: VALUE - - :NAME: - - :NAME+: - -NAME can contain any non-whitespace character but cannot end with a plus sign. -It cannot be the empty string. - -VALUE can contain anything but a newline character. - - - -## Paragraphs - -Paragraphs are the default element, which means that any unrecognized context is -a paragraph. - -Empty lines and other elements end paragraphs. - -Paragraphs can contain every type of object. - - - -## Table Rows - -A table row is constituted of either a vertical bar and any number of [table -cells](#Table_Cells), or a vertical bar followed by a hyphen. - -In the first case the table row has the **standard** type. In the second case, -it has the **rule** type. - -Table rows can only exist in [tables](#Tables). - - - -# Objects - -Objects can only be found in the following locations: - -- [affiliated keywords](#Affiliated_keywords) defined in - `org-element-parsed-keywords`, -- [document properties](#Keywords), -- [headline](#Headlines_and_Sections) titles, -- [inlinetask](#Inlinetasks) titles, -- [item](#Plain_Lists_and_Items) tags, -- [paragraphs](#Paragraphs), -- [table cells](#Table_Cells), -- [table rows](#Table_Rows), which can only contain table cell objects, -- [verse blocks](#Blocks). - -Most objects cannot contain objects.
Those which can will be specified. - - - -## Entities and LaTeX Fragments - -An entity follows the pattern: - - \NAME POST - -where NAME has a valid association in either `org-entities` or -`org-entities-user`. - -POST is the end of line, `{}` string, or a non-alphabetical character. It isn't -separated from NAME by a whitespace character. - -A LaTeX fragment can follow multiple patterns: - - \NAME BRACKETS - \(CONTENTS\) - \[CONTENTS\] - $$CONTENTS$$ - PRE$CHAR$POST - PRE$BORDER1 BODY BORDER2$POST - -NAME contains alphabetical characters only and must not have an association in -either `org-entities` or `org-entities-user`. - -BRACKETS is optional, and is not separated from NAME with white spaces. It may -contain any number of the following patterns: - - [CONTENTS1] - {CONTENTS2} - -where CONTENTS1 can contain any character except `{`, `}`, `[`, `]` and newline -and CONTENTS2 can contain any character except `{`, `}` and newline. - -CONTENTS can contain any character but cannot contain `\)` in the second -template or `\]` in the third one. - -PRE is either the beginning of line or a character different from `$`. - -CHAR is a non-whitespace character different from `.`, `,`, `?`, `;`, `'` or a -double quote. - -POST is any punctuation (including parentheses and quotes) or space character, -or the end of line. - -BORDER1 is a non-whitespace character different from `.`, `,`, `;` and `$`. - -BODY can contain any character except `$`, and may not span over more than 3 -lines. - -BORDER2 is any non-whitespace character different from `,`, `.` and `$`. - ---- - -> It would introduce incompatibilities with previous Org versions, but support -> for `$...$` (and for symmetry, `$$...$$`) constructs ought to be removed. -> -> They are slow to parse, fragile, redundant and imply false positives. — -> ngz - - - -## Export Snippets - -Pattern for export snippets is: - - @@NAME:VALUE@@ - -NAME can contain any alpha-numeric character and hyphens.
- -VALUE can contain anything but `@@` string. - - - -## Footnote References - -There are three patterns for footnote references: - - [fn:LABEL] - [fn:LABEL:DEFINITION] - [fn::DEFINITION] - -LABEL can contain any word constituent character, hyphens and underscores. - -DEFINITION can contain any character, though opening and closing square brackets -must be balanced in it. It can contain any object encountered in a paragraph, -even other footnote references. - -If the reference follows the second pattern, it is called an **inline -footnote**. If it follows the third one, i.e. if LABEL is omitted, it is an -**anonymous footnote**. - - - -## Inline Babel Calls and Source Blocks - -Inline Babel calls follow any of the following patterns: - - call_NAME(ARGUMENTS) - call_NAME[HEADER](ARGUMENTS)[HEADER] - -NAME can contain any character besides `(`, `)` and `\n`. - -HEADER can contain any character besides `]` and `\n`. - -ARGUMENTS can contain any character besides `)` and `\n`. - -Inline source blocks follow any of the following patterns: - - src_LANG{BODY} - src_LANG[OPTIONS]{BODY} - -LANG can contain any non-whitespace character. - -OPTIONS and BODY can contain any character but `\n`. - - - -## Line Breaks - -A line break consists of a `\\SPACE` pattern at the end of an otherwise non-empty -line. - -SPACE can contain any number of tabs and spaces, including 0. - - - -## Links - -There are 4 major types of links: - - PRE1 RADIO POST1 ("radio" link) - <PROTOCOL:PATH> ("angle" link) - PRE2 PROTOCOL:PATH2 POST2 ("plain" link) - [[PATH3]DESCRIPTION] ("regular" link) - -PRE1 and POST1, when they exist, are non-alphanumeric characters. - -RADIO is a string matched by some [radio target](#Targets_and_Radio_Targets). It -may contain [entities](#Entities_and_LaTeX_Fragments), [latex -fragments](#Entities_and_LaTeX_Fragments), -[subscript](#Subscript_and_Superscript) and -[superscript](#Subscript_and_Superscript). - -PROTOCOL is a string among `org-link-types`.
- -PATH can contain any character but `]`, `<`, `>` and `\n`. - -PRE2 and POST2, when they exist, are non-word-constituent characters. - -PATH2 can contain any non-whitespace character except `(`, `)`, `<` and `>`. -It must end with a word-constituent character, or any non-whitespace -non-punctuation character followed by `/`. - -DESCRIPTION must be enclosed within square brackets. It can contain any -character but square brackets. It can contain any object found in a paragraph -except a [footnote reference](#Footnote_References), a [radio -target](#Targets_and_Radio_Targets) and a [line break](#Line_Breaks). It cannot -contain another link either, unless it is a plain or angle link. - -DESCRIPTION is optional. - -PATH3 is built according to the following patterns: - - FILENAME ("file" type) - PROTOCOL:PATH4 ("PROTOCOL" type) - PROTOCOL://PATH4 ("PROTOCOL" type) - id:ID ("id" type) - #CUSTOM-ID ("custom-id" type) - (CODEREF) ("coderef" type) - FUZZY ("fuzzy" type) - -FILENAME is a file name, either absolute or relative. - -PATH4 can contain any character besides square brackets. - -ID is constituted of hexadecimal numbers separated with hyphens. - -PATH4, CUSTOM-ID, CODEREF and FUZZY can contain any character besides square -brackets. - - - -## Macros - -Macros follow the pattern: - - {{{NAME(ARGUMENTS)}}} - -NAME must start with a letter and can be followed by any number of alpha-numeric -characters, hyphens and underscores. - -ARGUMENTS can contain anything but `}}}` string. Values within ARGUMENTS are -separated by commas. Non-separating commas have to be escaped with a backslash -character. - - - -## Targets and Radio Targets - -Radio targets follow the pattern: - - <<<CONTENTS>>> - -CONTENTS can contain any character besides `<`, `>` and `\n`. It cannot start or end -with a whitespace character.
As far as objects go, it can contain [text -markup](#Emphasis_Markers), [entities](#Entities_and_LaTeX_Fragments), [latex -fragments](#Entities_and_LaTeX_Fragments), -[subscript](#Subscript_and_Superscript) and -[superscript](#Subscript_and_Superscript) only. - -Targets follow the pattern: - - <<TARGET>> - -TARGET can contain any character besides `<`, `>` and `\n`. It cannot start or -end with a whitespace character. It cannot contain any object. - - - -## Statistics Cookies - -Statistics cookies follow either pattern: - - [PERCENT%] - [NUM1/NUM2] - -PERCENT, NUM1 and NUM2 are numbers or the empty string. - - - -## Subscript and Superscript - -Pattern for subscript is: - - CHAR_SCRIPT - -Pattern for superscript is: - - CHAR^SCRIPT - -CHAR is any non-whitespace character. - -SCRIPT can be `*` or an expression enclosed in parentheses (respectively curly -brackets), possibly containing balanced parentheses (respectively curly -brackets). - -SCRIPT can also follow the pattern: - - SIGN CHARS FINAL - -SIGN is either a plus sign, a minus sign, or an empty string. - -CHARS is any number of alpha-numeric characters, commas, backslashes and dots, -or an empty string. - -FINAL is an alpha-numeric character. - -There is no white space between SIGN, CHARS and FINAL. - - - -## Table Cells - -Table cells follow the pattern: - - CONTENTS SPACES| - -CONTENTS can contain any character except a vertical bar. - -SPACES contains any number of space characters, including zero. It can be used -to properly align the table. - -The final bar may be replaced with a newline character for the last cell in a row. - - - -## Timestamps - -There are seven possible patterns for timestamps: - - <%%(SEXP)> (diary) - <DATE TIME REPEATER-OR-DELAY> (active) - [DATE TIME REPEATER-OR-DELAY] (inactive) - <DATE TIME REPEATER-OR-DELAY>--<DATE TIME REPEATER-OR-DELAY> (active range) - <DATE TIME-TIME REPEATER-OR-DELAY> (active range) - [DATE TIME REPEATER-OR-DELAY]--[DATE TIME REPEATER-OR-DELAY] (inactive range) - [DATE TIME-TIME REPEATER-OR-DELAY] (inactive range) - -SEXP can contain any character except `>` and `\n`.
- -DATE follows the pattern: - - YYYY-MM-DD DAYNAME - -`Y`, `M` and `D` are digits. DAYNAME can contain any non-whitespace character -besides `+`, `-`, `]`, `>`, a digit or `\n`. - -TIME follows the pattern `H:MM`. `H` can be one or two digits long and can start -with 0. - -REPEATER-OR-DELAY follows the pattern: - - MARK VALUE UNIT - -MARK is `+` (cumulate type), `++` (catch-up type) or `.+` (restart type) for a -repeater, and `-` (all type) or `--` (first type) for warning delays. - -VALUE is a number. - -UNIT is a character among `h` (hour), `d` (day), `w` (week), `m` (month), `y` -(year). - -MARK, VALUE and UNIT are not separated by whitespace characters. - -There can be two REPEATER-OR-DELAY in the timestamp: one as a repeater and one -as a warning delay. - - - -## Text Markup - -Text markup follows the pattern: - - PRE MARKER CONTENTS MARKER POST - -PRE is a whitespace character, `(`, `{`, `'` or a double quote. It can also be a -beginning of line. - -MARKER is a character among `*` (bold), `=` (verbatim), `/` (italic), `+` -(strike-through), `_` (underline), `~` (code). - -CONTENTS is a string following the pattern: - - BORDER BODY BORDER - -BORDER can be any non-whitespace character except `,`, `'` or a double quote. - -BODY can contain any character but may not span over more than 3 lines. - -BORDER and BODY are not separated by whitespace. - -CONTENTS can contain any object encountered in a paragraph when markup is -**bold**, **italic**, **strike-through** or **underline**. - -POST is a whitespace character, `-`, `.`, `,`, `:`, `!`, `?`, `'`, `)`, `}` or a -double quote. It can also be an end of line. - -PRE, MARKER, CONTENTS, MARKER and POST are not separated by whitespace -characters. - ---- - -> All of this is wrong if `org-emphasis-regexp-components` or -> `org-emphasis-alist` is modified. -> -> This should really be simplified. -> -> Also, CONTENTS should be anything within code and verbatim emphasis, by -> definition.
— ngz - -# Footnotes - -1 In particular, the parser requires -stars at column 0 to be quoted by a comma when they do not define a headline. - -2 It also means that only headlines -and sections can be recognized just by looking at the beginning of the line. -Planning lines and property drawers can be recognized by looking at one or two -lines above. - -As a consequence, using `org-element-at-point` or `org-element-context` will -move up to the parent headline, and parse top-down from there until context -around original location is found. diff --git a/examples/custom.rs b/examples/custom.rs deleted file mode 100644 index 3f650ff..0000000 --- a/examples/custom.rs +++ /dev/null @@ -1,81 +0,0 @@ -use std::convert::From; -use std::env::args; -use std::fs; -use std::io::{Error as IOError, Write}; -use std::result::Result; -use std::string::FromUtf8Error; - -use orgize::export::{DefaultHtmlHandler, HtmlHandler}; -use orgize::{Element, Org}; -use slugify::slugify; - -#[derive(Debug)] -enum MyError { - IO(IOError), - Heading, - Utf8(FromUtf8Error), -} - -// From trait is required for custom error type -impl From for MyError { - fn from(err: IOError) -> Self { - MyError::IO(err) - } -} - -impl From for MyError { - fn from(err: FromUtf8Error) -> Self { - MyError::Utf8(err) - } -} - -#[derive(Default)] -struct MyHtmlHandler(DefaultHtmlHandler); - -impl HtmlHandler for MyHtmlHandler { - fn start(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { - if let Element::Title(title) = element { - if title.level > 6 { - return Err(MyError::Heading); - } else { - write!( - w, - "", - title.level, - slugify!(&title.raw), - )?; - } - } else { - // fallthrough to default handler - self.0.start(w, element)?; - } - Ok(()) - } - - fn end(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { - if let Element::Title(title) = element { - write!(w, "", title.level)?; - } else { - self.0.end(w, element)?; - } - Ok(()) - } -} - -fn main() -> Result<(), MyError> { - let 
args: Vec<_> = args().collect(); - - if args.len() < 2 { - eprintln!("Usage: {} ", args[0]); - } else { - let contents = String::from_utf8(fs::read(&args[1])?)?; - - let mut writer = Vec::new(); - let mut handler = MyHtmlHandler::default(); - Org::parse(&contents).write_html_custom(&mut writer, &mut handler)?; - - println!("{}", String::from_utf8(writer)?); - } - - Ok(()) -} diff --git a/examples/html-slugify.rs b/examples/html-slugify.rs new file mode 100644 index 0000000..57972dd --- /dev/null +++ b/examples/html-slugify.rs @@ -0,0 +1,44 @@ +//! ```bash +//! cargo run --example html-slugify '* hello world!' +//! ``` + +use orgize::{ + export::HtmlExport, + export::{from_fn_with_ctx, Container, Event, Traverser}, + Org, +}; +use slugify::slugify; +use std::cmp::min; +use std::env::args; + +fn main() { + let args: Vec<_> = args().collect(); + + if args.len() < 2 { + eprintln!("Usage: {} ", args[0]); + } else { + let mut html_export = HtmlExport::default(); + + let mut handler = from_fn_with_ctx(|event, ctx| { + if let Event::Enter(Container::Headline(headline)) = event { + let level = min(headline.level(), 6); + let title = headline.title().map(|e| e.to_string()).collect::(); + html_export.push_str(format!( + "", + slugify!(&title) + )); + for elem in headline.title() { + html_export.element(elem, ctx); + } + html_export.push_str(format!("")); + } else { + // forward to default html export + html_export.event(event, ctx); + } + }); + + Org::parse(&args[1]).traverse(&mut handler); + + println!("{}", html_export.finish()); + } +} diff --git a/examples/iter.rs b/examples/iter.rs deleted file mode 100644 index 1f95f67..0000000 --- a/examples/iter.rs +++ /dev/null @@ -1,19 +0,0 @@ -use orgize::Org; -use std::env::args; -use std::fs; -use std::io::Result; - -fn main() -> Result<()> { - let args: Vec<_> = args().collect(); - - if args.len() < 2 { - eprintln!("Usage: {} ", args[0]); - } else { - let contents = String::from_utf8(fs::read(&args[1])?).unwrap(); - - for event 
in Org::parse(&contents).iter() { - println!("{:?}", event); - } - } - Ok(()) -} diff --git a/examples/json.rs b/examples/json.rs deleted file mode 100644 index e2ac5cf..0000000 --- a/examples/json.rs +++ /dev/null @@ -1,17 +0,0 @@ -use orgize::Org; -use serde_json::to_string; -use std::env::args; -use std::fs; -use std::io::Result; - -fn main() -> Result<()> { - let args: Vec<_> = args().collect(); - - if args.len() < 2 { - eprintln!("Usage: {} ", args[0]); - } else { - let contents = String::from_utf8(fs::read(&args[1])?).unwrap(); - println!("{}", to_string(&Org::parse(&contents)).unwrap()); - } - Ok(()) -} diff --git a/examples/markdown.rs b/examples/markdown.rs new file mode 100644 index 0000000..137c1db --- /dev/null +++ b/examples/markdown.rs @@ -0,0 +1,23 @@ +//! ```bash +//! cargo run --example markdown test.org +//! ``` + +use orgize::{export::MarkdownExport, Org}; +use std::{env::args, fs}; + +fn main() { + let args: Vec<_> = args().collect(); + + if args.len() < 2 { + panic!("Usage: {} ", args[0]); + } + + let content = fs::read_to_string(&args[1]).unwrap(); + + let mut export = MarkdownExport::default(); + Org::parse(content).traverse(&mut export); + + fs::write(format!("{}.md", &args[1]), export.finish()).unwrap(); + + println!("Wrote to {}.md", &args[1]); +} diff --git a/examples/parse.rs b/examples/parse.rs new file mode 100644 index 0000000..978371d --- /dev/null +++ b/examples/parse.rs @@ -0,0 +1,30 @@ +//! ```bash +//! cargo run --example parse '* hello\n** /world/!' +//! 
``` + +use orgize::Org; +use rowan::ast::AstNode; +use std::env::args; +use tracing_subscriber::fmt::format::FmtSpan; + +fn main() { + let args: Vec<_> = args().collect(); + + tracing_subscriber::fmt() + .without_time() + .with_file(true) + .with_span_events(FmtSpan::NEW) + .with_line_number(true) + .with_max_level(tracing::Level::TRACE) + .with_file(false) + .with_line_number(false) + .init(); + + if args.len() < 2 { + eprintln!("Usage: {} ", args[0]); + } else { + let s = &args[1].replace(r"\n", "\n").replace(r"\r", "\r"); + let org = Org::parse(s); + println!("{:#?}", org.document().syntax()); + } +} diff --git a/fuzz/.gitignore b/fuzz/.gitignore index a092511..1a45eee 100644 --- a/fuzz/.gitignore +++ b/fuzz/.gitignore @@ -1,3 +1,4 @@ target corpus artifacts +coverage diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 9162691..eeb3de4 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -1,20 +1,27 @@ [package] name = "orgize-fuzz" -version = "0.0.1" -authors = ["Automatically generated"] +version = "0.0.0" publish = false +edition = "2018" [package.metadata] cargo-fuzz = true [dependencies] -libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" } -orgize = { path = ".." } +libfuzzer-sys = "0.4" + +[dependencies.orgize] +path = ".." 
# Prevent this from interfering with workspaces [workspace] members = ["."] +[profile.release] +debug = 1 + [[bin]] name = "fuzz_target_1" path = "fuzz_targets/fuzz_target_1.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs index bee8bcb..4511ba0 100644 --- a/fuzz/fuzz_targets/fuzz_target_1.rs +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -1,14 +1,7 @@ #![no_main] -#[macro_use] -extern crate libfuzzer_sys; -extern crate orgize; - -use orgize::Org; - -#[cfg_attr(rustfmt, rustfmt_skip)] libfuzzer_sys::fuzz_target!(|data: &[u8]| { - if let Ok(s) = std::str::from_utf8(data) { - let _ = Org::parse(s); + if let Ok(utf8) = std::str::from_utf8(data) { + let _ = orgize::Org::parse(utf8); } }); diff --git a/src/ast/affiliated_keyword.rs b/src/ast/affiliated_keyword.rs new file mode 100644 index 0000000..556c002 --- /dev/null +++ b/src/ast/affiliated_keyword.rs @@ -0,0 +1,52 @@ +use crate::syntax::SyntaxKind; + +use super::{filter_token, AffiliatedKeyword, Token}; + +impl AffiliatedKeyword { + /// + /// ```rust + /// use orgize::{Org, ast::AffiliatedKeyword}; + /// + /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::().unwrap(); + /// assert_eq!(keyword.key(), "CAPTION"); + /// ``` + pub fn key(&self) -> Token { + self.syntax + .children_with_tokens() + .find_map(filter_token(SyntaxKind::TEXT)) + .expect("keyword must contains TEXT") + } + + /// + /// ```rust + /// use orgize::{Org, ast::AffiliatedKeyword}; + /// + /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::().unwrap(); + /// assert!(keyword.optional().is_none()); + /// let keyword = Org::parse("#+CAPTION[OPTIONAL]: VALUE\nabc").first_node::().unwrap(); + /// assert_eq!(keyword.optional().unwrap(), "OPTIONAL"); + /// ``` + pub fn optional(&self) -> Option { + self.syntax + .children_with_tokens() + .skip_while(|it| it.kind() != SyntaxKind::L_BRACKET) + .nth(1) + .and_then(filter_token(SyntaxKind::TEXT)) + } + + /// + /// 
```rust + /// use orgize::{Org, ast::AffiliatedKeyword}; + /// + /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::().unwrap(); + /// assert_eq!(keyword.value().unwrap(), " VALUE"); + /// let keyword = Org::parse("#+CAPTION[OPTIONAL]:VALUE\nabc").first_node::().unwrap(); + /// assert_eq!(keyword.value().unwrap(), "VALUE"); + /// ``` + pub fn value(&self) -> Option { + self.syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)) + .last() + } +} diff --git a/src/ast/block.rs b/src/ast/block.rs new file mode 100644 index 0000000..8e7fad5 --- /dev/null +++ b/src/ast/block.rs @@ -0,0 +1,194 @@ +use super::{ + filter_token, CenterBlock, CommentBlock, DynBlock, ExampleBlock, ExportBlock, QuoteBlock, + SourceBlock, SpecialBlock, SyntaxKind, Token, VerseBlock, +}; +use rowan::TextSize; + +impl SourceBlock { + /// ```rust + /// use orgize::{Org, ast::SourceBlock}; + /// + /// let block = Org::parse("#+begin_src c\n#+end_src").first_node::().unwrap(); + /// assert_eq!(block.language().unwrap(), "c"); + /// let block = Org::parse("#+begin_src javascript \n#+end_src").first_node::().unwrap(); + /// assert_eq!(block.language().unwrap(), "javascript"); + /// + /// let block = Org::parse("#+begin_src\n#+end_src").first_node::().unwrap(); + /// assert!(block.language().is_none()); + /// ```` + pub fn language(&self) -> Option { + self.syntax + .children() + .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) + .into_iter() + .flat_map(|n| n.children_with_tokens()) + .find_map(filter_token(SyntaxKind::SRC_BLOCK_LANGUAGE)) + } + + /// ```rust + /// use orgize::{Org, ast::SourceBlock}; + /// + /// let block = Org::parse("#+begin_src emacs-lisp -n 20\n#+end_src").first_node::().unwrap(); + /// assert_eq!(block.switches().unwrap(), "-n 20"); + /// let block = Org::parse("#+begin_src emacs-lisp -n 20 -r :tangle yes \n#+end_src").first_node::().unwrap(); + /// assert_eq!(block.switches().unwrap(), "-n 20 -r"); + /// + /// let block = 
Org::parse("#+begin_src emacs-lisp\n#+end_src").first_node::().unwrap(); + /// assert!(block.switches().is_none()); + /// let block = Org::parse("#+begin_src\n#+end_src").first_node::().unwrap(); + /// assert!(block.switches().is_none()); + /// let block = Org::parse("#+begin_src :tangle yes\n#+end_src").first_node::().unwrap(); + /// assert!(block.switches().is_none()); + /// ```` + pub fn switches(&self) -> Option { + self.syntax + .children() + .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) + .into_iter() + .flat_map(|n| n.children_with_tokens()) + .find_map(filter_token(SyntaxKind::SRC_BLOCK_SWITCHES)) + } + + /// ```rust + /// use orgize::{Org, ast::SourceBlock}; + /// + /// let block = Org::parse("#+begin_src c :tangle yes\n#+end_src").first_node::().unwrap(); + /// assert_eq!(block.parameters().unwrap(), ":tangle yes"); + /// let block = Org::parse("#+begin_src c :tangle \n#+end_src").first_node::().unwrap(); + /// assert_eq!(block.parameters().unwrap(), ":tangle"); + /// + /// let block = Org::parse("#+begin_src c\n#+end_src").first_node::().unwrap(); + /// assert!(block.parameters().is_none()); + /// ```` + pub fn parameters(&self) -> Option { + self.syntax + .children() + .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) + .into_iter() + .flat_map(|n| n.children_with_tokens()) + .find_map(filter_token(SyntaxKind::SRC_BLOCK_PARAMETERS)) + } + + /// Return unescaped source code string + /// + /// ```rust + /// use orgize::{Org, ast::SourceBlock}; + /// + /// let block = Org::parse(r#" + /// #+begin_src + /// #+end_src + /// "#).first_node::().unwrap(); + /// assert_eq!(block.value(), ""); + /// + /// let block = Org::parse(r#" + /// #+begin_src + /// ,* foo + /// ,#+ bar + /// #+end_src + /// "#).first_node::().unwrap(); + /// assert_eq!(block.value(), "* foo\n#+ bar\n"); + /// ```` + pub fn value(&self) -> String { + self.syntax + .children() + .find(|e| e.kind() == SyntaxKind::BLOCK_CONTENT) + .into_iter() + .flat_map(|n| n.children_with_tokens()) + 
.filter_map(filter_token(SyntaxKind::TEXT)) + .fold(String::new(), |acc, value| acc + &value) + } +} + +impl ExportBlock { + /// ```rust + /// use orgize::{Org, ast::ExportBlock}; + /// + /// let block = Org::parse("#+begin_export html\n#+end_export").first_node::().unwrap(); + /// assert_eq!(block.ty().unwrap(), "html"); + /// + /// let block = Org::parse("#+begin_export\n#+end_export").first_node::().unwrap(); + /// assert!(block.ty().is_none()); + /// ```` + pub fn ty(&self) -> Option { + self.syntax + .children() + .find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN) + .into_iter() + .flat_map(|n| n.children_with_tokens()) + .find_map(filter_token(SyntaxKind::EXPORT_BLOCK_TYPE)) + } + + /// Returns export block contents + /// + /// ```rust + /// use orgize::{Org, ast::ExportBlock}; + /// + /// let block = Org::parse(r#" + /// #+begin_export html + /// + /// #+end_export + /// "#).first_node::().unwrap(); + /// assert_eq!(block.value(), "\n"); + /// + /// let block = Org::parse(r#" + /// #+BEGIN_EXPORT org + /// ,#+BEGIN_EXPORT html + /// + /// ,#+END_EXPORT + /// #+END_EXPORT + /// "#).first_node::().unwrap(); + /// assert_eq!(block.value(), r#"#+BEGIN_EXPORT html + /// + /// #+END_EXPORT + /// "#); + /// ``` + pub fn value(&self) -> String { + self.syntax + .children() + .find(|e| e.kind() == SyntaxKind::BLOCK_CONTENT) + .into_iter() + .flat_map(|n| n.children_with_tokens()) + .filter_map(filter_token(SyntaxKind::TEXT)) + .fold(String::new(), |acc, value| acc + &value) + } +} + +macro_rules! 
impl_content_border { + ($block:ident) => { + impl $block { + /// Beginning position of block content + pub fn content_start(&self) -> TextSize { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::BLOCK_BEGIN) + .map(|n| n.text_range().end()) + .unwrap_or_else(|| { + debug_assert!(false, "block must contains BLOCK_BEGIN"); + TextSize::default() + }) + } + + /// Ending position of block content + pub fn content_end(&self) -> TextSize { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::BLOCK_END) + .map(|n| n.text_range().start()) + .unwrap_or_else(|| { + debug_assert!(false, "block must contains BLOCK_END"); + TextSize::default() + }) + } + } + }; +} + +impl_content_border!(SourceBlock); +impl_content_border!(ExportBlock); +impl_content_border!(CenterBlock); +impl_content_border!(CommentBlock); +impl_content_border!(ExampleBlock); +impl_content_border!(QuoteBlock); +impl_content_border!(SpecialBlock); +impl_content_border!(VerseBlock); +impl_content_border!(DynBlock); diff --git a/src/ast/clock.rs b/src/ast/clock.rs new file mode 100644 index 0000000..e8df645 --- /dev/null +++ b/src/ast/clock.rs @@ -0,0 +1,58 @@ +use rowan::ast::support; + +use crate::{ast::Token, SyntaxKind}; + +use super::{Clock, Timestamp}; + +impl Clock { + pub fn value(&self) -> Option { + support::child(&self.syntax) + } + + /// ```rust + /// use orgize::{Org, ast::Clock}; + /// + /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::().unwrap(); + /// assert!(clock.duration().is_none()); + /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::().unwrap(); + /// assert_eq!(clock.duration().unwrap(), "12:00"); + /// + /// ``` + pub fn duration(&self) -> Option { + self.syntax + .children_with_tokens() + .skip_while(|t| t.kind() != SyntaxKind::DOUBLE_ARROW) + .skip(1) + .find(|t| t.kind() != SyntaxKind::WHITESPACE) + .and_then(|e| { + debug_assert_eq!(e.kind(), SyntaxKind::TEXT); + Some(Token(e.into_token()?)) + }) + } + + 
/// Whether this clock is closed, i.e. it has a `=>` (`DOUBLE_ARROW`) token
+    /// ```rust
+    /// use orgize::{Org, ast::Clock};
+    /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::<Clock>().unwrap();
+    /// assert!(!clock.is_closed());
+    /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::<Clock>().unwrap();
+    /// assert!(clock.is_closed());
+    /// ```
+    pub fn is_closed(&self) -> bool {
+        self.syntax
+            .children_with_tokens()
+            .any(|t| t.kind() == SyntaxKind::DOUBLE_ARROW)
+    }
+
+    /// Whether this clock is still running, i.e. it is not closed
+    /// ```rust
+    /// use orgize::{Org, ast::Clock};
+    /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::<Clock>().unwrap();
+    /// assert!(clock.is_running());
+    /// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::<Clock>().unwrap();
+    /// assert!(!clock.is_running());
+    /// ```
+    pub fn is_running(&self) -> bool {
+        !self.is_closed()
+    }
+}
diff --git a/src/ast/cloze.rs b/src/ast/cloze.rs
new file mode 100644
index 0000000..f85484b
--- /dev/null
+++ b/src/ast/cloze.rs
@@ -0,0 +1,119 @@
+use crate::{syntax::OrgLanguage, SyntaxElement, SyntaxKind, SyntaxNode};
+use rowan::{ast::AstNode, TextRange, TextSize};
+
+use super::Token;
+
+/// Cloze object such as `{{text}{hint}@id}`; the hint and id parts are optional
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Cloze {
+    pub(crate) syntax: SyntaxNode,
+}
+
+impl AstNode for Cloze {
+    type Language = OrgLanguage;
+
+    fn can_cast(kind: SyntaxKind) -> bool {
+        kind == SyntaxKind::CLOZE
+    }
+
+    fn cast(node: SyntaxNode) -> Option<Self> {
+        Self::can_cast(node.kind()).then(|| Cloze { syntax: node })
+    }
+
+    fn syntax(&self) -> &SyntaxNode {
+        &self.syntax
+    }
+}
+
+impl Cloze {
+    /// Beginning position of this element
+    pub fn start(&self) -> TextSize {
+        self.syntax.text_range().start()
+    }
+
+    /// Ending position of this element
+    pub fn end(&self) -> TextSize {
+        self.syntax.text_range().end()
+    }
+
+    /// Range of this element
+    pub fn text_range(&self) -> TextRange {
+        self.syntax.text_range()
+    }
+
+    /// Raw text of this element
+    pub fn raw(&self) -> String {
+        self.syntax.to_string()
+    }
+
+    /// Syntax elements of the cloze text: every child after the first one, up
+    /// to (excluding) the first `R_CURLY` element
+    pub fn text(&self) -> impl Iterator<Item = SyntaxElement> {
+        self.syntax
+            .children_with_tokens()
+            .skip(1)
+            .take_while(|n| n.kind() != SyntaxKind::R_CURLY)
+    }
+
+    /// Cloze text as a string, i.e. the concatenation of [Cloze::text]
+    ///
+    /// ```rust
+    /// use orgize::{Org, ast::Cloze};
+    ///
+    /// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.text_raw(), "text");
+    /// let cloze = Org::parse("{{$\\frac{1}{2}$}{}@id}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.text_raw(), "$\\frac{1}{2}$");
+    /// let cloze = Org::parse("{{ [[file:my_image.png]] }{hint}}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.text_raw(), " [[file:my_image.png]] ");
+    /// ```
+    pub fn text_raw(&self) -> String {
+        self.text().map(|e| e.to_string()).collect()
+    }
+
+    /// Hint token, i.e. the token right after the first `L_CURLY` child, if any
+    ///
+    /// ```rust
+    /// use orgize::{Org, ast::Cloze};
+    ///
+    /// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
+    /// assert!(cloze.hint().is_none());
+    /// let cloze = Org::parse("{{text}{}@id}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.hint().unwrap(), "");
+    /// let cloze = Org::parse("{{text}{hint}}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.hint().unwrap(), "hint");
+    /// ```
+    pub fn hint(&self) -> Option<Token> {
+        self.syntax
+            .children_with_tokens()
+            .skip_while(|n| n.kind() != SyntaxKind::L_CURLY)
+            .nth(1)
+            .and_then(|e| {
+                debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
+                Some(Token(e.into_token()?))
+            })
+    }
+
+    /// Id token, i.e. the token right after the first `AT` child, if any
+    ///
+    /// ```rust
+    /// use orgize::{Org, ast::Cloze};
+    ///
+    /// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
+    /// assert!(cloze.id().is_none());
+    /// let cloze = Org::parse("{{text}@}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.id().unwrap(), "");
+    /// let cloze = Org::parse("{{text}@id}").first_node::<Cloze>().unwrap();
+    /// assert_eq!(cloze.id().unwrap(), "id");
+    /// ```
+    pub fn id(&self) -> Option<Token> {
+        self.syntax
+            .children_with_tokens()
+            .skip_while(|n| n.kind() != SyntaxKind::AT)
+            .nth(1)
+            .and_then(|e| {
+                debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
+                Some(Token(e.into_token()?))
+            })
+    }
+}
diff --git
a/src/ast/comment.rs b/src/ast/comment.rs new file mode 100644 index 0000000..a08bc39 --- /dev/null +++ b/src/ast/comment.rs @@ -0,0 +1,20 @@ +use crate::SyntaxKind; + +use super::{filter_token, Comment}; + +impl Comment { + /// Contents without pound signs + /// + /// ```rust + /// use orgize::{ast::Comment, Org}; + /// + /// let fixed = Org::parse("# A\n#\n# B\n# C").first_node::().unwrap(); + /// assert_eq!(fixed.value(), "A\n\nB\nC"); + /// ``` + pub fn value(&self) -> String { + self.syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)) + .fold(String::new(), |acc, text| acc + &text) + } +} diff --git a/src/ast/document.rs b/src/ast/document.rs new file mode 100644 index 0000000..f248769 --- /dev/null +++ b/src/ast/document.rs @@ -0,0 +1,88 @@ +use rowan::ast::AstNode; + +use crate::Org; + +use super::{Document, Keyword, PropertyDrawer}; + +impl Document { + /// Returns an iterator of keywords in zeroth section + /// + /// ```rust + /// use orgize::{Org, ast::Document}; + /// + /// let org = Org::parse(r#" + /// #+TITLE: hello + /// #+TITLE: world + /// #+DATE: today + /// #+AUTHOR: poi + /// * headline + /// #+SOMETHING:"#); + /// let doc = org.first_node::().unwrap(); + /// assert_eq!(doc.keywords().count(), 4); + /// ``` + pub fn keywords(&self) -> impl Iterator { + self.section() + .into_iter() + .flat_map(|section| section.syntax.children().filter_map(Keyword::cast)) + } + + /// Returns the value in top-level `#+TITLE` + /// + /// Multiple `#+TITLE` are joined with spaces. 
+ /// + /// Returns `None` if file doesn't contain `#+TITLE` + /// + /// ```rust + /// use orgize::{Org, ast::Document}; + /// + /// let org = Org::parse("#+TITLE: hello\n#+TITLE: world"); + /// let doc = org.first_node::().unwrap(); + /// assert_eq!(doc.title().unwrap(), "hello world"); + /// + /// let org = Org::parse(""); + /// let doc = org.first_node::().unwrap(); + /// assert!(doc.title().is_none()); + /// ``` + pub fn title(&self) -> Option { + self.keywords() + .filter(|kw| kw.key().eq_ignore_ascii_case("TITLE")) + .fold(Option::::None, |acc, cur| { + let mut s = acc.unwrap_or_default(); + if !s.is_empty() { + s.push(' '); + } + s.push_str(cur.value().trim()); + Some(s) + }) + } + + /// Returns top-level properties drawer + /// + /// ```rust + /// use orgize::{Org, ast::Document}; + /// + /// let org = Org::parse(r#":PROPERTIES: + /// :ID: 20220718T085035.042592 + /// :END: + /// #+TITLE: Complete Computing"#); + /// + /// let properties = org.document().properties().unwrap(); + /// assert_eq!(properties.to_hash_map().len(), 1); + /// assert_eq!(properties.get("ID").unwrap(), "20220718T085035.042592"); + /// ``` + pub fn properties(&self) -> Option { + rowan::ast::support::child(&self.syntax) + } +} + +impl Org { + /// Equals to `self.document().title()`, see [Document::title] + pub fn title(&self) -> Option { + self.document().title() + } + + /// Equals to `self.document().keywords()`, see [Document::keywords] + pub fn keywords(&self) -> impl Iterator { + self.document().keywords() + } +} diff --git a/src/ast/drawer.rs b/src/ast/drawer.rs new file mode 100644 index 0000000..3111ee6 --- /dev/null +++ b/src/ast/drawer.rs @@ -0,0 +1,140 @@ +use rowan::TextSize; +use std::collections::HashMap; + +use super::{filter_token, Drawer, PropertyDrawer, SyntaxKind, Token}; + +impl PropertyDrawer { + /// ```rust + /// use orgize::{Org, ast::PropertyDrawer}; + /// + /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:"); + /// let drawer 
= org.first_node::().unwrap(); + /// assert_eq!(drawer.iter().count(), 2); + /// ``` + pub fn iter(&self) -> impl Iterator { + self.node_properties().filter_map(|property| { + let mut texts = property + .syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)); + + Some((texts.next()?, texts.next()?)) + }) + } + + /// ```rust + /// use orgize::{Org, ast::PropertyDrawer}; + /// + /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:"); + /// let drawer = org.first_node::().unwrap(); + /// assert_eq!(drawer.get("CUSTOM_ID").unwrap(), "someid"); + /// assert_eq!(drawer.get("ID").unwrap(), "id"); + /// ``` + pub fn get(&self, key: &str) -> Option { + self.iter().find_map(|(k, v)| (k == key).then_some(v)) + } + + /// ```rust + /// use orgize::{Org, ast::PropertyDrawer}; + /// + /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:CUSTOM_ID: id\n:END:"); + /// let drawer = org.first_node::().unwrap(); + /// let map = drawer.to_hash_map(); + /// assert_eq!(map.len(), 1); + /// assert_eq!(map.get("CUSTOM_ID").unwrap(), "id"); + /// ``` + pub fn to_hash_map(&self) -> HashMap { + self.iter().collect() + } + + #[cfg(feature = "indexmap")] + /// ```rust + /// use orgize::{Org, ast::PropertyDrawer}; + /// + /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:"); + /// let drawer = org.first_node::().unwrap(); + /// let map = drawer.to_index_map(); + /// let item1 = map.get_index(1).unwrap(); + /// assert_eq!(item1.0, "ID"); + /// assert_eq!(item1.1, "id"); + /// ``` + pub fn to_index_map(&self) -> indexmap::IndexMap { + self.iter().collect() + } + + /// Beginning position of drawer content + pub fn content_start(&self) -> TextSize { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::DRAWER_BEGIN) + .map(|n| n.text_range().end()) + .unwrap_or_else(|| { + debug_assert!(false, "property drawer must contains DRAWER_BEGIN"); + TextSize::default() + }) + } + + /// 
Ending position of drawer content + pub fn content_end(&self) -> TextSize { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::DRAWER_END) + .map(|n| n.text_range().start()) + .unwrap_or_else(|| { + debug_assert!(false, "property drawer must contains DRAWER_END"); + TextSize::default() + }) + } +} + +impl Drawer { + /// ```rust + /// use orgize::{Org, ast::Drawer}; + /// + /// let org = Org::parse("* Heading\n:LOGBOOK:\n:END:"); + /// let drawer = org.first_node::().unwrap(); + /// assert_eq!(drawer.name(), "LOGBOOK"); + /// ``` + pub fn name(&self) -> Token { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::DRAWER_BEGIN) + .expect("drawer must contains DRAWER_BEGIN") + .children_with_tokens() + .find_map(filter_token(SyntaxKind::TEXT)) + .expect("drawer begin must contains TEXT") + } + + /// Beginning position of drawer content + pub fn content_start(&self) -> TextSize { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::DRAWER_BEGIN) + .map(|n| n.text_range().end()) + .unwrap_or_else(|| { + debug_assert!(false, "drawer must contains DRAWER_BEGIN"); + TextSize::default() + }) + } + + /// Ending position of drawer content + pub fn content_end(&self) -> TextSize { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::DRAWER_END) + .map(|n| n.text_range().start()) + .unwrap_or_else(|| { + debug_assert!(false, "drawer must contains DRAWER_END"); + TextSize::default() + }) + } + + /// Raw text of drawer content + pub fn content_raw(&self) -> String { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::DRAWER_CONTENT) + .map(|n| n.to_string()) + .unwrap_or_default() + } +} diff --git a/src/ast/entity.rs b/src/ast/entity.rs new file mode 100644 index 0000000..49cd19c --- /dev/null +++ b/src/ast/entity.rs @@ -0,0 +1,168 @@ +use crate::{entities::ENTITIES, SyntaxKind}; + +use super::{filter_token, Entity}; + +impl Entity { + fn entity(&self) -> Option<&(&str, &str, bool, &str, &str, &str, &str)> { + let token = 
self + .syntax + .children_with_tokens() + .find_map(filter_token(SyntaxKind::TEXT))?; + + ENTITIES.iter().find(|i| i.0 == token.as_ref()) + } + + /// Entity name + /// + /// ```rust + /// use orgize::{ast::Entity, Org}; + /// + /// let e = Org::parse("\\alpha{}").first_node::().unwrap(); + /// assert_eq!(e.name(), "alpha"); + /// let e = Org::parse("\\_ ").first_node::().unwrap(); + /// assert_eq!(e.name(), " "); + /// ``` + pub fn name(&self) -> &str { + self.entity().map_or_else( + || { + debug_assert!(false); + "" + }, + |e| e.0, + ) + } + + /// Entity LaTeX representation + /// + /// ```rust + /// use orgize::{ast::Entity, Org}; + /// + /// let e = Org::parse("\\middot").first_node::().unwrap(); + /// assert_eq!(e.latex(), "\\textperiodcentered{}"); + /// ``` + pub fn latex(&self) -> &str { + self.entity().map_or_else( + || { + debug_assert!(false); + "" + }, + |e| e.1, + ) + } + + /// Whether entity needs to be in math mode + /// + /// ```rust + /// use orgize::{ast::Entity, Org}; + /// + /// let e = Org::parse("\\middot").first_node::().unwrap(); + /// assert!(!e.is_latex_math()); + /// let e = Org::parse("\\alefsym").first_node::().unwrap(); + /// assert!(e.is_latex_math()); + /// ``` + pub fn is_latex_math(&self) -> bool { + self.entity().map_or_else( + || { + debug_assert!(false); + false + }, + |e| e.2, + ) + } + + /// Entity HTML representation + /// + /// ```rust + /// use orgize::{ast::Entity, Org}; + /// + /// let e = Org::parse("\\S").first_node::().unwrap(); + /// assert_eq!(e.html(), "§"); + /// ``` + pub fn html(&self) -> &str { + self.entity().map_or_else( + || { + debug_assert!(false); + "" + }, + |e| e.3, + ) + } + + /// Entity ASCII representation + /// + /// ```rust + /// use orgize::{ast::Entity, Org}; + /// + /// let e = Org::parse("\\S").first_node::().unwrap(); + /// assert_eq!(e.ascii(), "section"); + /// ``` + pub fn ascii(&self) -> &str { + self.entity().map_or_else( + || { + debug_assert!(false); + "" + }, + |e| e.4, + ) + } + + /// 
Entity Latin1 encoding representation + /// + /// ```rust + /// use orgize::{ast::Entity, Org}; + /// + /// let e = Org::parse("\\S").first_node::().unwrap(); + /// assert_eq!(e.latin1(), "§"); + /// let e = Org::parse("\\rsaquo").first_node::().unwrap(); + /// assert_eq!(e.latin1(), ">"); + /// ``` + pub fn latin1(&self) -> &str { + self.entity().map_or_else( + || { + debug_assert!(false); + "" + }, + |e| e.5, + ) + } + + /// Entity UTF-8 encoding representation + /// + /// ```rust + /// use orgize::{ast::Entity, Org}; + /// + /// let e = Org::parse("\\S").first_node::().unwrap(); + /// assert_eq!(e.utf8(), "§"); + /// let e = Org::parse("\\rsaquo").first_node::().unwrap(); + /// assert_eq!(e.utf8(), "›"); + /// ``` + pub fn utf8(&self) -> &str { + self.entity().map_or_else( + || { + debug_assert!(false); + "" + }, + |e| e.6, + ) + } + + /// Entity contains optional brackets + /// + /// ```rust + /// use orgize::{ast::Entity, Org}; + /// + /// let e = Org::parse("\\beta").first_node::().unwrap(); + /// assert!(!e.is_use_brackets()); + /// let e = Org::parse("\\S{}").first_node::().unwrap(); + /// assert!(e.is_use_brackets()); + /// let e = Org::parse("\\_ ").first_node::().unwrap(); + /// assert!(!e.is_use_brackets()); + /// ``` + pub fn is_use_brackets(&self) -> bool { + self.syntax + .children_with_tokens() + .filter(|n| n.kind() == SyntaxKind::TEXT) + .nth(1) + .is_some() + } +} diff --git a/src/ast/fixed_width.rs b/src/ast/fixed_width.rs new file mode 100644 index 0000000..b601738 --- /dev/null +++ b/src/ast/fixed_width.rs @@ -0,0 +1,20 @@ +use crate::SyntaxKind; + +use super::{filter_token, FixedWidth}; + +impl FixedWidth { + /// Contents without colons prefix + /// + /// ```rust + /// use orgize::{ast::FixedWidth, Org}; + /// + /// let fixed = Org::parse(": A\n:\n: B\n: C").first_node::().unwrap(); + /// assert_eq!(fixed.value(), "A\n\nB\nC"); + /// ``` + pub fn value(&self) -> String { + self.syntax + .children_with_tokens() + 
.filter_map(filter_token(SyntaxKind::TEXT)) + .fold(String::new(), |acc, text| acc + &text) + } +} diff --git a/src/ast/generate.js b/src/ast/generate.js new file mode 100644 index 0000000..81bbc4f --- /dev/null +++ b/src/ast/generate.js @@ -0,0 +1,357 @@ +const nodes = [ + { + struct: "Document", + kind: ["DOCUMENT"], + pre_blank: true, + first_child: [ + ["section", "Section"], + ["first_headline", "Headline"], + ], + last_child: [["last_headline", "Headline"]], + children: [["headlines", "Headline"]], + }, + { + struct: "Section", + kind: ["SECTION"], + post_blank: true, + }, + { + struct: "Paragraph", + kind: ["PARAGRAPH"], + post_blank: true, + affiliated_keywords: true, + }, + { + struct: "Headline", + kind: ["HEADLINE"], + first_child: [ + ["section", "Section"], + ["planning", "Planning"], + ["properties", "PropertyDrawer"], + ], + children: [["headlines", "Headline"]], + post_blank: true, + }, + { + struct: "PropertyDrawer", + kind: ["PROPERTY_DRAWER"], + children: [["node_properties", "NodeProperty"]], + }, + { + struct: "NodeProperty", + kind: ["NODE_PROPERTY"], + }, + { + struct: "Planning", + kind: ["PLANNING"], + }, + { + struct: "OrgTable", + kind: ["ORG_TABLE"], + post_blank: true, + affiliated_keywords: true, + }, + { + struct: "OrgTableRow", + kind: ["ORG_TABLE_RULE_ROW", "ORG_TABLE_STANDARD_ROW"], + }, + { + struct: "OrgTableCell", + kind: ["ORG_TABLE_CELL"], + }, + { + struct: "List", + kind: ["LIST"], + children: [["items", "ListItem"]], + affiliated_keywords: true, + }, + { + struct: "ListItem", + kind: ["LIST_ITEM"], + }, + { + struct: "Drawer", + kind: ["DRAWER"], + }, + { + struct: "DynBlock", + kind: ["DYN_BLOCK"], + affiliated_keywords: true, + }, + { + struct: "Keyword", + kind: ["KEYWORD"], + }, + { + struct: "BabelCall", + kind: ["BABEL_CALL"], + }, + { + struct: "AffiliatedKeyword", + kind: ["AFFILIATED_KEYWORD"], + }, + { + struct: "TableEl", + kind: ["TABLE_EL"], + post_blank: true, + }, + { + struct: "Clock", + kind: ["CLOCK"], + 
post_blank: true, + }, + { + struct: "FnDef", + kind: ["FN_DEF"], + post_blank: true, + affiliated_keywords: true, + }, + { + struct: "Comment", + kind: ["COMMENT"], + post_blank: true, + token: [["text", "TEXT"]], + affiliated_keywords: true, + }, + { + struct: "Rule", + kind: ["RULE"], + post_blank: true, + }, + { + struct: "FixedWidth", + kind: ["FIXED_WIDTH"], + post_blank: true, + token: [["text", "TEXT"]], + affiliated_keywords: true, + }, + { + struct: "SpecialBlock", + kind: ["SPECIAL_BLOCK"], + affiliated_keywords: true, + }, + { + struct: "QuoteBlock", + kind: ["QUOTE_BLOCK"], + affiliated_keywords: true, + }, + { + struct: "CenterBlock", + kind: ["CENTER_BLOCK"], + affiliated_keywords: true, + }, + { + struct: "VerseBlock", + kind: ["VERSE_BLOCK"], + affiliated_keywords: true, + }, + { + struct: "CommentBlock", + kind: ["COMMENT_BLOCK"], + affiliated_keywords: true, + }, + { + struct: "ExampleBlock", + kind: ["EXAMPLE_BLOCK"], + affiliated_keywords: true, + }, + { + struct: "ExportBlock", + kind: ["EXPORT_BLOCK"], + affiliated_keywords: true, + }, + { + struct: "SourceBlock", + kind: ["SOURCE_BLOCK"], + affiliated_keywords: true, + }, + { + struct: "InlineCall", + kind: ["INLINE_CALL"], + }, + { + struct: "InlineSrc", + kind: ["INLINE_SRC"], + }, + { + struct: "Link", + kind: ["LINK"], + }, + { + struct: "Cookie", + kind: ["COOKIE"], + }, + { + struct: "RadioTarget", + kind: ["RADIO_TARGET"], + }, + { + struct: "FnRef", + kind: ["FN_REF"], + }, + { + struct: "Macros", + kind: ["MACROS"], + }, + { + struct: "Snippet", + kind: ["SNIPPET"], + }, + { + struct: "Target", + kind: ["TARGET"], + }, + { + struct: "Bold", + kind: ["BOLD"], + }, + { + struct: "Strike", + kind: ["STRIKE"], + }, + { + struct: "Italic", + kind: ["ITALIC"], + }, + { + struct: "Underline", + kind: ["UNDERLINE"], + }, + { + struct: "Verbatim", + kind: ["VERBATIM"], + }, + { + struct: "Code", + kind: ["CODE"], + token: [["text", "TEXT"]], + }, + { + struct: "Timestamp", + kind: 
["TIMESTAMP_ACTIVE", "TIMESTAMP_INACTIVE", "TIMESTAMP_DIARY"], + token: [ + ["year_start", "TIMESTAMP_YEAR"], + ["month_start", "TIMESTAMP_MONTH"], + ["day_start", "TIMESTAMP_DAY"], + ["hour_start", "TIMESTAMP_HOUR"], + ["minute_start", "TIMESTAMP_MINUTE"], + ], + last_token: [ + ["year_end", "TIMESTAMP_YEAR"], + ["month_end", "TIMESTAMP_MONTH"], + ["day_end", "TIMESTAMP_DAY"], + ["hour_end", "TIMESTAMP_HOUR"], + ["minute_end", "TIMESTAMP_MINUTE"], + ], + }, + { + struct: "LatexEnvironment", + kind: ["LATEX_ENVIRONMENT"], + }, + { + struct: "LatexFragment", + kind: ["LATEX_FRAGMENT"], + }, + { + struct: "Entity", + kind: ["ENTITY"], + }, + { + struct: "LineBreak", + kind: ["LINE_BREAK"], + }, + { + struct: "Superscript", + kind: ["SUPERSCRIPT"], + }, + { + struct: "Subscript", + kind: ["SUBSCRIPT"], + }, +]; + +let content = `//! generated file, do not modify it directly +#![allow(clippy::all)] +#![allow(unused)] + +use rowan::{ast::{support, AstChildren, AstNode}, TextSize, TextRange}; +use crate::syntax::{OrgLanguage, SyntaxKind, SyntaxKind::*, SyntaxNode, SyntaxToken}; + +fn affiliated_keyword(node: &SyntaxNode, filter: impl Fn(&str) -> bool) -> Option { + node.children() + .take_while(|n| n.kind() == SyntaxKind::AFFILIATED_KEYWORD) + .filter_map(AffiliatedKeyword::cast) + .find(|k| filter(&k.key())) +} +`; + +for (const node of nodes) { + content += ` +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ${node.struct} { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for ${node.struct} { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { ${node.kind + .map((k) => `kind == ${k}`) + .join(" || ")} } + fn cast(node: SyntaxNode) -> Option<${ + node.struct + }> { Self::can_cast(node.kind()).then(|| ${node.struct} { syntax: node }) } + fn syntax(&self) -> &SyntaxNode { &self.syntax } +} +impl ${node.struct} { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending 
position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +`; + for (const [method, kind] of node.token || []) { + content += ` pub fn ${method}(&self) -> Option { super::token(&self.syntax, ${kind}) }\n`; + } + for (const [method, kind] of node.last_token || []) { + content += ` pub fn ${method}(&self) -> Option { super::last_token(&self.syntax, ${kind}) }\n`; + } + for (const [method, kind] of node.parent || []) { + content += ` pub fn ${method}(&self) -> Option<${kind}> { self.syntax.parent().and_then(${kind}::cast) }\n`; + } + for (const [method, kind] of node.first_child || []) { + content += ` pub fn ${method}(&self) -> Option<${kind}> { support::child(&self.syntax) }\n`; + } + for (const [method, kind] of node.last_child || []) { + content += ` pub fn ${method}(&self) -> Option<${kind}> { super::last_child(&self.syntax) }\n`; + } + for (const [method, kind] of node.children || []) { + content += ` pub fn ${method}(&self) -> AstChildren<${kind}> { support::children(&self.syntax) }\n`; + } + if (node.post_blank) { + content += ` pub fn post_blank(&self) -> usize { super::blank_lines(&self.syntax) }\n`; + } + if (node.pre_blank) { + content += ` pub fn pre_blank(&self) -> usize { super::blank_lines(&self.syntax) }\n`; + } + if (node.affiliated_keywords) { + content += ` pub fn caption(&self) -> Option { affiliated_keyword(&self.syntax, |k| k == "CAPTION") }\n`; + content += ` pub fn header(&self) -> Option { affiliated_keyword(&self.syntax, |k| k == "HEADER") }\n`; + content += ` pub fn name(&self) -> Option { affiliated_keyword(&self.syntax, |k| k == "NAME") }\n`; + content += ` pub fn plot(&self) -> Option { affiliated_keyword(&self.syntax, |k| k == "PLOT") }\n`; + content += ` pub fn results(&self) -> Option { 
affiliated_keyword(&self.syntax, |k| k == "RESULTS") }\n`; + content += ` pub fn attr(&self, backend: &str) -> Option { affiliated_keyword(&self.syntax, |k| k.starts_with("ATTR_") && &k[5..] == backend) }\n`; + } + content += `}\n`; +} + +require("fs").writeFileSync(__dirname + "/generated.rs", content); diff --git a/src/ast/generated.rs b/src/ast/generated.rs new file mode 100644 index 0000000..1bfd489 --- /dev/null +++ b/src/ast/generated.rs @@ -0,0 +1,2276 @@ +//! generated file, do not modify it directly +#![allow(clippy::all)] +#![allow(unused)] + +use crate::syntax::{OrgLanguage, SyntaxKind, SyntaxKind::*, SyntaxNode, SyntaxToken}; +use rowan::{ + ast::{support, AstChildren, AstNode}, + TextRange, TextSize, +}; + +fn affiliated_keyword( + node: &SyntaxNode, + filter: impl Fn(&str) -> bool, +) -> Option { + node.children() + .take_while(|n| n.kind() == SyntaxKind::AFFILIATED_KEYWORD) + .filter_map(AffiliatedKeyword::cast) + .find(|k| filter(&k.key())) +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Document { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Document { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == DOCUMENT + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Document { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Document { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn section(&self) -> Option
{ + support::child(&self.syntax) + } + pub fn first_headline(&self) -> Option { + support::child(&self.syntax) + } + pub fn last_headline(&self) -> Option { + super::last_child(&self.syntax) + } + pub fn headlines(&self) -> AstChildren { + support::children(&self.syntax) + } + pub fn pre_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Section { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Section { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == SECTION + } + fn cast(node: SyntaxNode) -> Option
{ + Self::can_cast(node.kind()).then(|| Section { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Section { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn post_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Paragraph { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Paragraph { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == PARAGRAPH + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Paragraph { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Paragraph { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn post_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, 
|k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] == backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Headline { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Headline { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == HEADLINE + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Headline { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Headline { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn section(&self) -> Option
{ + support::child(&self.syntax) + } + pub fn planning(&self) -> Option { + support::child(&self.syntax) + } + pub fn properties(&self) -> Option { + support::child(&self.syntax) + } + pub fn headlines(&self) -> AstChildren { + support::children(&self.syntax) + } + pub fn post_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PropertyDrawer { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for PropertyDrawer { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == PROPERTY_DRAWER + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| PropertyDrawer { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl PropertyDrawer { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn node_properties(&self) -> AstChildren { + support::children(&self.syntax) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct NodeProperty { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for NodeProperty { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == NODE_PROPERTY + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| NodeProperty { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl NodeProperty { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn 
text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Planning { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Planning { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == PLANNING + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Planning { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Planning { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct OrgTable { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for OrgTable { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == ORG_TABLE + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| OrgTable { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl OrgTable { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn post_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == 
"CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] == backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct OrgTableRow { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for OrgTableRow { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == ORG_TABLE_RULE_ROW || kind == ORG_TABLE_STANDARD_ROW + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| OrgTableRow { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl OrgTableRow { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct OrgTableCell { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for OrgTableCell { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == ORG_TABLE_CELL + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| OrgTableCell { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl OrgTableCell { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position 
of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct List { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for List { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == LIST + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| List { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl List { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn items(&self) -> AstChildren { + support::children(&self.syntax) + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ListItem { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for ListItem { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == LIST_ITEM + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| ListItem { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl ListItem { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Drawer { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Drawer { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == DRAWER + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Drawer { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Drawer { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct DynBlock { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for DynBlock { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == DYN_BLOCK + } + fn cast(node: SyntaxNode) -> Option { + 
Self::can_cast(node.kind()).then(|| DynBlock { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl DynBlock { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Keyword { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Keyword { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == KEYWORD + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Keyword { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Keyword { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct BabelCall { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for BabelCall { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == BABEL_CALL + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| BabelCall { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl BabelCall { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct AffiliatedKeyword { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for AffiliatedKeyword { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == AFFILIATED_KEYWORD + } + fn cast(node: SyntaxNode) -> Option { 
+ Self::can_cast(node.kind()).then(|| AffiliatedKeyword { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl AffiliatedKeyword { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct TableEl { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for TableEl { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == TABLE_EL + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| TableEl { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl TableEl { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn post_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Clock { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Clock { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == CLOCK + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Clock { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Clock { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + 
self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn post_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FnDef { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for FnDef { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == FN_DEF + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| FnDef { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl FnDef { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn post_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Comment { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Comment { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == COMMENT + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Comment { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Comment { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn text(&self) -> Option { + super::token(&self.syntax, TEXT) + } + pub fn post_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Rule { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Rule { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == RULE + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Rule { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Rule { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn post_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FixedWidth { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for FixedWidth { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == FIXED_WIDTH + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| FixedWidth { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl FixedWidth { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn text(&self) -> Option { + super::token(&self.syntax, TEXT) + } + pub fn post_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == 
"CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] == backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SpecialBlock { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for SpecialBlock { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == SPECIAL_BLOCK + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| SpecialBlock { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl SpecialBlock { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct QuoteBlock { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for QuoteBlock { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == QUOTE_BLOCK + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| QuoteBlock { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl QuoteBlock { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct CenterBlock { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for CenterBlock { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == CENTER_BLOCK + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| CenterBlock { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl CenterBlock { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct VerseBlock { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for VerseBlock { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == VERSE_BLOCK + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| VerseBlock { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl VerseBlock { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct CommentBlock { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for CommentBlock { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == COMMENT_BLOCK + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| CommentBlock { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl CommentBlock { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ExampleBlock { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for ExampleBlock { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == EXAMPLE_BLOCK + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| ExampleBlock { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl ExampleBlock { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ExportBlock { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for ExportBlock { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == EXPORT_BLOCK + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| ExportBlock { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl ExportBlock { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SourceBlock { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for SourceBlock { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == SOURCE_BLOCK + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| SourceBlock { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl SourceBlock { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] 
== backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct InlineCall { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for InlineCall { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == INLINE_CALL + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| InlineCall { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl InlineCall { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct InlineSrc { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for InlineSrc { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == INLINE_SRC + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| InlineSrc { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl InlineSrc { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Link { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Link { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == LINK + } + fn cast(node: SyntaxNode) -> Option { + 
Self::can_cast(node.kind()).then(|| Link { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Link { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Cookie { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Cookie { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == COOKIE + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Cookie { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Cookie { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct RadioTarget { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for RadioTarget { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == RADIO_TARGET + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| RadioTarget { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl RadioTarget { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> 
TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FnRef { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for FnRef { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == FN_REF + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| FnRef { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl FnRef { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Macros { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Macros { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == MACROS + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Macros { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Macros { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Snippet { + pub(crate) syntax: SyntaxNode, 
+} +impl AstNode for Snippet { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == SNIPPET + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Snippet { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Snippet { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Target { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Target { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == TARGET + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Target { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Target { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Bold { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Bold { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == BOLD + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Bold { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Bold { + /// Beginning position of this element + 
pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Strike { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Strike { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == STRIKE + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Strike { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Strike { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Italic { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Italic { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == ITALIC + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Italic { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Italic { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String 
{ + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Underline { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Underline { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == UNDERLINE + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Underline { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Underline { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Verbatim { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Verbatim { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == VERBATIM + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Verbatim { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Verbatim { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Code { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Code { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == CODE + } + fn cast(node: SyntaxNode) -> Option { + 
Self::can_cast(node.kind()).then(|| Code { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Code { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn text(&self) -> Option { + super::token(&self.syntax, TEXT) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Timestamp { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Timestamp { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == TIMESTAMP_ACTIVE || kind == TIMESTAMP_INACTIVE || kind == TIMESTAMP_DIARY + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Timestamp { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Timestamp { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn year_start(&self) -> Option { + super::token(&self.syntax, TIMESTAMP_YEAR) + } + pub fn month_start(&self) -> Option { + super::token(&self.syntax, TIMESTAMP_MONTH) + } + pub fn day_start(&self) -> Option { + super::token(&self.syntax, TIMESTAMP_DAY) + } + pub fn hour_start(&self) -> Option { + super::token(&self.syntax, TIMESTAMP_HOUR) + } + pub fn minute_start(&self) -> Option { + super::token(&self.syntax, TIMESTAMP_MINUTE) + } + pub fn 
year_end(&self) -> Option { + super::last_token(&self.syntax, TIMESTAMP_YEAR) + } + pub fn month_end(&self) -> Option { + super::last_token(&self.syntax, TIMESTAMP_MONTH) + } + pub fn day_end(&self) -> Option { + super::last_token(&self.syntax, TIMESTAMP_DAY) + } + pub fn hour_end(&self) -> Option { + super::last_token(&self.syntax, TIMESTAMP_HOUR) + } + pub fn minute_end(&self) -> Option { + super::last_token(&self.syntax, TIMESTAMP_MINUTE) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct LatexEnvironment { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for LatexEnvironment { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == LATEX_ENVIRONMENT + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| LatexEnvironment { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl LatexEnvironment { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct LatexFragment { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for LatexFragment { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == LATEX_FRAGMENT + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| LatexFragment { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl LatexFragment { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } 
+ /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Entity { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Entity { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == ENTITY + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Entity { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Entity { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct LineBreak { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for LineBreak { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == LINE_BREAK + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| LineBreak { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl LineBreak { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Superscript { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for 
Superscript { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == SUPERSCRIPT + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Superscript { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Superscript { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Subscript { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for Subscript { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == SUBSCRIPT + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| Subscript { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl Subscript { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } +} diff --git a/src/ast/headline.rs b/src/ast/headline.rs new file mode 100644 index 0000000..a465053 --- /dev/null +++ b/src/ast/headline.rs @@ -0,0 +1,276 @@ +use rowan::{ast::AstNode, NodeOrToken}; + +use crate::{syntax::SyntaxKind, SyntaxElement}; + +use super::{filter_token, Clock, Drawer, Headline, Section, Timestamp, Token}; + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum TodoType { + Todo, + Done, +} + +impl Headline 
{ + /// Return level of this headline + /// + /// ```rust + /// use orgize::{Org, ast::Headline}; + /// + /// let hdl = Org::parse("* ").first_node::().unwrap(); + /// assert_eq!(hdl.level(), 1); + /// let hdl = Org::parse("****** hello").first_node::().unwrap(); + /// assert_eq!(hdl.level(), 6); + /// ``` + pub fn level(&self) -> usize { + self.syntax + .children_with_tokens() + .find_map(filter_token(SyntaxKind::HEADLINE_STARS)) + .map_or_else( + || { + debug_assert!(false, "headline must contains HEADLINE_STARS"); + 0 + }, + |stars| stars.len(), + ) + } + + /// ```rust + /// use orgize::{Org, ast::Headline}; + /// + /// let hdl = Org::parse("* TODO a").first_node::().unwrap(); + /// assert_eq!(hdl.todo_keyword().unwrap(), "TODO"); + /// ``` + pub fn todo_keyword(&self) -> Option { + self.syntax + .children_with_tokens() + .find_map(|elem| match elem { + NodeOrToken::Token(tk) + if tk.kind() == SyntaxKind::HEADLINE_KEYWORD_TODO + || tk.kind() == SyntaxKind::HEADLINE_KEYWORD_DONE => + { + Some(Token(tk)) + } + _ => None, + }) + } + + /// ```rust + /// use orgize::{Org, ast::{Headline, TodoType}}; + /// + /// let hdl = Org::parse("* TODO a").first_node::().unwrap(); + /// assert_eq!(hdl.todo_type().unwrap(), TodoType::Todo); + /// let hdl = Org::parse("*** DONE a").first_node::().unwrap(); + /// assert_eq!(hdl.todo_type().unwrap(), TodoType::Done); + /// ``` + pub fn todo_type(&self) -> Option { + self.syntax + .children_with_tokens() + .find_map(|elem| match elem { + NodeOrToken::Token(tk) if tk.kind() == SyntaxKind::HEADLINE_KEYWORD_TODO => { + Some(TodoType::Todo) + } + NodeOrToken::Token(tk) if tk.kind() == SyntaxKind::HEADLINE_KEYWORD_DONE => { + Some(TodoType::Done) + } + _ => None, + }) + } + + /// ```rust + /// use orgize::{Org, ast::Headline}; + /// + /// let hdl = Org::parse("* TODO a").first_node::().unwrap(); + /// assert!(hdl.is_todo()); + /// let hdl = Org::parse("* a").first_node::().unwrap(); + /// assert!(!hdl.is_todo()); + /// ``` + pub fn 
is_todo(&self) -> bool { + matches!(self.todo_type(), Some(TodoType::Todo)) + } + + /// ```rust + /// use orgize::{Org, ast::Headline}; + /// + /// let hdl = Org::parse("* DONE a").first_node::().unwrap(); + /// assert!(hdl.is_done()); + /// let hdl = Org::parse("* a").first_node::().unwrap(); + /// assert!(!hdl.is_done()); + /// ``` + pub fn is_done(&self) -> bool { + matches!(self.todo_type(), Some(TodoType::Done)) + } + + /// Returns parsed title + /// + /// ```rust + /// use orgize::{Org, ast::Headline, SyntaxKind}; + /// + /// let hdl = Org::parse("*** abc *abc* /abc/ :tag:").first_node::().unwrap(); + /// let title = hdl.title().collect::>(); + /// assert_eq!(title[1].kind(), SyntaxKind::BOLD); + /// assert_eq!(title[1].to_string(), "*abc*"); + /// assert_eq!(title[3].kind(), SyntaxKind::ITALIC); + /// assert_eq!(title[3].to_string(), "/abc/"); + /// ``` + pub fn title(&self) -> impl Iterator { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::HEADLINE_TITLE) + .into_iter() + .flat_map(|n| n.children_with_tokens()) + } + + /// Returns title raw string + /// + /// ```rust + /// use orgize::{Org, ast::Headline}; + /// + /// let hdl = Org::parse("*** abc *abc* /abc/ :tag:").first_node::().unwrap(); + /// let title = hdl.title_raw(); + /// assert_eq!(title, "abc *abc* /abc/ "); + /// ``` + pub fn title_raw(&self) -> String { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::HEADLINE_TITLE) + .map(|n| n.to_string()) + .unwrap_or_default() + } + + /// Return `true` if this headline contains a COMMENT keyword + /// + /// ```rust + /// use orgize::{Org, ast::Headline}; + /// + /// let hdl = Org::parse("* COMMENT").first_node::().unwrap(); + /// assert!(hdl.is_commented()); + /// let hdl = Org::parse("* COMMENT hello").first_node::().unwrap(); + /// assert!(hdl.is_commented()); + /// let hdl = Org::parse("* hello").first_node::().unwrap(); + /// assert!(!hdl.is_commented()); + /// ``` + pub fn is_commented(&self) -> bool { + self.title() + 
.next() + .map(|first| { + if let Some(t) = first.as_token() { + let text = t.text(); + t.kind() == SyntaxKind::TEXT + && text.starts_with("COMMENT") + && (text.len() == 7 || text[7..].starts_with(char::is_whitespace)) + } else { + false + } + }) + .unwrap_or_default() + } + + /// Return `true` if this headline contains an archive tag + /// + /// ```rust + /// use orgize::{Org, ast::Headline}; + /// + /// let hdl = Org::parse("* hello :ARCHIVE:").first_node::().unwrap(); + /// assert!(hdl.is_archived()); + /// let hdl = Org::parse("* hello :ARCHIVED:").first_node::().unwrap(); + /// assert!(!hdl.is_archived()); + /// ``` + pub fn is_archived(&self) -> bool { + self.tags().any(|t| t == "ARCHIVE") + } + + /// Returns this headline's closed timestamp, or `None` if not set. + pub fn closed(&self) -> Option { + self.planning().and_then(|planning| planning.closed()) + } + + /// Returns this headline's scheduled timestamp, or `None` if not set. + pub fn scheduled(&self) -> Option { + self.planning().and_then(|planning| planning.scheduled()) + } + + /// Returns this headline's deadline timestamp, or `None` if not set. 
+ pub fn deadline(&self) -> Option { + self.planning().and_then(|planning| planning.deadline()) + } + + /// Returns an iterator over the text tokens of this headline's tags + /// + /// ```rust + /// use orgize::{Org, ast::Headline}; + /// + /// let tags_vec = |input: &str| { + /// let hdl = Org::parse(input).first_node::().unwrap(); + /// let tags: Vec<_> = hdl.tags().map(|t| t.to_string()).collect(); + /// tags + /// }; + /// + /// assert_eq!(tags_vec("* :tag:"), vec!["tag".to_string()]); + /// assert_eq!(tags_vec("* [#A] :::::a2%:"), vec!["a2%".to_string()]); + /// assert_eq!(tags_vec("* TODO :tag: :a2%:"), vec!["tag".to_string(), "a2%".to_string()]); + /// assert_eq!(tags_vec("* title :tag:a2%:"), vec!["tag".to_string(), "a2%".to_string()]); + /// ``` + pub fn tags(&self) -> impl Iterator { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::HEADLINE_TAGS) + .into_iter() + .flat_map(|t| t.children_with_tokens()) + .filter_map(filter_token(SyntaxKind::TEXT)) + } + + /// Returns the priority text of this headline, or `None` if no priority is set + /// + /// ```rust + /// use orgize::{Org, ast::Headline}; + /// + /// let hdl = Org::parse("* [#A]").first_node::().unwrap(); + /// assert_eq!(hdl.priority().unwrap(), "A"); + /// let hdl = Org::parse("** DONE [#B]::").first_node::().unwrap(); + /// assert_eq!(hdl.priority().unwrap(), "B"); + /// let hdl = Org::parse("* [#破]").first_node::().unwrap(); + /// assert_eq!(hdl.priority().unwrap(), "破"); + /// ``` + pub fn priority(&self) -> Option { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::HEADLINE_PRIORITY) + .and_then(|n| { + n.children_with_tokens() + .find_map(filter_token(SyntaxKind::TEXT)) + }) + } + + /// Returns an iterator over the clock elements affiliated with this headline + /// + /// ```rust + /// use orgize::{Org, ast::Headline}; + /// + /// let org = Org::parse(r#"* TODO + /// foo + /// :LOGBOOK: + /// bar + /// CLOCK: + /// CLOCK: [2024-10-12] + /// baz + /// CLOCK: [2024-10-12] + /// [2024-10-12] + /// :END: + /// foo"#); + /// let hdl =
org.first_node::().unwrap(); + /// assert_eq!(hdl.clocks().count(), 2); + /// ``` + pub fn clocks(&self) -> impl Iterator { + self.syntax + .children() + .flat_map(Section::cast) + .flat_map(|x| x.syntax.children().filter_map(Drawer::cast)) + .filter(|d| d.name().eq_ignore_ascii_case("LOGBOOK")) + .filter_map(|d| { + d.syntax + .children() + .find(|children| children.kind() == SyntaxKind::DRAWER_CONTENT) + }) + .flat_map(|x| x.children().filter_map(Clock::cast)) + } +} diff --git a/src/ast/inline_call.rs b/src/ast/inline_call.rs new file mode 100644 index 0000000..4dc2c04 --- /dev/null +++ b/src/ast/inline_call.rs @@ -0,0 +1,80 @@ +use crate::syntax::SyntaxKind; + +use super::{filter_token, InlineCall, Token}; + +impl InlineCall { + /// + /// ```rust + /// use orgize::{Org, ast::InlineCall}; + /// + /// let call = Org::parse("call_square(4)").first_node::().unwrap(); + /// assert_eq!(call.call(), "square"); + /// ``` + pub fn call(&self) -> Token { + self.syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)) + .nth(1) + .expect("inline call must contains two TEXT") + } + + /// + /// ```rust + /// use orgize::{Org, ast::InlineCall}; + /// + /// let call = Org::parse("call_square[:results output](4)").first_node::().unwrap(); + /// assert_eq!(call.inside_header().unwrap(), ":results output"); + /// + /// let call = Org::parse("call_square(4)[:results html]").first_node::().unwrap(); + /// assert!(call.inside_header().is_none()); + /// ``` + pub fn inside_header(&self) -> Option { + self.syntax + .children_with_tokens() + .take_while(|e| e.kind() != SyntaxKind::L_PARENS) + .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET) + .nth(1) + .and_then(|e| { + debug_assert_eq!(e.kind(), SyntaxKind::TEXT); + Some(Token(e.into_token()?)) + }) + } + + /// + /// ```rust + /// use orgize::{Org, ast::InlineCall}; + /// + /// let call = Org::parse("call_square(4)").first_node::().unwrap(); + /// assert_eq!(call.arguments(), "4"); + /// ``` + pub fn 
arguments(&self) -> Token { + self.syntax + .children_with_tokens() + .skip_while(|e| e.kind() != SyntaxKind::L_PARENS) + .find_map(filter_token(SyntaxKind::TEXT)) + .expect("inline call must contains TEXT after L_PARENS") + } + + /// Returns the header that follows the call arguments (e.g. `:results html` in `call_square(4)[:results html]`), or `None` if the call has no end header; the search starts at the opening parenthesis so an absent inside header does not hide the end header + /// + /// ```rust + /// use orgize::{Org, ast::InlineCall}; + /// + /// let call = Org::parse("call_square[:results output](4)[:results html]").first_node::().unwrap(); + /// assert_eq!(call.end_header().unwrap(), ":results html"); + /// + /// let call = Org::parse("call_square[:results output](4)").first_node::().unwrap(); + /// assert!(call.end_header().is_none()); + /// ``` + pub fn end_header(&self) -> Option { + self.syntax + .children_with_tokens() + .skip_while(|e| e.kind() != SyntaxKind::L_PARENS) + .skip(1) + .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET) + .nth(1) + .and_then(|e| { + debug_assert_eq!(e.kind(), SyntaxKind::TEXT); + Some(Token(e.into_token()?)) + }) + } +} diff --git a/src/ast/inline_src.rs b/src/ast/inline_src.rs new file mode 100644 index 0000000..87aff35 --- /dev/null +++ b/src/ast/inline_src.rs @@ -0,0 +1,62 @@ +use crate::SyntaxKind; + +use super::{filter_token, InlineSrc, Token}; + +impl InlineSrc { + /// Language of the code + /// + /// ```rust + /// use orgize::{Org, ast::InlineSrc}; + /// + /// let s = Org::parse("src_C{int a = 0;}").first_node::().unwrap(); + /// assert_eq!(s.language(), "C"); + /// let s = Org::parse("src_xml[:exports code]{text}").first_node::().unwrap(); + /// assert_eq!(s.language(), "xml"); + /// ``` + pub fn language(&self) -> Token { + self.syntax + .children_with_tokens() + .nth(1) + .and_then(filter_token(SyntaxKind::TEXT)) + .expect("inline src must contains TEXT") + } + + /// Optional header arguments + /// + /// ```rust + /// use orgize::{Org, ast::InlineSrc}; + /// + /// let s = Org::parse("src_C{int a = 0;}").first_node::().unwrap(); + /// assert!(s.parameters().is_none()); + /// let s = Org::parse("src_xml[:exports code]{text}").first_node::().unwrap(); + ///
assert_eq!(s.parameters().unwrap(), ":exports code"); + /// ``` + pub fn parameters(&self) -> Option { + self.syntax + .children_with_tokens() + .skip_while(|n| n.kind() != SyntaxKind::L_BRACKET) + .nth(1) + .and_then(|n| { + debug_assert_eq!(n.kind(), SyntaxKind::TEXT); + Some(Token(n.into_token()?)) + }) + } + + /// Source code + /// + /// ```rust + /// use orgize::{Org, ast::InlineSrc}; + /// + /// let s = Org::parse("src_C{int a = 0;}").first_node::().unwrap(); + /// assert_eq!(s.value(), "int a = 0;"); + /// let s = Org::parse("src_xml[:exports code]{text}").first_node::().unwrap(); + /// assert_eq!(s.value(), "text"); + /// ``` + pub fn value(&self) -> Token { + self.syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)) + .last() + .expect("inline src must contains TEXT") + } +} diff --git a/src/ast/keyword.rs b/src/ast/keyword.rs new file mode 100644 index 0000000..cf0a6f4 --- /dev/null +++ b/src/ast/keyword.rs @@ -0,0 +1,36 @@ +use crate::SyntaxKind; + +use super::{filter_token, Keyword, Token}; + +impl Keyword { + /// + /// ```rust + /// use orgize::{Org, ast::Keyword}; + /// + /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::().unwrap(); + /// assert_eq!(keyword.key(), "KEY"); + /// ``` + pub fn key(&self) -> Token { + self.syntax + .children_with_tokens() + .find_map(filter_token(SyntaxKind::TEXT)) + .expect("keyword must contains TEXT") + } + + /// + /// ```rust + /// use orgize::{Org, ast::Keyword}; + /// + /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::().unwrap(); + /// assert_eq!(keyword.value(), " VALUE"); + /// let keyword = Org::parse("#+KEY:").first_node::().unwrap(); + /// assert_eq!(keyword.value(), ""); + /// ``` + pub fn value(&self) -> Token { + self.syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)) + .nth(1) + .expect("keyword must contains two TEXT") + } +} diff --git a/src/ast/link.rs b/src/ast/link.rs new file mode 100644 index 0000000..daff761 --- 
/dev/null +++ b/src/ast/link.rs @@ -0,0 +1,121 @@ +use rowan::ast::AstNode; + +use super::{token, AffiliatedKeyword, Link, Paragraph, Token}; +use crate::{syntax::SyntaxKind, SyntaxElement}; + +impl Link { + /// Returns link destination + /// + /// ```rust + /// use orgize::{Org, ast::Link}; + /// + /// let link = Org::parse("[[#id]]").first_node::().unwrap(); + /// assert_eq!(link.path(), "#id"); + /// let link = Org::parse("[[https://google.com]]").first_node::().unwrap(); + /// assert_eq!(link.path(), "https://google.com"); + /// let link = Org::parse("[[https://google.com][Google]]").first_node::().unwrap(); + /// assert_eq!(link.path(), "https://google.com"); + /// ``` + pub fn path(&self) -> Token { + token(&self.syntax, SyntaxKind::LINK_PATH).expect("link must contains LINK_PATH") + } + + /// Returns `true` if link contains description + /// + /// ```rust + /// use orgize::{Org, ast::Link}; + /// + /// let link = Org::parse("[[https://google.com]]").first_node::().unwrap(); + /// assert!(!link.has_description()); + /// let link = Org::parse("[[https://google.com][Google]]").first_node::().unwrap(); + /// assert!(link.has_description()); + /// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::().unwrap(); + /// assert!(link.has_description()); + /// ``` + pub fn has_description(&self) -> bool { + self.syntax() + .children_with_tokens() + .any(|e| e.kind() == SyntaxKind::L_BRACKET) + } + + /// Returns parsed description + /// + /// Returns empty iterator if this link doesn't contain description + /// + /// ```rust + /// use orgize::{Org, ast::Link, SyntaxKind}; + /// + /// let link = Org::parse("[[https://google.com]]").first_node::().unwrap(); + /// assert_eq!(link.description().count(), 0); + /// + /// let link = Org::parse("[[https://google.com][Google]]").first_node::().unwrap(); + /// let description = link.description().collect::>(); + /// assert_eq!((description[0].kind(), description[0].to_string()), (SyntaxKind::TEXT, 
"Google".into())); + /// + /// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::().unwrap(); + /// let description = link.description().collect::>(); + /// assert_eq!((description[0].kind(), description[0].to_string()), (SyntaxKind::BOLD, "*abc*".into())); + /// assert_eq!((description[2].kind(), description[2].to_string()), (SyntaxKind::ITALIC, "/abc/".into())); + /// ``` + pub fn description(&self) -> impl Iterator { + self.syntax() + .children_with_tokens() + .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET) + .skip(1) + .take_while(|e| e.kind() != SyntaxKind::R_BRACKET2) + } + + /// Returns description raw string + /// + /// Returns empty string if this link doesn't contain description + /// + /// ```rust + /// use orgize::{Org, ast::Link}; + /// + /// let link = Org::parse("[[https://google.com]]").first_node::().unwrap(); + /// assert_eq!(link.description_raw(), ""); + /// let link = Org::parse("[[https://google.com][Google]]").first_node::().unwrap(); + /// assert_eq!(link.description_raw(), "Google"); + /// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::().unwrap(); + /// assert_eq!(link.description_raw(), "*abc* /abc/"); + /// ``` + pub fn description_raw(&self) -> String { + self.description() + .fold(String::new(), |acc, e| acc + &e.to_string()) + } + + /// Returns `true` if link is an image link + /// + /// ```rust + /// use orgize::{Org, ast::Link}; + /// + /// let link = Org::parse("[[https://google.com]]").first_node::().unwrap(); + /// assert!(!link.is_image()); + /// let link = Org::parse("[[file:/home/dominik/images/jupiter.jpg]]").first_node::().unwrap(); + /// assert!(link.is_image()); + /// ``` + pub fn is_image(&self) -> bool { + const IMAGE_SUFFIX: &[&str] = &[ + // https://github.com/bzg/org-mode/blob/7de1e818d5fbe6a05c6b1a007eed07dc27e7246b/lisp/ox.el#L253 + ".png", ".jpeg", ".jpg", ".gif", ".tiff", ".tif", ".xbm", ".xpm", ".pbm", ".pgm", + ".ppm", ".webp", ".avif", ".svg", + ]; + + let 
path = self.path(); + + IMAGE_SUFFIX.iter().any(|e| path.ends_with(e)) && !self.has_description() + } + + /// Returns the caption keyword of the paragraph that contains this link, or `None` if the parent is not a paragraph or has no caption + /// + /// ```rust + /// use orgize::{Org, ast::Link}; + /// + /// let link = Org::parse("#+CAPTION: image link\n[[file:/home/dominik/images/jupiter.jpg]]").first_node::().unwrap(); + /// assert_eq!(link.caption().unwrap().value().unwrap(), " image link"); + /// ``` + pub fn caption(&self) -> Option { + // TODO: support other element type + Paragraph::cast(self.syntax.parent()?)?.caption() + } +} diff --git a/src/ast/list.rs b/src/ast/list.rs new file mode 100644 index 0000000..d53bbc9 --- /dev/null +++ b/src/ast/list.rs @@ -0,0 +1,139 @@ +use super::{filter_token, List, ListItem, Token}; +use crate::{syntax::SyntaxKind, SyntaxElement}; + +impl List { + /// Returns `true` if this list is an ordered list, i.e. the bullet of its first item starts with an ASCII digit + /// + /// ```rust + /// use orgize::{Org, ast::List}; + /// + /// let list = Org::parse("+ 1").first_node::().unwrap(); + /// assert!(!list.is_ordered()); + /// + /// let list = Org::parse("1. 1").first_node::().unwrap(); + /// assert!(list.is_ordered()); + /// + /// let list = Org::parse("1) 1\n- 2\n3. 3").first_node::().unwrap(); + /// assert!(list.is_ordered()); + /// ``` + pub fn is_ordered(&self) -> bool { + self.items().next().map_or_else( + || { + debug_assert!(false, "list must contains LIST_ITEM"); + false + }, + |item| item.bullet().starts_with(|c: char| c.is_ascii_digit()), + ) + } + + /// Returns `true` if this list contains a TAG + /// + /// ```rust + /// use orgize::{Org, ast::List}; + /// + /// let list = Org::parse("- some tag :: item 2.1").first_node::().unwrap(); + /// assert!(list.is_descriptive()); + /// let list = Org::parse("2.
[X] item 2").first_node::().unwrap(); + /// assert!(!list.is_descriptive()); + /// ``` + pub fn is_descriptive(&self) -> bool { + self.items().next().map_or_else( + || { + debug_assert!(false, "list must contains LIST_ITEM"); + false + }, + |item| { + item.syntax + .children() + .any(|it| it.kind() == SyntaxKind::LIST_ITEM_TAG) + }, + ) + } +} + +impl ListItem { + /// ```rust + /// use orgize::{Org, ast::ListItem}; + /// + /// let item = Org::parse("- 1").first_node::().unwrap(); + /// assert_eq!(item.indent(), 0); + /// let item = Org::parse(" \t * 2").first_node::().unwrap(); + /// assert_eq!(item.indent(), 3); + /// ``` + pub fn indent(&self) -> usize { + self.syntax + .children_with_tokens() + .find_map(filter_token(SyntaxKind::LIST_ITEM_INDENT)) + .map_or_else( + || { + debug_assert!(false, "list item must contains LIST_ITEM_INDENT"); + 0 + }, + |t| t.len(), + ) + } + + /// ```rust + /// use orgize::{Org, ast::ListItem}; + /// + /// let item = Org::parse("- some tag").first_node::().unwrap(); + /// assert_eq!(item.bullet(), "- "); + /// let item = Org::parse("2. [X] item 2").first_node::().unwrap(); + /// assert_eq!(item.bullet(), "2. "); + /// ``` + pub fn bullet(&self) -> Token { + self.syntax + .children_with_tokens() + .find_map(filter_token(SyntaxKind::LIST_ITEM_BULLET)) + .expect("list item must contains LIST_ITEM_BULLET") + } + + /// ```rust + /// use orgize::{Org, ast::ListItem}; + /// + /// let item = Org::parse("- [-] item 1").first_node::().unwrap(); + /// assert_eq!(item.checkbox().unwrap(), "-"); + /// let item = Org::parse("2. 
[X] item 2").first_node::().unwrap(); + /// assert_eq!(item.checkbox().unwrap(), "X"); + /// let item = Org::parse("3) [ ] item 3").first_node::().unwrap(); + /// assert_eq!(item.checkbox().unwrap(), " "); + /// ``` + pub fn checkbox(&self) -> Option { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::LIST_ITEM_CHECK_BOX) + .and_then(|n| { + n.children_with_tokens() + .find_map(filter_token(SyntaxKind::TEXT)) + }) + } + + pub fn counter(&self) -> Option { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::LIST_ITEM_COUNTER) + .and_then(|n| { + n.children_with_tokens() + .find_map(filter_token(SyntaxKind::TEXT)) + }) + } + + /// ```rust + /// use orgize::{Org, ast::ListItem}; + /// + /// let item = Org::parse("+ this is *TAG* :: item1").first_node::().unwrap(); + /// let tag = item.tag().map(|n| n.to_string()).collect::(); + /// assert_eq!(tag, "this is *TAG* "); + /// ``` + pub fn tag(&self) -> impl Iterator { + self.syntax + .children() + .find(|n| n.kind() == SyntaxKind::LIST_ITEM_TAG) + .into_iter() + .flat_map(|n| { + n.children_with_tokens().filter(|n| { + n.kind() != SyntaxKind::WHITESPACE && n.kind() != SyntaxKind::COLON2 + }) + }) + } +} diff --git a/src/ast/macros.rs b/src/ast/macros.rs new file mode 100644 index 0000000..91dfa57 --- /dev/null +++ b/src/ast/macros.rs @@ -0,0 +1,35 @@ +use crate::SyntaxKind; + +use super::{filter_token, Macros, Token}; + +impl Macros { + /// ```rust + /// use orgize::{Org, ast::Macros}; + /// + /// let m = Org::parse("{{{title}}}").first_node::().unwrap(); + /// assert_eq!(m.key(), "title"); + /// let m = Org::parse("{{{two_arg_macro(1, 2)}}}").first_node::().unwrap(); + /// assert_eq!(m.key(), "two_arg_macro"); + /// ``` + pub fn key(&self) -> Token { + self.syntax + .children_with_tokens() + .find_map(filter_token(SyntaxKind::TEXT)) + .expect("macros must contains TEXT") + } + + /// ```rust + /// use orgize::{Org, ast::Macros}; + /// + /// let m = 
Org::parse("{{{title}}}").first_node::().unwrap(); + /// assert!(m.args().is_none()); + /// let m = Org::parse("{{{two_arg_macro(1, 2)}}}").first_node::().unwrap(); + /// assert_eq!(m.args().unwrap(), "1, 2"); + /// ``` + pub fn args(&self) -> Option { + self.syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)) + .nth(1) + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs new file mode 100644 index 0000000..9cd7817 --- /dev/null +++ b/src/ast/mod.rs @@ -0,0 +1,176 @@ +mod generated; + +mod affiliated_keyword; +mod block; +mod clock; +#[cfg(feature = "syntax-org-fc")] +mod cloze; +mod comment; +mod document; +mod drawer; +mod entity; +mod fixed_width; +mod headline; +mod inline_call; +mod inline_src; +mod keyword; +mod link; +mod list; +mod macros; +mod planning; +mod snippet; +mod table; +mod timestamp; + +#[cfg(feature = "syntax-org-fc")] +pub use cloze::*; +pub use generated::*; +pub use headline::*; +pub use rowan::ast::support::*; +pub use timestamp::*; + +use crate::{ + syntax::{SyntaxKind, SyntaxNode}, + SyntaxToken, +}; +use rowan::{ast::AstNode, NodeOrToken, TextRange, TextSize}; +use std::{ + borrow::{Borrow, Cow}, + fmt, + hash::Hash, + ops::Deref, +}; + +pub fn blank_lines(parent: &SyntaxNode) -> usize { + parent + .children_with_tokens() + .filter(|n| n.kind() == SyntaxKind::BLANK_LINE) + .count() +} + +pub fn last_child(parent: &rowan::SyntaxNode) -> Option { + parent.children().filter_map(N::cast).last() +} + +pub fn last_token(parent: &SyntaxNode, kind: SyntaxKind) -> Option { + parent + .children_with_tokens() + .filter_map(filter_token(kind)) + .last() +} + +pub fn token(parent: &SyntaxNode, kind: SyntaxKind) -> Option { + rowan::ast::support::token(parent, kind).map(Token) +} + +pub fn filter_token( + kind: SyntaxKind, +) -> impl Fn(NodeOrToken) -> Option { + move |elem| match elem { + NodeOrToken::Token(tk) if tk.kind() == kind => Some(Token(tk)), + _ => None, + } +} + +/// A simple wrapper of `SyntaxToken` +/// +/// It 
implements the `AsRef` and `Display` trait, +/// allowing to directly use some `str` methods. +/// +/// Also it implements `Hash` and `Eq` traits, so can be +/// used as keys in `HashMap`. However, note that it only +/// compares the underlying text inside `SyntaxToken`, +/// meaning two `Token`s from different positions +/// might be considered equal. +#[derive(Eq, Clone)] +pub struct Token(pub(crate) SyntaxToken); + +impl Token { + pub fn syntax(&self) -> &SyntaxToken { + &self.0 + } + + /// Range of this token + pub fn text_range(&self) -> TextRange { + self.0.text_range() + } + + /// Beginning position of this token + pub fn start(&self) -> TextSize { + self.0.text_range().start() + } + + /// Ending position of this token + pub fn end(&self) -> TextSize { + self.0.text_range().end() + } +} + +impl AsRef for Token { + fn as_ref(&self) -> &str { + self.0.text() + } +} + +impl Borrow for Token { + fn borrow(&self) -> &str { + self.as_ref() + } +} + +impl<'a> PartialEq<&'a str> for Token { + fn eq(&self, other: &&'a str) -> bool { + self.as_ref() == *other + } +} + +impl PartialEq for Token { + fn eq(&self, other: &String) -> bool { + self.as_ref() == other + } +} + +impl PartialEq for Token { + fn eq(&self, other: &Token) -> bool { + self.as_ref() == other.as_ref() + } +} + +impl Hash for Token { + fn hash(&self, state: &mut H) { + self.as_ref().hash(state) + } +} + +impl<'a> PartialEq> for Token { + fn eq(&self, other: &Cow<'a, str>) -> bool { + self.as_ref() == other + } +} + +impl PartialEq for Token { + fn eq(&self, other: &str) -> bool { + self.as_ref() == other + } +} + +impl Deref for Token { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + self.as_ref() + } +} + +impl fmt::Debug for Token { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.0.text(), f) + } +} + +impl fmt::Display for Token { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.0.text(), f) + } +} diff --git 
a/src/ast/planning.rs b/src/ast/planning.rs new file mode 100644 index 0000000..6a8bba3 --- /dev/null +++ b/src/ast/planning.rs @@ -0,0 +1,67 @@ +use rowan::ast::AstNode; + +use super::{Planning, Timestamp}; +use crate::syntax::SyntaxKind; + +impl Planning { + /// Returns deadline timestamp + /// + /// + /// ```rust + /// use orgize::{ast::Planning, Org}; + /// + /// let s = Org::parse("* a\nDEADLINE: <2019-04-08 Mon>") + /// .first_node::() + /// .unwrap() + /// .deadline() + /// .unwrap(); + /// assert_eq!(s.day_start().unwrap(), "08"); + /// ``` + pub fn deadline(&self) -> Option { + self.syntax + .children() + .filter(|n| n.kind() == SyntaxKind::PLANNING_DEADLINE) + .last() + .and_then(|n| n.children().find_map(Timestamp::cast)) + } + + /// Returns scheduled timestamp + /// + /// ```rust + /// use orgize::{ast::Planning, Org}; + /// + /// let s = Org::parse("* a\nSCHEDULED: <2019-04-08 Mon>") + /// .first_node::() + /// .unwrap() + /// .scheduled() + /// .unwrap(); + /// assert_eq!(s.year_start().unwrap(), "2019"); + /// ``` + pub fn scheduled(&self) -> Option { + self.syntax + .children() + .filter(|n| n.kind() == SyntaxKind::PLANNING_SCHEDULED) + .last() + .and_then(|n| n.children().find_map(Timestamp::cast)) + } + + /// Returns closed timestamp + /// + /// ```rust + /// use orgize::{ast::Planning, Org}; + /// + /// let s = Org::parse("* a\nCLOSED: <2019-04-08 Mon>") + /// .first_node::() + /// .unwrap() + /// .closed() + /// .unwrap(); + /// assert_eq!(s.month_start().unwrap(), "04"); + /// ``` + pub fn closed(&self) -> Option { + self.syntax + .children() + .filter(|n| n.kind() == SyntaxKind::PLANNING_CLOSED) + .last() + .and_then(|n| n.children().find_map(Timestamp::cast)) + } +} diff --git a/src/ast/snippet.rs b/src/ast/snippet.rs new file mode 100644 index 0000000..393713b --- /dev/null +++ b/src/ast/snippet.rs @@ -0,0 +1,34 @@ +use crate::syntax::SyntaxKind; + +use super::{filter_token, Snippet, Token}; + +impl Snippet { + /// ```rust + /// use 
orgize::{Org, ast::Snippet}; + /// + /// let snippet = Org::parse("@@BACKEND:VALUE@@").first_node::().unwrap(); + /// assert_eq!(snippet.backend(), "BACKEND"); + /// ``` + pub fn backend(&self) -> Token { + self.syntax + .children_with_tokens() + .find_map(filter_token(SyntaxKind::TEXT)) + .expect("snippet must contains TEXT") + } + + /// ```rust + /// use orgize::{Org, ast::Snippet}; + /// + /// let snippet = Org::parse("@@BACKEND:@@").first_node::().unwrap(); + /// assert_eq!(snippet.value(), ""); + /// let snippet = Org::parse("@@BACKEND:VALUE@@").first_node::().unwrap(); + /// assert_eq!(snippet.value(), "VALUE"); + /// ``` + pub fn value(&self) -> Token { + self.syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)) + .nth(1) + .expect("snippet must contains two TEXT") + } +} diff --git a/src/ast/table.rs b/src/ast/table.rs new file mode 100644 index 0000000..39999ee --- /dev/null +++ b/src/ast/table.rs @@ -0,0 +1,110 @@ +use rowan::ast::AstNode; + +use super::{filter_token, OrgTable, OrgTableRow, Token}; +use crate::syntax::SyntaxKind; + +impl OrgTable { + /// Returns `true` if this table has a header + /// + /// A table has a header when it contains at least two row groups. 
+ /// + /// ```rust + /// use orgize::{Org, ast::OrgTable}; + /// + /// let org = Org::parse(r#" + /// | a | b | + /// |---+---| + /// | c | d |"#); + /// let table = org.first_node::().unwrap(); + /// assert!(table.has_header()); + /// + /// let org = Org::parse(r#" + /// | a | b | + /// | 0 | 1 | + /// |---+---| + /// | a | w |"#); + /// let table = org.first_node::().unwrap(); + /// assert!(table.has_header()); + /// + /// let org = Org::parse(r#" + /// | a | b | + /// | c | d |"#); + /// let table = org.first_node::().unwrap(); + /// assert!(!table.has_header()); + /// + /// let org = Org::parse(r#" + /// |---+---| + /// | a | b | + /// | c | d | + /// |---+---|"#); + /// let table = org.first_node::().unwrap(); + /// assert!(!table.has_header()); + /// ``` + pub fn has_header(&self) -> bool { + self.syntax + .children() + .filter_map(OrgTableRow::cast) + .skip_while(|row| row.is_rule()) + .skip_while(|row| row.is_standard()) + .any(|row| !row.is_rule()) + } + + /// Formulas associated to the table + /// + /// ```rust + /// use orgize::{Org, ast::OrgTable}; + /// + /// let table = Org::parse("| a |").first_node::().unwrap(); + /// assert_eq!(table.tblfm().count(), 0); + /// + /// let table = Org::parse("| a |\n#+tblfm: test").first_node::().unwrap(); + /// let tblfm = table.tblfm().collect::>(); + /// assert_eq!(tblfm.len(), 1); + /// assert_eq!(tblfm[0], " test"); + /// + /// let table = Org::parse("| a |\n#+TBLFM: test1\n#+TBLFM: test2").first_node::().unwrap(); + /// let tblfm = table.tblfm().collect::>(); + /// assert_eq!(tblfm.len(), 2); + /// assert_eq!(tblfm[0], " test1"); + /// assert_eq!(tblfm[1], " test2"); + /// ``` + pub fn tblfm(&self) -> impl Iterator { + self.syntax.children().filter_map(|n| { + if n.kind() == SyntaxKind::KEYWORD { + n.children_with_tokens() + .filter_map(filter_token(SyntaxKind::TEXT)) + .last() + } else { + None + } + }) + } +} + +impl OrgTableRow { + /// Returns `true` if this row is a rule + /// + /// ```rust + /// use 
orgize::{Org, ast::OrgTableRow}; + /// + /// let org = Org::parse("|----|----|\n|Foo |Bar |"); + /// let row = org.first_node::().unwrap(); + /// assert!(row.is_rule()); + /// ``` + pub fn is_rule(&self) -> bool { + self.syntax.kind() == SyntaxKind::ORG_TABLE_RULE_ROW + } + + /// Returns `true` if this row is a standard row + /// + /// ```rust + /// use orgize::{Org, ast::OrgTableRow}; + /// + /// let org = Org::parse("|Foo |Bar |\n|----|----|"); + /// let row = org.first_node::().unwrap(); + /// assert!(row.is_standard()); + /// ``` + pub fn is_standard(&self) -> bool { + self.syntax.kind() == SyntaxKind::ORG_TABLE_STANDARD_ROW + } +} diff --git a/src/ast/timestamp.rs b/src/ast/timestamp.rs new file mode 100644 index 0000000..6d8a1d6 --- /dev/null +++ b/src/ast/timestamp.rs @@ -0,0 +1,301 @@ +use super::{filter_token, Timestamp}; +use crate::syntax::SyntaxKind; + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum TimeUnit { + Hour, + Day, + Week, + Month, + Year, +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum RepeaterType { + Cumulate, + CatchUp, + Restart, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum DelayType { + All, + First, +} + +impl Timestamp { + /// ```rust + /// use orgize::{Org, ast::Timestamp}; + /// + /// let ts = Org::parse("<2003-09-16 Tue 09:39-10:39>").first_node::().unwrap(); + /// assert!(ts.is_active()); + /// let ts = Org::parse("<2003-09-16 Tue 09:39>--<2003-09-16 Tue 10:39>").first_node::().unwrap(); + /// assert!(ts.is_active()); + /// let ts = Org::parse("<2003-09-16 Tue 09:39>").first_node::().unwrap(); + /// assert!(ts.is_active()); + /// ``` + pub fn is_active(&self) -> bool { + self.syntax.kind() == SyntaxKind::TIMESTAMP_ACTIVE + } + + /// ```rust + /// use orgize::{Org, ast::Timestamp}; + /// + /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::().unwrap(); + /// assert!(ts.is_inactive()); + /// let ts = Org::parse("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]").first_node::().unwrap(); + /// 
assert!(ts.is_inactive()); + /// let ts = Org::parse("[2003-09-16 Tue 09:39]").first_node::().unwrap(); + /// assert!(ts.is_inactive()); + /// ``` + pub fn is_inactive(&self) -> bool { + self.syntax.kind() == SyntaxKind::TIMESTAMP_INACTIVE + } + + /// ```rust + /// use orgize::{Org, ast::Timestamp}; + /// + /// let ts = Org::parse("<%%(org-calendar-holiday)>").first_node::().unwrap(); + /// assert!(ts.is_diary()); + /// ``` + pub fn is_diary(&self) -> bool { + self.syntax.kind() == SyntaxKind::TIMESTAMP_DIARY + } + + /// Returns `true` if this timestamp has a range + /// + /// ```rust + /// use orgize::{Org, ast::Timestamp}; + /// + /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::().unwrap(); + /// assert!(ts.is_range()); + /// let ts = Org::parse("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]").first_node::().unwrap(); + /// assert!(ts.is_range()); + /// let ts = Org::parse("[2003-09-16]--[2003-09-16]").first_node::().unwrap(); + /// assert!(ts.is_range()); + /// let ts = Org::parse("[2003-09-16 Tue 09:39]").first_node::().unwrap(); + /// assert!(!ts.is_range()); + /// ``` + pub fn is_range(&self) -> bool { + self.syntax + .children_with_tokens() + .filter_map(filter_token(SyntaxKind::MINUS)) + .count() + > 2 + } + + /// ```rust + /// use orgize::{Org, ast::{Timestamp, RepeaterType}}; + /// + /// let t = Org::parse("[2000-01-01 +1w]").first_node::().unwrap(); + /// assert_eq!(t.repeater_type(), Some(RepeaterType::Cumulate)); + /// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::().unwrap(); + /// assert_eq!(t.repeater_type(), Some(RepeaterType::Restart)); + /// let t = Org::parse("[2000-01-01 --1y]").first_node::().unwrap(); + /// assert_eq!(t.repeater_type(), None); + /// ``` + pub fn repeater_type(&self) -> Option { + self.nth_repeater(0).map(|i| i.0) + } + + /// ```rust + /// use orgize::{Org, ast::Timestamp}; + /// + /// let t = Org::parse("[2000-01-01 +1w]").first_node::().unwrap(); + /// assert_eq!(t.repeater_value(), Some(1)); 
+ /// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::().unwrap(); + /// assert_eq!(t.repeater_value(), Some(10)); + /// let t = Org::parse("[2000-01-01 --1y]").first_node::().unwrap(); + /// assert_eq!(t.repeater_value(), None); + /// ``` + pub fn repeater_value(&self) -> Option { + self.nth_repeater(0).map(|i| i.1) + } + + /// ```rust + /// use orgize::{Org, ast::{Timestamp, TimeUnit}}; + /// + /// let t = Org::parse("[2000-01-01 +1w]").first_node::().unwrap(); + /// assert_eq!(t.repeater_unit(), Some(TimeUnit::Week)); + /// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::().unwrap(); + /// assert_eq!(t.repeater_unit(), Some(TimeUnit::Day)); + /// let t = Org::parse("[2000-01-01 --1y]").first_node::().unwrap(); + /// assert_eq!(t.repeater_unit(), None); + /// ``` + pub fn repeater_unit(&self) -> Option { + self.nth_repeater(0).map(|i| i.2) + } + + /// ```rust + /// use orgize::{Org, ast::{Timestamp, DelayType}}; + /// + /// let t = Org::parse("[2000-01-01 -3y]").first_node::().unwrap(); + /// assert_eq!(t.warning_type(), Some(DelayType::All)); + /// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::().unwrap(); + /// assert_eq!(t.warning_type(), Some(DelayType::All)); + /// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::().unwrap(); + /// assert_eq!(t.warning_type(), Some(DelayType::First)); + /// ``` + pub fn warning_type(&self) -> Option { + self.nth_delay(0).map(|i| i.0) + } + + /// ```rust + /// use orgize::{Org, ast::Timestamp}; + /// + /// let t = Org::parse("[2000-01-01 -3y]").first_node::().unwrap(); + /// assert_eq!(t.warning_value(), Some(3)); + /// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::().unwrap(); + /// assert_eq!(t.warning_value(), Some(5)); + /// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::().unwrap(); + /// assert_eq!(t.warning_value(), Some(10)); + /// ``` + pub fn warning_value(&self) -> Option { + self.nth_delay(0).map(|i| i.1) + } + + /// ```rust 
+ /// use orgize::{Org, ast::{Timestamp, TimeUnit}}; + /// + /// let t = Org::parse("[2000-01-01 -3y]").first_node::().unwrap(); + /// assert_eq!(t.warning_unit(), Some(TimeUnit::Year)); + /// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::().unwrap(); + /// assert_eq!(t.warning_unit(), Some(TimeUnit::Week)); + /// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::().unwrap(); + /// assert_eq!(t.warning_unit(), Some(TimeUnit::Month)); + /// ``` + pub fn warning_unit(&self) -> Option { + self.nth_delay(0).map(|i| i.2) + } + + fn nth_repeater(&self, nth: usize) -> Option<(RepeaterType, u32, TimeUnit)> { + let mut i = nth + 1; + + let mut iter = self.syntax.children_with_tokens().skip_while(|n| { + if n.kind() == SyntaxKind::TIMESTAMP_REPEATER_MARK { + i -= 1; + i != 0 + } else { + true + } + }); + + let mark = iter.next().and_then(|n| match n.as_token()?.text() { + "++" => Some(RepeaterType::CatchUp), + "+" => Some(RepeaterType::Cumulate), + ".+" => Some(RepeaterType::Restart), + _ => None, + })?; + let value = iter + .next() + .and_then(|n| n.as_token()?.text().parse::().ok())?; + let unit = iter.next().and_then(|n| match n.as_token()?.text() { + "h" => Some(TimeUnit::Hour), + "d" => Some(TimeUnit::Day), + "w" => Some(TimeUnit::Week), + "m" => Some(TimeUnit::Month), + "y" => Some(TimeUnit::Year), + _ => None, + })?; + + Some((mark, value, unit)) + } + + fn nth_delay(&self, nth: usize) -> Option<(DelayType, u32, TimeUnit)> { + let mut i = nth + 1; + + let mut iter = self.syntax.children_with_tokens().skip_while(|n| { + if n.kind() == SyntaxKind::TIMESTAMP_DELAY_MARK { + i -= 1; + i != 0 + } else { + true + } + }); + + let mark = iter.next().and_then(|n| match n.as_token()?.text() { + "-" => Some(DelayType::All), + "--" => Some(DelayType::First), + _ => None, + })?; + let value = iter + .next() + .and_then(|n| n.as_token()?.text().parse::().ok())?; + let unit = iter.next().and_then(|n| match n.as_token()?.text() { + "h" => 
Some(TimeUnit::Hour), + "d" => Some(TimeUnit::Day), + "w" => Some(TimeUnit::Week), + "m" => Some(TimeUnit::Month), + "y" => Some(TimeUnit::Year), + _ => None, + })?; + + Some((mark, value, unit)) + } + + /// Converts timestamp start to chrono NaiveDateTime + /// + /// ```rust + /// use orgize::{Org, ast::Timestamp}; + /// use chrono::NaiveDateTime; + /// + /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::().unwrap(); + /// assert_eq!(ts.start_to_chrono().unwrap(), "2003-09-16T09:39:00".parse::().unwrap()); + /// + /// let ts = Org::parse("[2003-13-00 Tue 09:39-10:39]").first_node::().unwrap(); + /// assert!(ts.start_to_chrono().is_none()); + /// ``` + #[cfg(feature = "chrono")] + pub fn start_to_chrono(&self) -> Option { + Some(chrono::NaiveDateTime::new( + chrono::NaiveDate::from_ymd_opt( + self.year_start()?.parse().ok()?, + self.month_start()?.parse().ok()?, + self.day_start()?.parse().ok()?, + )?, + chrono::NaiveTime::from_hms_opt( + self.hour_start()?.parse().ok()?, + self.minute_start()?.parse().ok()?, + 0, + )?, + )) + } + + /// Converts timestamp end to chrono NaiveDateTime + /// + /// ```rust + /// use orgize::{Org, ast::Timestamp}; + /// use chrono::NaiveDateTime; + /// + /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::().unwrap(); + /// assert_eq!(ts.end_to_chrono().unwrap(), "2003-09-16T10:39:00".parse::().unwrap()); + /// ``` + #[cfg(feature = "chrono")] + pub fn end_to_chrono(&self) -> Option { + Some(chrono::NaiveDateTime::new( + chrono::NaiveDate::from_ymd_opt( + self.year_end()?.parse().ok()?, + self.month_end()?.parse().ok()?, + self.day_end()?.parse().ok()?, + )?, + chrono::NaiveTime::from_hms_opt( + self.hour_end()?.parse().ok()?, + self.minute_end()?.parse().ok()?, + 0, + )?, + )) + } + + /// Returns chrono::TimeDelta between timestamp start and end + /// + /// ```rust + /// use orgize::{Org, ast::Timestamp}; + /// + /// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::().unwrap(); + /// 
assert_eq!(ts.time_delta().unwrap().num_hours(), 1); + /// ``` + #[cfg(feature = "chrono")] + pub fn time_delta(&self) -> Option { + Some(self.end_to_chrono()? - self.start_to_chrono()?) + } +} diff --git a/src/config.rs b/src/config.rs index 955252d..a51db78 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,18 +1,87 @@ +use crate::syntax::document::document_node; +use crate::Org; + +#[derive(Clone, Debug)] +pub enum UseSubSuperscript { + Nil, + Brace, + True, +} + +impl UseSubSuperscript { + pub fn is_nil(&self) -> bool { + matches!(self, UseSubSuperscript::Nil) + } + + pub fn is_true(&self) -> bool { + matches!(self, UseSubSuperscript::True) + } + + pub fn is_brace(&self) -> bool { + matches!(self, UseSubSuperscript::Brace) + } +} + /// Parse configuration #[derive(Clone, Debug)] pub struct ParseConfig { /// Headline's todo keywords pub todo_keywords: (Vec, Vec), + + pub dual_keywords: Vec, + + pub parsed_keywords: Vec, + + /// Control sub/superscript parsing + /// + /// Equivalent to `org-use-sub-superscripts` + /// + /// - `UseSubSuperscript::Nil`: disable parsing + /// - `UseSubSuperscript::True`: enable parsing + /// - `UseSubSuperscript::Brace`: enable parsing, but braces are required + pub use_sub_superscript: UseSubSuperscript, + + /// Affiliated keywords + /// + /// Equivalent to [`org-element-affiliated-keywords`](https://git.sr.ht/~bzg/org-mode/tree/6f960f3c6a4dfe137fbd33fef9f7dadfd229600c/item/lisp/org-element.el#L331) + pub affiliated_keywords: Vec, +} + +impl ParseConfig { + /// Parses input with current config + pub fn parse(self, input: impl AsRef) -> Org { + let input = (input.as_ref(), &self).into(); + let node = document_node(input).unwrap().1; + + Org { + config: self, + green: node.into_node().unwrap(), + } + } } impl Default for ParseConfig { fn default() -> Self { ParseConfig { - todo_keywords: (vec![String::from("TODO")], vec![String::from("DONE")]), + todo_keywords: (vec!["TODO".into()], vec!["DONE".into()]), + dual_keywords: 
vec!["CAPTION".into(), "RESULTS".into()], + parsed_keywords: vec!["CAPTION".into()], + use_sub_superscript: UseSubSuperscript::True, + affiliated_keywords: vec![ + "CAPTION".into(), + "DATA".into(), + "HEADER".into(), + "HEADERS".into(), + "LABEL".into(), + "NAME".into(), + "PLOT".into(), + "RESNAME".into(), + "RESULT".into(), + "RESULTS".into(), + "SOURCE".into(), + "SRCNAME".into(), + "TBLNAME".into(), + ], } } } - -lazy_static::lazy_static! { - pub static ref DEFAULT_CONFIG: ParseConfig = ParseConfig::default(); -} diff --git a/src/elements/block.rs b/src/elements/block.rs deleted file mode 100644 index f138950..0000000 --- a/src/elements/block.rs +++ /dev/null @@ -1,408 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::tag_no_case, - character::complete::{alpha1, space0}, - sequence::preceded, - IResult, -}; - -use crate::elements::Element; -use crate::parse::combinators::{blank_lines_count, line, lines_till}; - -/// Special Block Element -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct SpecialBlock<'a> { - /// Block parameters - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub parameters: Option>, - /// Block name - pub name: Cow<'a, str>, - /// Numbers of blank lines between first block's line and next non-blank - /// line - pub pre_blank: usize, - /// Numbers of blank lines between last block's line and next non-blank line - /// or buffer's end - pub post_blank: usize, -} - -impl SpecialBlock<'_> { - pub fn into_owned(self) -> SpecialBlock<'static> { - SpecialBlock { - name: self.name.into_owned().into(), - parameters: self.parameters.map(Into::into).map(Cow::Owned), - pre_blank: self.pre_blank, - post_blank: self.post_blank, - } - } -} - -/// Quote Block Element -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct QuoteBlock<'a> { - /// Optional block 
parameters - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub parameters: Option>, - /// Numbers of blank lines between first block's line and next non-blank - /// line - pub pre_blank: usize, - /// Numbers of blank lines between last block's line and next non-blank line - /// or buffer's end - pub post_blank: usize, -} - -impl QuoteBlock<'_> { - pub fn into_owned(self) -> QuoteBlock<'static> { - QuoteBlock { - parameters: self.parameters.map(Into::into).map(Cow::Owned), - pre_blank: self.pre_blank, - post_blank: self.post_blank, - } - } -} - -/// Center Block Element -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct CenterBlock<'a> { - /// Optional block parameters - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub parameters: Option>, - /// Numbers of blank lines between first block's line and next non-blank - /// line - pub pre_blank: usize, - /// Numbers of blank lines between last block's line and next non-blank line - /// or buffer's end - pub post_blank: usize, -} - -impl CenterBlock<'_> { - pub fn into_owned(self) -> CenterBlock<'static> { - CenterBlock { - parameters: self.parameters.map(Into::into).map(Cow::Owned), - pre_blank: self.pre_blank, - post_blank: self.post_blank, - } - } -} - -/// Verse Block Element -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct VerseBlock<'a> { - /// Optional block parameters - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub parameters: Option>, - /// Numbers of blank lines between first block's line and next non-blank - /// line - pub pre_blank: usize, - /// Numbers of blank lines between last block's line and next non-blank line - /// or buffer's end - pub post_blank: usize, -} - -impl VerseBlock<'_> { - pub fn into_owned(self) -> VerseBlock<'static> { - 
VerseBlock { - parameters: self.parameters.map(Into::into).map(Cow::Owned), - pre_blank: self.pre_blank, - post_blank: self.post_blank, - } - } -} - -/// Comment Block Element -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct CommentBlock<'a> { - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub data: Option>, - /// Comment block contents - pub contents: Cow<'a, str>, - /// Numbers of blank lines between last block's line and next non-blank line - /// or buffer's end - pub post_blank: usize, -} - -impl CommentBlock<'_> { - pub fn into_owned(self) -> CommentBlock<'static> { - CommentBlock { - data: self.data.map(Into::into).map(Cow::Owned), - contents: self.contents.into_owned().into(), - post_blank: self.post_blank, - } - } -} - -/// Example Block Element -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct ExampleBlock<'a> { - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub data: Option>, - /// Block contents - pub contents: Cow<'a, str>, - /// Numbers of blank lines between last block's line and next non-blank line - /// or buffer's end - pub post_blank: usize, -} - -impl ExampleBlock<'_> { - pub fn into_owned(self) -> ExampleBlock<'static> { - ExampleBlock { - data: self.data.map(Into::into).map(Cow::Owned), - contents: self.contents.into_owned().into(), - post_blank: self.post_blank, - } - } -} - -/// Export Block Element -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct ExportBlock<'a> { - pub data: Cow<'a, str>, - /// Block contents - pub contents: Cow<'a, str>, - /// Numbers of blank lines between last block's line and next non-blank line - /// or buffer's end - pub post_blank: usize, -} - -impl ExportBlock<'_> { - pub fn into_owned(self) -> 
ExportBlock<'static> { - ExportBlock { - data: self.data.into_owned().into(), - contents: self.contents.into_owned().into(), - post_blank: self.post_blank, - } - } -} - -/// Src Block Element -#[derive(Debug, Default, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct SourceBlock<'a> { - /// Block contents - pub contents: Cow<'a, str>, - /// Language of the code in the block - pub language: Cow<'a, str>, - pub arguments: Cow<'a, str>, - /// Numbers of blank lines between last block's line and next non-blank line - /// or buffer's end - pub post_blank: usize, -} - -impl SourceBlock<'_> { - pub fn into_owned(self) -> SourceBlock<'static> { - SourceBlock { - language: self.language.into_owned().into(), - arguments: self.arguments.into_owned().into(), - contents: self.contents.into_owned().into(), - post_blank: self.post_blank, - } - } - - // TODO: fn number_lines() -> Some(New) | Some(Continued) | None { } - // TODO: fn preserve_indent() -> bool { } - // TODO: fn use_labels() -> bool { } - // TODO: fn label_fmt() -> Option { } - // TODO: fn retain_labels() -> bool { } -} - -#[derive(Debug)] -#[cfg_attr(test, derive(PartialEq))] -pub(crate) struct RawBlock<'a> { - pub name: &'a str, - pub arguments: &'a str, - - pub pre_blank: usize, - pub contents: &'a str, - pub contents_without_blank_lines: &'a str, - - pub post_blank: usize, -} - -impl<'a> RawBlock<'a> { - pub fn parse(input: &str) -> Option<(&str, RawBlock)> { - parse_internal(input).ok() - } - - pub fn into_element(self) -> (Element<'a>, &'a str) { - let RawBlock { - name, - contents, - arguments, - pre_blank, - contents_without_blank_lines, - post_blank, - } = self; - - let arguments: Option> = if arguments.is_empty() { - None - } else { - Some(arguments.into()) - }; - - let element = match &*name.to_uppercase() { - "CENTER" => CenterBlock { - parameters: arguments, - pre_blank, - post_blank, - } - .into(), - "QUOTE" => QuoteBlock { - parameters: 
arguments, - pre_blank, - post_blank, - } - .into(), - "VERSE" => VerseBlock { - parameters: arguments, - pre_blank, - post_blank, - } - .into(), - "COMMENT" => CommentBlock { - data: arguments, - contents: contents.into(), - post_blank, - } - .into(), - "EXAMPLE" => ExampleBlock { - data: arguments, - contents: contents.into(), - post_blank, - } - .into(), - "EXPORT" => ExportBlock { - data: arguments.unwrap_or_default(), - contents: contents.into(), - post_blank, - } - .into(), - "SRC" => { - let (language, arguments) = match &arguments { - Some(Cow::Borrowed(args)) => { - let (language, arguments) = - args.split_at(args.find(' ').unwrap_or_else(|| args.len())); - (language.into(), arguments.into()) - } - None => (Cow::Borrowed(""), Cow::Borrowed("")), - _ => unreachable!( - "`parse_block_element` returns `Some(Cow::Borrowed)` or `None`" - ), - }; - SourceBlock { - arguments, - language, - contents: contents.into(), - post_blank, - } - .into() - } - _ => SpecialBlock { - parameters: arguments, - name: name.into(), - pre_blank, - post_blank, - } - .into(), - }; - - (element, contents_without_blank_lines) - } -} - -fn parse_internal(input: &str) -> IResult<&str, RawBlock, ()> { - let (input, _) = space0(input)?; - let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?; - let (input, arguments) = line(input)?; - let end_line = format!("#+END_{}", name); - let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case(&end_line))(input)?; - let (contents_without_blank_lines, pre_blank) = blank_lines_count(contents)?; - let (input, post_blank) = blank_lines_count(input)?; - - Ok(( - input, - RawBlock { - name, - contents, - arguments: arguments.trim(), - pre_blank, - contents_without_blank_lines, - post_blank, - }, - )) -} - -#[test] -fn parse() { - assert_eq!( - RawBlock::parse( - r#"#+BEGIN_SRC -#+END_SRC"# - ), - Some(( - "", - RawBlock { - contents: "", - contents_without_blank_lines: "", - pre_blank: 0, - post_blank: 0, - name: 
"SRC".into(), - arguments: "" - } - )) - ); - - assert_eq!( - RawBlock::parse( - r#"#+begin_src - #+end_src"# - ), - Some(( - "", - RawBlock { - contents: "", - contents_without_blank_lines: "", - pre_blank: 0, - post_blank: 0, - name: "src".into(), - arguments: "" - } - )) - ); - - assert_eq!( - RawBlock::parse( - r#"#+BEGIN_SRC javascript -console.log('Hello World!'); -#+END_SRC - -"# - ), - Some(( - "", - RawBlock { - contents: "console.log('Hello World!');\n", - contents_without_blank_lines: "console.log('Hello World!');\n", - pre_blank: 0, - post_blank: 1, - name: "SRC".into(), - arguments: "javascript" - } - )) - ); - // TODO: more testing -} diff --git a/src/elements/clock.rs b/src/elements/clock.rs deleted file mode 100644 index c489a88..0000000 --- a/src/elements/clock.rs +++ /dev/null @@ -1,242 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::tag, - character::complete::{char, digit1, space0}, - combinator::recognize, - sequence::separated_pair, - IResult, -}; - -use crate::elements::timestamp::{parse_inactive, Datetime, Timestamp}; -use crate::parse::combinators::{blank_lines_count, eol}; - -/// Clock Element -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[cfg_attr(feature = "ser", serde(untagged))] -#[derive(Debug, Clone)] -pub enum Clock<'a> { - /// Closed Clock - Closed { - /// Time start - start: Datetime<'a>, - /// Time end - end: Datetime<'a>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - repeater: Option>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - delay: Option>, - /// Clock duration - duration: Cow<'a, str>, - /// Numbers of blank lines between the clock line and next non-blank - /// line or buffer's end - post_blank: usize, - }, - /// Running Clock - Running { - /// Time start - start: Datetime<'a>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - repeater: Option>, - 
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - delay: Option>, - /// Numbers of blank lines between the clock line and next non-blank - /// line or buffer's end - post_blank: usize, - }, -} - -impl Clock<'_> { - pub(crate) fn parse(input: &str) -> Option<(&str, Clock)> { - parse_internal(input).ok() - } - - pub fn into_onwed(self) -> Clock<'static> { - match self { - Clock::Closed { - start, - end, - repeater, - delay, - duration, - post_blank, - } => Clock::Closed { - start: start.into_owned(), - end: end.into_owned(), - repeater: repeater.map(Into::into).map(Cow::Owned), - delay: delay.map(Into::into).map(Cow::Owned), - duration: duration.into_owned().into(), - post_blank, - }, - Clock::Running { - start, - repeater, - delay, - post_blank, - } => Clock::Running { - start: start.into_owned(), - repeater: repeater.map(Into::into).map(Cow::Owned), - delay: delay.map(Into::into).map(Cow::Owned), - post_blank, - }, - } - } - - /// Returns `true` if the clock is running. - pub fn is_running(&self) -> bool { - match self { - Clock::Closed { .. } => false, - Clock::Running { .. } => true, - } - } - - /// Returns `true` if the clock is closed. - pub fn is_closed(&self) -> bool { - match self { - Clock::Closed { .. } => true, - Clock::Running { .. } => false, - } - } - - /// Returns clock duration, or `None` if it's running. - pub fn duration(&self) -> Option<&str> { - match self { - Clock::Closed { duration, .. } => Some(duration), - Clock::Running { .. } => None, - } - } - - /// Constructs a timestamp from the clock. - pub fn value(&self) -> Timestamp { - match &*self { - Clock::Closed { - start, - end, - repeater, - delay, - .. - } => Timestamp::InactiveRange { - start: start.clone(), - end: end.clone(), - repeater: repeater.clone(), - delay: delay.clone(), - }, - Clock::Running { - start, - repeater, - delay, - .. 
- } => Timestamp::Inactive { - start: start.clone(), - repeater: repeater.clone(), - delay: delay.clone(), - }, - } - } -} - -fn parse_internal(input: &str) -> IResult<&str, Clock, ()> { - let (input, _) = space0(input)?; - let (input, _) = tag("CLOCK:")(input)?; - let (input, _) = space0(input)?; - let (input, timestamp) = parse_inactive(input)?; - - match timestamp { - Timestamp::InactiveRange { - start, - end, - repeater, - delay, - } => { - let (input, _) = space0(input)?; - let (input, _) = tag("=>")(input)?; - let (input, _) = space0(input)?; - let (input, duration) = recognize(separated_pair(digit1, char(':'), digit1))(input)?; - let (input, _) = eol(input)?; - let (input, blank) = blank_lines_count(input)?; - Ok(( - input, - Clock::Closed { - start, - end, - repeater, - delay, - duration: duration.into(), - post_blank: blank, - }, - )) - } - Timestamp::Inactive { - start, - repeater, - delay, - } => { - let (input, _) = eol(input)?; - let (input, blank) = blank_lines_count(input)?; - Ok(( - input, - Clock::Running { - start, - repeater, - delay, - post_blank: blank, - }, - )) - } - _ => unreachable!( - "`parse_inactive` only returns `Timestamp::InactiveRange` or `Timestamp::Inactive`." 
- ), - } -} - -#[test] -fn parse() { - assert_eq!( - Clock::parse("CLOCK: [2003-09-16 Tue 09:39]"), - Some(( - "", - Clock::Running { - start: Datetime { - year: 2003, - month: 9, - day: 16, - dayname: "Tue".into(), - hour: Some(9), - minute: Some(39) - }, - repeater: None, - delay: None, - post_blank: 0, - } - )) - ); - assert_eq!( - Clock::parse("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00\n\n"), - Some(( - "", - Clock::Closed { - start: Datetime { - year: 2003, - month: 9, - day: 16, - dayname: "Tue".into(), - hour: Some(9), - minute: Some(39) - }, - end: Datetime { - year: 2003, - month: 9, - day: 16, - dayname: "Tue".into(), - hour: Some(10), - minute: Some(39) - }, - repeater: None, - delay: None, - duration: "1:00".into(), - post_blank: 1, - } - )) - ); -} diff --git a/src/elements/comment.rs b/src/elements/comment.rs deleted file mode 100644 index d6d414d..0000000 --- a/src/elements/comment.rs +++ /dev/null @@ -1,53 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - error::{make_error, ErrorKind}, - Err, IResult, -}; - -use crate::parse::combinators::{blank_lines_count, lines_while}; - -#[derive(Debug, Default, Clone)] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct Comment<'a> { - /// Comments value, with pound signs - pub value: Cow<'a, str>, - /// Numbers of blank lines between last comment's line and next non-blank - /// line or buffer's end - pub post_blank: usize, -} - -impl Comment<'_> { - pub(crate) fn parse(input: &str) -> Option<(&str, Comment)> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> Comment<'static> { - Comment { - value: self.value.into_owned().into(), - post_blank: self.post_blank, - } - } -} - -fn parse_internal(input: &str) -> IResult<&str, Comment, ()> { - let (input, value) = lines_while(|line| { - let line = line.trim_start(); - line == "#" || line.starts_with("# ") - })(input)?; - - if value.is_empty() { - // TODO: better error kind - return Err(Err::Error(make_error(input, 
ErrorKind::Many0))); - } - - let (input, post_blank) = blank_lines_count(input)?; - - Ok(( - input, - Comment { - value: value.into(), - post_blank, - }, - )) -} diff --git a/src/elements/cookie.rs b/src/elements/cookie.rs deleted file mode 100644 index 59dd012..0000000 --- a/src/elements/cookie.rs +++ /dev/null @@ -1,122 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - branch::alt, - bytes::complete::tag, - character::complete::digit0, - combinator::recognize, - sequence::{delimited, pair, separated_pair}, - IResult, -}; - -/// Statistics Cookie Object -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct Cookie<'a> { - /// Full cookie value - pub value: Cow<'a, str>, -} - -impl Cookie<'_> { - pub(crate) fn parse(input: &str) -> Option<(&str, Cookie)> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> Cookie<'static> { - Cookie { - value: self.value.into_owned().into(), - } - } -} - -#[inline] -fn parse_internal(input: &str) -> IResult<&str, Cookie, ()> { - let (input, value) = recognize(delimited( - tag("["), - alt(( - separated_pair(digit0, tag("/"), digit0), - pair(digit0, tag("%")), - )), - tag("]"), - ))(input)?; - - Ok(( - input, - Cookie { - value: value.into(), - }, - )) -} - -#[test] -fn parse() { - assert_eq!( - Cookie::parse("[1/10]"), - Some(( - "", - Cookie { - value: "[1/10]".into() - } - )) - ); - assert_eq!( - Cookie::parse("[1/1000]"), - Some(( - "", - Cookie { - value: "[1/1000]".into() - } - )) - ); - assert_eq!( - Cookie::parse("[10%]"), - Some(( - "", - Cookie { - value: "[10%]".into() - } - )) - ); - assert_eq!( - Cookie::parse("[%]"), - Some(( - "", - Cookie { - value: "[%]".into() - } - )) - ); - assert_eq!( - Cookie::parse("[/]"), - Some(( - "", - Cookie { - value: "[/]".into() - } - )) - ); - assert_eq!( - Cookie::parse("[100/]"), - Some(( - "", - Cookie { - value: "[100/]".into() - } - )) - ); - assert_eq!( - Cookie::parse("[/100]"), - Some(( - "", 
- Cookie { - value: "[/100]".into() - } - )) - ); - - assert!(Cookie::parse("[10% ]").is_none()); - assert!(Cookie::parse("[1//100]").is_none()); - assert!(Cookie::parse("[1\\100]").is_none()); - assert!(Cookie::parse("[10%%]").is_none()); -} diff --git a/src/elements/drawer.rs b/src/elements/drawer.rs deleted file mode 100644 index 20bb956..0000000 --- a/src/elements/drawer.rs +++ /dev/null @@ -1,121 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::{tag, take_while1}, - character::complete::space0, - sequence::delimited, - IResult, -}; - -use crate::parse::combinators::{blank_lines_count, eol, lines_till}; - -/// Drawer Element -#[derive(Debug, Default, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct Drawer<'a> { - /// Drawer name - pub name: Cow<'a, str>, - /// Numbers of blank lines between first drawer's line and next non-blank - /// line - pub pre_blank: usize, - /// Numbers of blank lines between last drawer's line and next non-blank - /// line or buffer's end - pub post_blank: usize, -} - -impl Drawer<'_> { - pub(crate) fn parse(input: &str) -> Option<(&str, (Drawer, &str))> { - parse_drawer(input).ok() - } - - pub fn into_owned(self) -> Drawer<'static> { - Drawer { - name: self.name.into_owned().into(), - pre_blank: self.pre_blank, - post_blank: self.post_blank, - } - } -} - -#[inline] -pub fn parse_drawer(input: &str) -> IResult<&str, (Drawer, &str), ()> { - let (input, (mut drawer, content)) = parse_drawer_without_blank(input)?; - - let (content, blank) = blank_lines_count(content)?; - drawer.pre_blank = blank; - - let (input, blank) = blank_lines_count(input)?; - drawer.post_blank = blank; - - Ok((input, (drawer, content))) -} - -pub fn parse_drawer_without_blank(input: &str) -> IResult<&str, (Drawer, &str), ()> { - let (input, _) = space0(input)?; - let (input, name) = delimited( - tag(":"), - take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'), - 
tag(":"), - )(input)?; - let (input, _) = eol(input)?; - let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case(":END:"))(input)?; - - Ok(( - input, - ( - Drawer { - name: name.into(), - pre_blank: 0, - post_blank: 0, - }, - contents, - ), - )) -} - -#[test] -fn parse() { - assert_eq!( - parse_drawer( - r#":PROPERTIES: - :CUSTOM_ID: id - :END:"# - ), - Ok(( - "", - ( - Drawer { - name: "PROPERTIES".into(), - pre_blank: 0, - post_blank: 0 - }, - " :CUSTOM_ID: id\n" - ) - )) - ); - assert_eq!( - parse_drawer( - r#":PROPERTIES: - - - :END: - -"# - ), - Ok(( - "", - ( - Drawer { - name: "PROPERTIES".into(), - pre_blank: 2, - post_blank: 1, - }, - "" - ) - )) - ); - - // https://github.com/PoiScript/orgize/issues/9 - assert!(parse_drawer(":SPAGHETTI:\n").is_err()); -} diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs deleted file mode 100644 index c74e7c1..0000000 --- a/src/elements/dyn_block.rs +++ /dev/null @@ -1,99 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::tag_no_case, - character::complete::{alpha1, space0, space1}, - IResult, -}; - -use crate::parse::combinators::{blank_lines_count, line, lines_till}; - -/// Dynamic Block Element -#[derive(Debug, Default, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct DynBlock<'a> { - /// Block name - pub block_name: Cow<'a, str>, - /// Block argument - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub arguments: Option>, - /// Numbers of blank lines between first block's line and next non-blank - /// line - pub pre_blank: usize, - /// Numbers of blank lines between last drawer's line and next non-blank - /// line or buffer's end - pub post_blank: usize, -} - -impl DynBlock<'_> { - pub(crate) fn parse(input: &str) -> Option<(&str, (DynBlock, &str))> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> DynBlock<'static> { - DynBlock { - block_name: 
self.block_name.into_owned().into(), - arguments: self.arguments.map(Into::into).map(Cow::Owned), - pre_blank: self.pre_blank, - post_blank: self.post_blank, - } - } -} - -#[inline] -fn parse_internal(input: &str) -> IResult<&str, (DynBlock, &str), ()> { - let (input, _) = space0(input)?; - let (input, _) = tag_no_case("#+BEGIN:")(input)?; - let (input, _) = space1(input)?; - let (input, name) = alpha1(input)?; - let (input, args) = line(input)?; - let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case("#+END:"))(input)?; - let (contents, pre_blank) = blank_lines_count(contents)?; - let (input, post_blank) = blank_lines_count(input)?; - - Ok(( - input, - ( - DynBlock { - block_name: name.into(), - arguments: if args.trim().is_empty() { - None - } else { - Some(args.trim().into()) - }, - pre_blank, - post_blank, - }, - contents, - ), - )) -} - -#[test] -fn parse() { - // TODO: testing - assert_eq!( - DynBlock::parse( - r#"#+BEGIN: clocktable :scope file - - -CONTENTS -#+END: - -"# - ), - Some(( - "", - ( - DynBlock { - block_name: "clocktable".into(), - arguments: Some(":scope file".into()), - pre_blank: 2, - post_blank: 1, - }, - "CONTENTS\n" - ) - )) - ); -} diff --git a/src/elements/emphasis.rs b/src/elements/emphasis.rs deleted file mode 100644 index 6517c8b..0000000 --- a/src/elements/emphasis.rs +++ /dev/null @@ -1,113 +0,0 @@ -use bytecount::count; -use memchr::memchr_iter; - -use crate::elements::Element; - -#[derive(Debug)] -#[cfg_attr(test, derive(PartialEq))] -pub(crate) struct Emphasis<'a> { - marker: u8, - contents: &'a str, -} - -impl<'a> Emphasis<'a> { - pub fn parse(text: &str, marker: u8) -> Option<(&str, Emphasis)> { - if text.len() < 3 { - return None; - } - - let bytes = text.as_bytes(); - - if bytes[1].is_ascii_whitespace() { - return None; - } - - for i in memchr_iter(marker, bytes).skip(1) { - // contains at least one character - if i == 1 { - continue; - } else if count(&bytes[1..i], b'\n') >= 2 { - break; - } else if 
validate_marker(i, text) { - return Some(( - &text[i + 1..], - Emphasis { - marker, - contents: &text[1..i], - }, - )); - } - } - None - } - - pub fn into_element(self) -> (Element<'a>, &'a str) { - let Emphasis { marker, contents } = self; - let element = match marker { - b'*' => Element::Bold, - b'+' => Element::Strike, - b'/' => Element::Italic, - b'_' => Element::Underline, - b'=' => Element::Verbatim { - value: contents.into(), - }, - b'~' => Element::Code { - value: contents.into(), - }, - _ => unreachable!(), - }; - (element, contents) - } -} - -fn validate_marker(pos: usize, text: &str) -> bool { - if text.as_bytes()[pos - 1].is_ascii_whitespace() { - false - } else if let Some(&post) = text.as_bytes().get(pos + 1) { - match post { - b' ' | b'-' | b'.' | b',' | b':' | b'!' | b'?' | b'\'' | b'\n' | b')' | b'}' => true, - _ => false, - } - } else { - true - } -} - -#[test] -fn parse() { - assert_eq!( - Emphasis::parse("*bold*", b'*'), - Some(( - "", - Emphasis { - contents: "bold", - marker: b'*' - } - )) - ); - assert_eq!( - Emphasis::parse("*bo*ld*", b'*'), - Some(( - "", - Emphasis { - contents: "bo*ld", - marker: b'*' - } - )) - ); - assert_eq!( - Emphasis::parse("*bo\nld*", b'*'), - Some(( - "", - Emphasis { - contents: "bo\nld", - marker: b'*' - } - )) - ); - assert_eq!(Emphasis::parse("*bold*a", b'*'), None); - assert_eq!(Emphasis::parse("*bold*", b'/'), None); - assert_eq!(Emphasis::parse("*bold *", b'*'), None); - assert_eq!(Emphasis::parse("* bold*", b'*'), None); - assert_eq!(Emphasis::parse("*b\nol\nd*", b'*'), None); -} diff --git a/src/elements/fixed_width.rs b/src/elements/fixed_width.rs deleted file mode 100644 index ae06677..0000000 --- a/src/elements/fixed_width.rs +++ /dev/null @@ -1,80 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - error::{make_error, ErrorKind}, - Err, IResult, -}; - -use crate::parse::combinators::{blank_lines_count, lines_while}; - -#[derive(Debug, Default, Clone)] -#[cfg_attr(test, derive(PartialEq))] 
-#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct FixedWidth<'a> { - /// Fixed width value - pub value: Cow<'a, str>, - /// Numbers of blank lines between last fixed width's line and next - /// non-blank line or buffer's end - pub post_blank: usize, -} - -impl FixedWidth<'_> { - pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth)> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> FixedWidth<'static> { - FixedWidth { - value: self.value.into_owned().into(), - post_blank: self.post_blank, - } - } -} - -fn parse_internal(input: &str) -> IResult<&str, FixedWidth, ()> { - let (input, value) = lines_while(|line| { - let line = line.trim_start(); - line == ":" || line.starts_with(": ") - })(input)?; - - if value.is_empty() { - // TODO: better error kind - return Err(Err::Error(make_error(input, ErrorKind::Many0))); - } - - let (input, post_blank) = blank_lines_count(input)?; - - Ok(( - input, - FixedWidth { - value: value.into(), - post_blank, - }, - )) -} - -#[test] -fn parse() { - assert_eq!( - FixedWidth::parse( - r#": A -: -: B -: C - -"# - ), - Some(( - "", - FixedWidth { - value: r#": A -: -: B -: C -"# - .into(), - post_blank: 1 - } - )) - ); -} diff --git a/src/elements/fn_def.rs b/src/elements/fn_def.rs deleted file mode 100644 index 2c91f16..0000000 --- a/src/elements/fn_def.rs +++ /dev/null @@ -1,117 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::{tag, take_while1}, - sequence::delimited, - IResult, -}; - -use crate::parse::combinators::{blank_lines_count, line}; - -/// Footnote Definition Element -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Default, Clone)] -pub struct FnDef<'a> { - /// Footnote label, used for reference - pub label: Cow<'a, str>, - /// Numbers of blank lines between last footnote definition's line and next - /// non-blank line or buffer's end - pub post_blank: usize, -} - -impl FnDef<'_> { - pub(crate) fn parse(input: 
&str) -> Option<(&str, (FnDef, &str))> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> FnDef<'static> { - FnDef { - label: self.label.into_owned().into(), - post_blank: self.post_blank, - } - } -} - -fn parse_internal(input: &str) -> IResult<&str, (FnDef, &str), ()> { - let (input, label) = delimited( - tag("[fn:"), - take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), - tag("]"), - )(input)?; - - let (input, content) = line(input)?; - - let (input, post_blank) = blank_lines_count(input)?; - - Ok(( - input, - ( - FnDef { - label: label.into(), - post_blank, - }, - content, - ), - )) -} - -#[test] -fn parse() { - assert_eq!( - FnDef::parse("[fn:1] https://orgmode.org"), - Some(( - "", - ( - FnDef { - label: "1".into(), - post_blank: 0 - }, - " https://orgmode.org" - ) - )) - ); - assert_eq!( - FnDef::parse("[fn:word_1] https://orgmode.org"), - Some(( - "", - ( - FnDef { - label: "word_1".into(), - post_blank: 0, - }, - " https://orgmode.org" - ) - )) - ); - assert_eq!( - FnDef::parse("[fn:WORD-1] https://orgmode.org"), - Some(( - "", - ( - FnDef { - label: "WORD-1".into(), - post_blank: 0, - }, - " https://orgmode.org" - ) - )) - ); - assert_eq!( - FnDef::parse("[fn:WORD]"), - Some(( - "", - ( - FnDef { - label: "WORD".into(), - post_blank: 0, - }, - "" - ) - )) - ); - - assert!(FnDef::parse("[fn:] https://orgmode.org").is_none()); - assert!(FnDef::parse("[fn:wor d] https://orgmode.org").is_none()); - assert!(FnDef::parse("[fn:WORD https://orgmode.org").is_none()); -} diff --git a/src/elements/fn_ref.rs b/src/elements/fn_ref.rs deleted file mode 100644 index c03253e..0000000 --- a/src/elements/fn_ref.rs +++ /dev/null @@ -1,111 +0,0 @@ -use std::borrow::Cow; - -use memchr::memchr2_iter; -use nom::{ - bytes::complete::{tag, take_while}, - combinator::opt, - error::{make_error, ErrorKind}, - sequence::preceded, - Err, IResult, -}; - -/// Footnote Reference Element -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", 
derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct FnRef<'a> { - /// Footnote label - pub label: Cow<'a, str>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub definition: Option>, -} - -impl FnRef<'_> { - pub(crate) fn parse(input: &str) -> Option<(&str, FnRef)> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> FnRef<'static> { - FnRef { - label: self.label.into_owned().into(), - definition: self.definition.map(Into::into).map(Cow::Owned), - } - } -} - -#[inline] -fn parse_internal(input: &str) -> IResult<&str, FnRef, ()> { - let (input, _) = tag("[fn:")(input)?; - let (input, label) = - take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?; - let (input, definition) = opt(preceded(tag(":"), balanced_brackets))(input)?; - let (input, _) = tag("]")(input)?; - - Ok(( - input, - FnRef { - label: label.into(), - definition: definition.map(Into::into), - }, - )) -} - -fn balanced_brackets(input: &str) -> IResult<&str, &str, ()> { - let mut pairs = 1; - for i in memchr2_iter(b'[', b']', input.as_bytes()) { - if input.as_bytes()[i] == b'[' { - pairs += 1; - } else if pairs != 1 { - pairs -= 1; - } else { - return Ok((&input[i..], &input[0..i])); - } - } - Err(Err::Error(make_error(input, ErrorKind::Tag))) -} - -#[test] -fn parse() { - assert_eq!( - FnRef::parse("[fn:1]"), - Some(( - "", - FnRef { - label: "1".into(), - definition: None - }, - )) - ); - assert_eq!( - FnRef::parse("[fn:1:2]"), - Some(( - "", - FnRef { - label: "1".into(), - definition: Some("2".into()) - }, - )) - ); - assert_eq!( - FnRef::parse("[fn::2]"), - Some(( - "", - FnRef { - label: "".into(), - definition: Some("2".into()) - }, - )) - ); - assert_eq!( - FnRef::parse("[fn::[]]"), - Some(( - "", - FnRef { - label: "".into(), - definition: Some("[]".into()) - }, - )) - ); - - assert!(FnRef::parse("[fn::[]").is_none()); -} diff --git a/src/elements/inline_call.rs b/src/elements/inline_call.rs deleted file mode 
100644 index 8878beb..0000000 --- a/src/elements/inline_call.rs +++ /dev/null @@ -1,122 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::{tag, take_till}, - combinator::opt, - sequence::{delimited, preceded}, - IResult, -}; - -/// Inline Babel Call Object -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Default, Clone)] -pub struct InlineCall<'a> { - /// Called code block name - pub name: Cow<'a, str>, - /// Header arguments applied to the code block - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub inside_header: Option>, - /// Argument passed to the code block - pub arguments: Cow<'a, str>, - /// Header arguments applied to the calling instance - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub end_header: Option>, -} - -impl InlineCall<'_> { - pub(crate) fn parse(input: &str) -> Option<(&str, InlineCall)> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> InlineCall<'static> { - InlineCall { - name: self.name.into_owned().into(), - arguments: self.arguments.into_owned().into(), - inside_header: self.inside_header.map(Into::into).map(Cow::Owned), - end_header: self.end_header.map(Into::into).map(Cow::Owned), - } - } -} - -#[inline] -fn parse_internal(input: &str) -> IResult<&str, InlineCall, ()> { - let (input, name) = preceded( - tag("call_"), - take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')'), - )(input)?; - let (input, inside_header) = opt(delimited( - tag("["), - take_till(|c| c == ']' || c == '\n'), - tag("]"), - ))(input)?; - let (input, arguments) = - delimited(tag("("), take_till(|c| c == ')' || c == '\n'), tag(")"))(input)?; - let (input, end_header) = opt(delimited( - tag("["), - take_till(|c| c == ']' || c == '\n'), - tag("]"), - ))(input)?; - - Ok(( - input, - InlineCall { - name: name.into(), - arguments: arguments.into(), - inside_header: inside_header.map(Into::into), - 
end_header: end_header.map(Into::into), - }, - )) -} - -#[test] -fn parse() { - assert_eq!( - InlineCall::parse("call_square(4)"), - Some(( - "", - InlineCall { - name: "square".into(), - arguments: "4".into(), - inside_header: None, - end_header: None, - } - )) - ); - assert_eq!( - InlineCall::parse("call_square[:results output](4)"), - Some(( - "", - InlineCall { - name: "square".into(), - arguments: "4".into(), - inside_header: Some(":results output".into()), - end_header: None, - }, - )) - ); - assert_eq!( - InlineCall::parse("call_square(4)[:results html]"), - Some(( - "", - InlineCall { - name: "square".into(), - arguments: "4".into(), - inside_header: None, - end_header: Some(":results html".into()), - }, - )) - ); - assert_eq!( - InlineCall::parse("call_square[:results output](4)[:results html]"), - Some(( - "", - InlineCall { - name: "square".into(), - arguments: "4".into(), - inside_header: Some(":results output".into()), - end_header: Some(":results html".into()), - }, - )) - ); -} diff --git a/src/elements/inline_src.rs b/src/elements/inline_src.rs deleted file mode 100644 index f04d31a..0000000 --- a/src/elements/inline_src.rs +++ /dev/null @@ -1,88 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::{tag, take_till, take_while1}, - combinator::opt, - sequence::delimited, - IResult, -}; - -/// Inline Src Block Object -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct InlineSrc<'a> { - /// Language of the code - pub lang: Cow<'a, str>, - /// Optional header arguments - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub options: Option>, - /// Source code - pub body: Cow<'a, str>, -} - -impl InlineSrc<'_> { - pub(crate) fn parse(input: &str) -> Option<(&str, InlineSrc)> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> InlineSrc<'static> { - InlineSrc { - lang: self.lang.into_owned().into(), - options: 
self.options.map(Into::into).map(Cow::Owned), - body: self.body.into_owned().into(), - } - } -} - -#[inline] -fn parse_internal(input: &str) -> IResult<&str, InlineSrc, ()> { - let (input, _) = tag("src_")(input)?; - let (input, lang) = - take_while1(|c: char| !c.is_ascii_whitespace() && c != '[' && c != '{')(input)?; - let (input, options) = opt(delimited( - tag("["), - take_till(|c| c == '\n' || c == ']'), - tag("]"), - ))(input)?; - let (input, body) = delimited(tag("{"), take_till(|c| c == '\n' || c == '}'), tag("}"))(input)?; - - Ok(( - input, - InlineSrc { - lang: lang.into(), - options: options.map(Into::into), - body: body.into(), - }, - )) -} - -#[test] -fn parse() { - assert_eq!( - InlineSrc::parse("src_C{int a = 0;}"), - Some(( - "", - InlineSrc { - lang: "C".into(), - options: None, - body: "int a = 0;".into() - }, - )) - ); - assert_eq!( - InlineSrc::parse("src_xml[:exports code]{text}"), - Some(( - "", - InlineSrc { - lang: "xml".into(), - options: Some(":exports code".into()), - body: "text".into(), - }, - )) - ); - - assert!(InlineSrc::parse("src_xml[:exports code]{text").is_none()); - assert!(InlineSrc::parse("src_[:exports code]{text}").is_none()); - assert!(InlineSrc::parse("src_xml[:exports code]").is_none()); -} diff --git a/src/elements/keyword.rs b/src/elements/keyword.rs deleted file mode 100644 index af8f8d9..0000000 --- a/src/elements/keyword.rs +++ /dev/null @@ -1,230 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::{tag, take_till}, - character::complete::space0, - combinator::opt, - sequence::delimited, - IResult, -}; - -use crate::elements::Element; -use crate::parse::combinators::{blank_lines_count, line}; - -/// Keyword Element -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct Keyword<'a> { - /// Keyword name - pub key: Cow<'a, str>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub optional: Option>, - 
/// Keyword value - pub value: Cow<'a, str>, - /// Numbers of blank lines between keyword line and next non-blank line or - /// buffer's end - pub post_blank: usize, -} - -impl Keyword<'_> { - pub fn into_owned(self) -> Keyword<'static> { - Keyword { - key: self.key.into_owned().into(), - optional: self.optional.map(Into::into).map(Cow::Owned), - value: self.value.into_owned().into(), - post_blank: self.post_blank, - } - } -} - -/// Babel Call Element -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct BabelCall<'a> { - /// Babel call value - pub value: Cow<'a, str>, - /// Numbers of blank lines between babel call line and next non-blank line - /// or buffer's end - pub post_blank: usize, -} - -impl BabelCall<'_> { - pub fn into_owned(self) -> BabelCall<'static> { - BabelCall { - value: self.value.into_owned().into(), - post_blank: self.post_blank, - } - } -} - -#[derive(Debug)] -#[cfg_attr(test, derive(PartialEq))] -pub(crate) struct RawKeyword<'a> { - pub key: &'a str, - pub value: &'a str, - pub optional: Option<&'a str>, - pub post_blank: usize, -} - -impl<'a> RawKeyword<'a> { - pub fn parse(input: &str) -> Option<(&str, RawKeyword)> { - parse_internal(input).ok() - } - - pub fn into_element(self) -> Element<'a> { - let RawKeyword { - key, - value, - optional, - post_blank, - } = self; - - if (&*key).eq_ignore_ascii_case("CALL") { - BabelCall { - value: value.into(), - post_blank, - } - .into() - } else { - Keyword { - key: key.into(), - optional: optional.map(Into::into), - value: value.into(), - post_blank, - } - .into() - } - } -} - -fn parse_internal(input: &str) -> IResult<&str, RawKeyword, ()> { - let (input, _) = space0(input)?; - let (input, _) = tag("#+")(input)?; - let (input, key) = take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?; - let (input, optional) = opt(delimited( - tag("["), - take_till(|c| c == ']' || c == '\n'), - tag("]"), - 
))(input)?; - let (input, _) = tag(":")(input)?; - let (input, value) = line(input)?; - let (input, post_blank) = blank_lines_count(input)?; - - Ok(( - input, - RawKeyword { - key, - optional, - value: value.trim(), - post_blank, - }, - )) -} - -#[test] -fn parse() { - assert_eq!( - RawKeyword::parse("#+KEY:"), - Some(( - "", - RawKeyword { - key: "KEY", - optional: None, - value: "", - post_blank: 0 - } - )) - ); - assert_eq!( - RawKeyword::parse("#+KEY: VALUE"), - Some(( - "", - RawKeyword { - key: "KEY", - optional: None, - value: "VALUE", - post_blank: 0 - } - )) - ); - assert_eq!( - RawKeyword::parse("#+K_E_Y: VALUE"), - Some(( - "", - RawKeyword { - key: "K_E_Y", - optional: None, - value: "VALUE", - post_blank: 0 - } - )) - ); - assert_eq!( - RawKeyword::parse("#+KEY:VALUE\n"), - Some(( - "", - RawKeyword { - key: "KEY", - optional: None, - value: "VALUE", - post_blank: 0 - } - )) - ); - assert!(RawKeyword::parse("#+KE Y: VALUE").is_none()); - assert!(RawKeyword::parse("#+ KEY: VALUE").is_none()); - - assert_eq!( - RawKeyword::parse("#+RESULTS:"), - Some(( - "", - RawKeyword { - key: "RESULTS", - optional: None, - value: "", - post_blank: 0 - } - )) - ); - - assert_eq!( - RawKeyword::parse("#+ATTR_LATEX: :width 5cm\n"), - Some(( - "", - RawKeyword { - key: "ATTR_LATEX", - optional: None, - value: ":width 5cm", - post_blank: 0 - } - )) - ); - - assert_eq!( - RawKeyword::parse("#+CALL: double(n=4)"), - Some(( - "", - RawKeyword { - key: "CALL", - optional: None, - value: "double(n=4)", - post_blank: 0 - } - )) - ); - - assert_eq!( - RawKeyword::parse("#+CAPTION[Short caption]: Longer caption."), - Some(( - "", - RawKeyword { - key: "CAPTION", - optional: Some("Short caption"), - value: "Longer caption.", - post_blank: 0 - } - )) - ); -} diff --git a/src/elements/link.rs b/src/elements/link.rs deleted file mode 100644 index b0bb08d..0000000 --- a/src/elements/link.rs +++ /dev/null @@ -1,80 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::{tag, 
take_while}, - combinator::opt, - sequence::delimited, - IResult, -}; - -/// Link Object -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct Link<'a> { - /// Link destination - pub path: Cow<'a, str>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub desc: Option>, -} - -impl Link<'_> { - #[inline] - pub(crate) fn parse(input: &str) -> Option<(&str, Link)> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> Link<'static> { - Link { - path: self.path.into_owned().into(), - desc: self.desc.map(Into::into).map(Cow::Owned), - } - } -} - -#[inline] -fn parse_internal(input: &str) -> IResult<&str, Link, ()> { - let (input, path) = delimited( - tag("[["), - take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']'), - tag("]"), - )(input)?; - let (input, desc) = opt(delimited( - tag("["), - take_while(|c: char| c != '[' && c != ']'), - tag("]"), - ))(input)?; - let (input, _) = tag("]")(input)?; - Ok(( - input, - Link { - path: path.into(), - desc: desc.map(Into::into), - }, - )) -} - -#[test] -fn parse() { - assert_eq!( - Link::parse("[[#id]]"), - Some(( - "", - Link { - path: "#id".into(), - desc: None - } - )) - ); - assert_eq!( - Link::parse("[[#id][desc]]"), - Some(( - "", - Link { - path: "#id".into(), - desc: Some("desc".into()) - } - )) - ); - assert!(Link::parse("[[#id][desc]").is_none()); -} diff --git a/src/elements/list.rs b/src/elements/list.rs deleted file mode 100644 index 3b49852..0000000 --- a/src/elements/list.rs +++ /dev/null @@ -1,316 +0,0 @@ -use std::borrow::Cow; -use std::iter::once; - -use memchr::{memchr, memchr_iter}; -use nom::{ - branch::alt, - bytes::complete::tag, - character::complete::{digit1, space0}, - combinator::{map, recognize}, - sequence::terminated, - IResult, -}; - -/// Plain List Element -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, 
Clone)] -pub struct List { - /// List indent, number of whitespaces - pub indent: usize, - /// List's type, determined by the first item of this list - pub ordered: bool, - /// Numbers of blank lines between last list's line and next non-blank line - /// or buffer's end - pub post_blank: usize, -} - -/// List Item Element -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct ListItem<'a> { - /// List item bullet - pub bullet: Cow<'a, str>, - /// List item indent, number of whitespaces - pub indent: usize, - /// List item type - pub ordered: bool, - // TODO checkbox - // TODO counter - // TODO tag -} - -impl ListItem<'_> { - #[inline] - pub(crate) fn parse(input: &str) -> Option<(&str, (ListItem, &str))> { - list_item(input).ok() - } - - pub fn into_owned(self) -> ListItem<'static> { - ListItem { - bullet: self.bullet.into_owned().into(), - indent: self.indent, - ordered: self.ordered, - } - } -} - -fn list_item(input: &str) -> IResult<&str, (ListItem, &str), ()> { - let (input, indent) = map(space0, |s: &str| s.len())(input)?; - let (input, bullet) = recognize(alt(( - tag("+ "), - tag("* "), - tag("- "), - terminated(digit1, tag(". 
")), - )))(input)?; - let (input, contents) = list_item_contents(input, indent); - Ok(( - input, - ( - ListItem { - bullet: bullet.into(), - indent, - ordered: bullet.starts_with(|c: char| c.is_ascii_digit()), - }, - contents, - ), - )) -} - -fn list_item_contents(input: &str, indent: usize) -> (&str, &str) { - let mut last_end = memchr(b'\n', input.as_bytes()) - .map(|i| i + 1) - .unwrap_or_else(|| input.len()); - - for i in memchr_iter(b'\n', input.as_bytes()) - .map(|i| i + 1) - .chain(once(input.len())) - .skip(1) - { - if input[last_end..i] - .as_bytes() - .iter() - .all(u8::is_ascii_whitespace) - { - let x = memchr(b'\n', &input[i..].as_bytes()) - .map(|ii| i + ii + 1) - .unwrap_or_else(|| input.len()); - - // two consecutive empty lines - if input[i..x].as_bytes().iter().all(u8::is_ascii_whitespace) { - return (&input[x..], &input[0..x]); - } - } - - // line less or equally indented than the starting line - if input[last_end..i] - .as_bytes() - .iter() - .take(indent + 1) - .any(|c| !c.is_ascii_whitespace()) - { - return (&input[last_end..], &input[0..last_end]); - } - - last_end = i; - } - - ("", input) -} - -#[test] -fn parse() { - assert_eq!( - list_item( - r#"+ item1 -+ item2"# - ), - Ok(( - "+ item2", - ( - ListItem { - bullet: "+ ".into(), - indent: 0, - ordered: false, - }, - r#"item1 -"# - ) - )) - ); - assert_eq!( - list_item( - r#"* item1 - -* item2"# - ), - Ok(( - "* item2", - ( - ListItem { - bullet: "* ".into(), - indent: 0, - ordered: false, - }, - r#"item1 - -"# - ) - )) - ); - assert_eq!( - list_item( - r#"* item1 - - -* item2"# - ), - Ok(( - "* item2", - ( - ListItem { - bullet: "* ".into(), - indent: 0, - ordered: false, - }, - r#"item1 - - -"# - ) - )) - ); - assert_eq!( - list_item( - r#"* item1 - -"# - ), - Ok(( - "", - ( - ListItem { - bullet: "* ".into(), - indent: 0, - ordered: false, - }, - r#"item1 - -"# - ) - )) - ); - assert_eq!( - list_item( - r#"+ item1 - + item2 -"# - ), - Ok(( - "", - ( - ListItem { - bullet: "+ ".into(), - 
indent: 0, - ordered: false, - }, - r#"item1 - + item2 -"# - ) - )) - ); - assert_eq!( - list_item( - r#"+ item1 - - + item2 - -+ item 3"# - ), - Ok(( - "+ item 3", - ( - ListItem { - bullet: "+ ".into(), - indent: 0, - ordered: false, - }, - r#"item1 - - + item2 - -"# - ) - )) - ); - assert_eq!( - list_item( - r#" + item1 - - + item2"# - ), - Ok(( - " + item2", - ( - ListItem { - bullet: "+ ".into(), - indent: 2, - ordered: false, - }, - r#"item1 - -"# - ) - )) - ); - assert_eq!( - list_item( - r#" 1. item1 -2. item2 - 3. item3"# - ), - Ok(( - r#"2. item2 - 3. item3"#, - ( - ListItem { - bullet: "1. ".into(), - indent: 2, - ordered: true, - }, - r#"item1 -"# - ) - )) - ); - assert_eq!( - list_item( - r#"+ 1 - - - 2 - - - 3 - -+ 4"# - ), - Ok(( - "+ 4", - ( - ListItem { - bullet: "+ ".into(), - indent: 0, - ordered: false, - }, - r#"1 - - - 2 - - - 3 - -"# - ) - )) - ); -} diff --git a/src/elements/macros.rs b/src/elements/macros.rs deleted file mode 100644 index 8568d5b..0000000 --- a/src/elements/macros.rs +++ /dev/null @@ -1,91 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::{tag, take, take_until, take_while1}, - combinator::{opt, verify}, - sequence::delimited, - IResult, -}; - -/// Macro Object -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct Macros<'a> { - /// Macro name - pub name: Cow<'a, str>, - /// Arguments passed to the macro - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub arguments: Option>, -} - -impl Macros<'_> { - pub(crate) fn parse(input: &str) -> Option<(&str, Macros)> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> Macros<'static> { - Macros { - name: self.name.into_owned().into(), - arguments: self.arguments.map(Into::into).map(Cow::Owned), - } - } -} - -#[inline] -fn parse_internal(input: &str) -> IResult<&str, Macros, ()> { - let (input, _) = tag("{{{")(input)?; - let (input, name) = verify( 
- take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), - |s: &str| s.starts_with(|c: char| c.is_ascii_alphabetic()), - )(input)?; - let (input, arguments) = opt(delimited(tag("("), take_until(")}}}"), take(1usize)))(input)?; - let (input, _) = tag("}}}")(input)?; - - Ok(( - input, - Macros { - name: name.into(), - arguments: arguments.map(Into::into), - }, - )) -} - -#[test] -fn test() { - assert_eq!( - Macros::parse("{{{poem(red,blue)}}}"), - Some(( - "", - Macros { - name: "poem".into(), - arguments: Some("red,blue".into()) - } - )) - ); - assert_eq!( - Macros::parse("{{{poem())}}}"), - Some(( - "", - Macros { - name: "poem".into(), - arguments: Some(")".into()) - } - )) - ); - assert_eq!( - Macros::parse("{{{author}}}"), - Some(( - "", - Macros { - name: "author".into(), - arguments: None - } - )) - ); - - assert!(Macros::parse("{{{0uthor}}}").is_none()); - assert!(Macros::parse("{{{author}}").is_none()); - assert!(Macros::parse("{{{poem(}}}").is_none()); - assert!(Macros::parse("{{{poem)}}}").is_none()); -} diff --git a/src/elements/mod.rs b/src/elements/mod.rs deleted file mode 100644 index ff99f76..0000000 --- a/src/elements/mod.rs +++ /dev/null @@ -1,245 +0,0 @@ -//! 
Org-mode elements - -pub(crate) mod block; -pub(crate) mod clock; -pub(crate) mod comment; -pub(crate) mod cookie; -pub(crate) mod drawer; -pub(crate) mod dyn_block; -pub(crate) mod emphasis; -pub(crate) mod fixed_width; -pub(crate) mod fn_def; -pub(crate) mod fn_ref; -pub(crate) mod inline_call; -pub(crate) mod inline_src; -pub(crate) mod keyword; -pub(crate) mod link; -pub(crate) mod list; -pub(crate) mod macros; -pub(crate) mod planning; -pub(crate) mod radio_target; -pub(crate) mod rule; -pub(crate) mod snippet; -pub(crate) mod table; -pub(crate) mod target; -pub(crate) mod timestamp; -pub(crate) mod title; - -pub use self::{ - block::{ - CenterBlock, CommentBlock, ExampleBlock, ExportBlock, QuoteBlock, SourceBlock, - SpecialBlock, VerseBlock, - }, - clock::Clock, - comment::Comment, - cookie::Cookie, - drawer::Drawer, - dyn_block::DynBlock, - fixed_width::FixedWidth, - fn_def::FnDef, - fn_ref::FnRef, - inline_call::InlineCall, - inline_src::InlineSrc, - keyword::{BabelCall, Keyword}, - link::Link, - list::{List, ListItem}, - macros::Macros, - planning::Planning, - rule::Rule, - snippet::Snippet, - table::{Table, TableCell, TableRow}, - target::Target, - timestamp::{Datetime, Timestamp}, - title::Title, -}; - -use std::borrow::Cow; - -/// Element Enum -#[derive(Debug)] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[cfg_attr(feature = "ser", serde(tag = "type", rename_all = "kebab-case"))] -pub enum Element<'a> { - SpecialBlock(SpecialBlock<'a>), - QuoteBlock(QuoteBlock<'a>), - CenterBlock(CenterBlock<'a>), - VerseBlock(VerseBlock<'a>), - CommentBlock(CommentBlock<'a>), - ExampleBlock(ExampleBlock<'a>), - ExportBlock(ExportBlock<'a>), - SourceBlock(SourceBlock<'a>), - BabelCall(BabelCall<'a>), - Section, - Clock(Clock<'a>), - Cookie(Cookie<'a>), - RadioTarget, - Drawer(Drawer<'a>), - Document { pre_blank: usize }, - DynBlock(DynBlock<'a>), - FnDef(FnDef<'a>), - FnRef(FnRef<'a>), - Headline { level: usize }, - InlineCall(InlineCall<'a>), - 
InlineSrc(InlineSrc<'a>), - Keyword(Keyword<'a>), - Link(Link<'a>), - List(List), - ListItem(ListItem<'a>), - Macros(Macros<'a>), - Snippet(Snippet<'a>), - Text { value: Cow<'a, str> }, - Paragraph { post_blank: usize }, - Rule(Rule), - Timestamp(Timestamp<'a>), - Target(Target<'a>), - Bold, - Strike, - Italic, - Underline, - Verbatim { value: Cow<'a, str> }, - Code { value: Cow<'a, str> }, - Comment(Comment<'a>), - FixedWidth(FixedWidth<'a>), - Title(Title<'a>), - Table(Table<'a>), - TableRow(TableRow), - TableCell(TableCell), -} - -impl Element<'_> { - pub fn is_container(&self) -> bool { - match self { - Element::SpecialBlock(_) - | Element::QuoteBlock(_) - | Element::CenterBlock(_) - | Element::VerseBlock(_) - | Element::Bold - | Element::Document { .. } - | Element::DynBlock(_) - | Element::Headline { .. } - | Element::Italic - | Element::List(_) - | Element::ListItem(_) - | Element::Paragraph { .. } - | Element::Section - | Element::Strike - | Element::Underline - | Element::Title(_) - | Element::Table(_) - | Element::TableRow(TableRow::Header) - | Element::TableRow(TableRow::Body) - | Element::TableCell(_) => true, - _ => false, - } - } - - pub fn into_owned(self) -> Element<'static> { - use Element::*; - - match self { - SpecialBlock(e) => SpecialBlock(e.into_owned()), - QuoteBlock(e) => QuoteBlock(e.into_owned()), - CenterBlock(e) => CenterBlock(e.into_owned()), - VerseBlock(e) => VerseBlock(e.into_owned()), - CommentBlock(e) => CommentBlock(e.into_owned()), - ExampleBlock(e) => ExampleBlock(e.into_owned()), - ExportBlock(e) => ExportBlock(e.into_owned()), - SourceBlock(e) => SourceBlock(e.into_owned()), - BabelCall(e) => BabelCall(e.into_owned()), - Section => Section, - Clock(e) => Clock(e.into_onwed()), - Cookie(e) => Cookie(e.into_owned()), - RadioTarget => RadioTarget, - Drawer(e) => Drawer(e.into_owned()), - Document { pre_blank } => Document { pre_blank }, - DynBlock(e) => DynBlock(e.into_owned()), - FnDef(e) => FnDef(e.into_owned()), - FnRef(e) => 
FnRef(e.into_owned()), - Headline { level } => Headline { level }, - InlineCall(e) => InlineCall(e.into_owned()), - InlineSrc(e) => InlineSrc(e.into_owned()), - Keyword(e) => Keyword(e.into_owned()), - Link(e) => Link(e.into_owned()), - List(e) => List(e), - ListItem(e) => ListItem(e.into_owned()), - Macros(e) => Macros(e.into_owned()), - Snippet(e) => Snippet(e.into_owned()), - Text { value } => Text { - value: value.into_owned().into(), - }, - Paragraph { post_blank } => Paragraph { post_blank }, - Rule(e) => Rule(e), - Timestamp(e) => Timestamp(e.into_owned()), - Target(e) => Target(e.into_owned()), - Bold => Bold, - Strike => Strike, - Italic => Italic, - Underline => Underline, - Verbatim { value } => Verbatim { - value: value.into_owned().into(), - }, - Code { value } => Code { - value: value.into_owned().into(), - }, - Comment(e) => Comment(e.into_owned()), - FixedWidth(e) => FixedWidth(e.into_owned()), - Title(e) => Title(e.into_owned()), - Table(e) => Table(e.into_owned()), - TableRow(e) => TableRow(e), - TableCell(e) => TableCell(e), - } - } -} - -macro_rules! 
impl_from { - ($($ele0:ident),*; $($ele1:ident),*) => { - $( - impl<'a> From<$ele0<'a>> for Element<'a> { - fn from(ele: $ele0<'a>) -> Element<'a> { - Element::$ele0(ele) - } - } - )* - $( - impl<'a> From<$ele1> for Element<'a> { - fn from(ele: $ele1) -> Element<'a> { - Element::$ele1(ele) - } - } - )* - }; -} - -impl_from!( - BabelCall, - CenterBlock, - Clock, - Comment, - CommentBlock, - Cookie, - Drawer, - DynBlock, - ExampleBlock, - ExportBlock, - FixedWidth, - FnDef, - FnRef, - InlineCall, - InlineSrc, - Keyword, - Link, - ListItem, - Macros, - QuoteBlock, - Snippet, - SourceBlock, - SpecialBlock, - Table, - Target, - Timestamp, - Title, - VerseBlock; - List, - Rule, - TableRow -); diff --git a/src/elements/planning.rs b/src/elements/planning.rs deleted file mode 100644 index 1659924..0000000 --- a/src/elements/planning.rs +++ /dev/null @@ -1,98 +0,0 @@ -use memchr::memchr; - -use crate::elements::Timestamp; - -/// Planning element -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct Planning<'a> { - /// Timestamp associated to deadline keyword - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub deadline: Option>, - /// Timestamp associated to scheduled keyword - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub scheduled: Option>, - /// Timestamp associated to closed keyword - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub closed: Option>, -} - -impl Planning<'_> { - #[inline] - pub(crate) fn parse(text: &str) -> Option<(&str, Planning)> { - let (mut deadline, mut scheduled, mut closed) = (None, None, None); - let (mut tail, off) = memchr(b'\n', text.as_bytes()) - .map(|i| (text[..i].trim(), i + 1)) - .unwrap_or_else(|| (text.trim(), text.len())); - - while let Some(i) = memchr(b' ', tail.as_bytes()) { - let next = &tail[i + 1..].trim_start(); - - macro_rules! 
set_timestamp { - ($timestamp:expr) => {{ - let (new_tail, timestamp) = - Timestamp::parse_active(next).or(Timestamp::parse_inactive(next))?; - $timestamp = Some(timestamp); - tail = new_tail.trim_start(); - }}; - } - - match &tail[..i] { - "DEADLINE:" if deadline.is_none() => set_timestamp!(deadline), - "SCHEDULED:" if scheduled.is_none() => set_timestamp!(scheduled), - "CLOSED:" if closed.is_none() => set_timestamp!(closed), - _ => return None, - } - } - - if deadline.is_none() && scheduled.is_none() && closed.is_none() { - None - } else { - Some(( - &text[off..], - Planning { - deadline, - scheduled, - closed, - }, - )) - } - } - - pub fn into_owned(self) -> Planning<'static> { - Planning { - deadline: self.deadline.map(|x| x.into_owned()), - scheduled: self.scheduled.map(|x| x.into_owned()), - closed: self.closed.map(|x| x.into_owned()), - } - } -} - -#[test] -fn prase() { - use crate::elements::Datetime; - - assert_eq!( - Planning::parse("SCHEDULED: <2019-04-08 Mon>\n"), - Some(( - "", - Planning { - scheduled: Some(Timestamp::Active { - start: Datetime { - year: 2019, - month: 4, - day: 8, - dayname: "Mon".into(), - hour: None, - minute: None - }, - repeater: None, - delay: None - }), - deadline: None, - closed: None, - } - )) - ) -} diff --git a/src/elements/radio_target.rs b/src/elements/radio_target.rs deleted file mode 100644 index fd529c7..0000000 --- a/src/elements/radio_target.rs +++ /dev/null @@ -1,40 +0,0 @@ -use nom::{ - bytes::complete::{tag, take_while}, - combinator::verify, - sequence::delimited, - IResult, -}; - -// TODO: text-markup, entities, latex-fragments, subscript and superscript - -#[inline] -pub fn parse_radio_target(input: &str) -> Option<(&str, &str)> { - parse_internal(input).ok() -} - -#[inline] -fn parse_internal(input: &str) -> IResult<&str, &str, ()> { - let (input, contents) = delimited( - tag("<<<"), - verify( - take_while(|c: char| c != '<' && c != '\n' && c != '>'), - |s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| 
c != ' '), - ), - tag(">>>"), - )(input)?; - - Ok((input, contents)) -} - -#[test] -fn parse() { - assert_eq!(parse_radio_target("<<>>"), Some(("", "target"))); - assert_eq!(parse_radio_target("<<>>"), Some(("", "tar get"))); - - assert!(parse_radio_target("<<>>").is_none()); - assert!(parse_radio_target("<<< target>>>").is_none()); - assert!(parse_radio_target("<<>>").is_none()); - assert!(parse_radio_target("<<get>>>").is_none()); - assert!(parse_radio_target("<<>>").is_none()); - assert!(parse_radio_target("<<>").is_none()); -} diff --git a/src/elements/rule.rs b/src/elements/rule.rs deleted file mode 100644 index b331746..0000000 --- a/src/elements/rule.rs +++ /dev/null @@ -1,48 +0,0 @@ -use nom::{bytes::complete::take_while_m_n, character::complete::space0, IResult}; - -use crate::parse::combinators::{blank_lines_count, eol}; - -#[derive(Debug, Default, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -pub struct Rule { - /// Numbers of blank lines between rule line and next non-blank line or - /// buffer's end - pub post_blank: usize, -} - -impl Rule { - pub(crate) fn parse(input: &str) -> Option<(&str, Rule)> { - parse_internal(input).ok() - } -} - -fn parse_internal(input: &str) -> IResult<&str, Rule, ()> { - let (input, _) = space0(input)?; - let (input, _) = take_while_m_n(5, usize::max_value(), |c| c == '-')(input)?; - let (input, _) = eol(input)?; - let (input, post_blank) = blank_lines_count(input)?; - Ok((input, Rule { post_blank })) -} - -#[test] -fn parse() { - assert_eq!(Rule::parse("-----"), Some(("", Rule { post_blank: 0 }))); - assert_eq!(Rule::parse("--------"), Some(("", Rule { post_blank: 0 }))); - assert_eq!( - Rule::parse("-----\n\n\n"), - Some(("", Rule { post_blank: 2 })) - ); - assert_eq!(Rule::parse("----- \n"), Some(("", Rule { post_blank: 0 }))); - - assert!(Rule::parse("").is_none()); - assert!(Rule::parse("----").is_none()); - assert!(Rule::parse("----").is_none()); - 
assert!(Rule::parse("None----").is_none()); - assert!(Rule::parse("None ----").is_none()); - assert!(Rule::parse("None------").is_none()); - assert!(Rule::parse("----None----").is_none()); - assert!(Rule::parse("\t\t----").is_none()); - assert!(Rule::parse("------None").is_none()); - assert!(Rule::parse("----- None").is_none()); -} diff --git a/src/elements/snippet.rs b/src/elements/snippet.rs deleted file mode 100644 index 31e2117..0000000 --- a/src/elements/snippet.rs +++ /dev/null @@ -1,100 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::{tag, take, take_until, take_while1}, - sequence::{delimited, separated_pair}, - IResult, -}; - -/// Export Snippet Object -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct Snippet<'a> { - /// Back-end name - pub name: Cow<'a, str>, - /// Export code - pub value: Cow<'a, str>, -} - -impl Snippet<'_> { - pub(crate) fn parse(input: &str) -> Option<(&str, Snippet)> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> Snippet<'static> { - Snippet { - name: self.name.into_owned().into(), - value: self.value.into_owned().into(), - } - } -} - -#[inline] -fn parse_internal(input: &str) -> IResult<&str, Snippet, ()> { - let (input, (name, value)) = delimited( - tag("@@"), - separated_pair( - take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-'), - tag(":"), - take_until("@@"), - ), - take(2usize), - )(input)?; - - Ok(( - input, - Snippet { - name: name.into(), - value: value.into(), - }, - )) -} - -#[test] -fn parse() { - assert_eq!( - Snippet::parse("@@html:@@"), - Some(( - "", - Snippet { - name: "html".into(), - value: "".into() - } - )) - ); - assert_eq!( - Snippet::parse("@@latex:any arbitrary LaTeX code@@"), - Some(( - "", - Snippet { - name: "latex".into(), - value: "any arbitrary LaTeX code".into(), - } - )) - ); - assert_eq!( - Snippet::parse("@@html:@@"), - Some(( - "", - Snippet { - name: "html".into(), - value: 
"".into(), - } - )) - ); - assert_eq!( - Snippet::parse("@@html:

@

@@"), - Some(( - "", - Snippet { - name: "html".into(), - value: "

@

".into(), - } - )) - ); - - assert!(Snippet::parse("@@html:@").is_none()); - assert!(Snippet::parse("@@html@@").is_none()); - assert!(Snippet::parse("@@:@@").is_none()); -} diff --git a/src/elements/table.rs b/src/elements/table.rs deleted file mode 100644 index 752083a..0000000 --- a/src/elements/table.rs +++ /dev/null @@ -1,169 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - error::{make_error, ErrorKind}, - Err, IResult, -}; - -use crate::parse::combinators::{blank_lines_count, line, lines_while}; - -/// Table Element -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[cfg_attr(feature = "ser", serde(tag = "table_type"))] -pub enum Table<'a> { - /// "org" type table - #[cfg_attr(feature = "ser", serde(rename = "org"))] - Org { - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - tblfm: Option>, - /// Numbers of blank lines between last table's line and next non-blank - /// line or buffer's end - post_blank: usize, - has_header: bool, - }, - /// "table.el" type table - #[cfg_attr(feature = "ser", serde(rename = "table.el"))] - TableEl { - value: Cow<'a, str>, - /// Numbers of blank lines between last table's line and next non-blank - /// line or buffer's end - post_blank: usize, - }, -} - -impl Table<'_> { - pub fn parse_table_el(input: &str) -> Option<(&str, Table)> { - Self::parse_table_el_internal(input).ok() - } - - fn parse_table_el_internal(input: &str) -> IResult<&str, Table, ()> { - let (_, first_line) = line(input)?; - - let first_line = first_line.trim(); - - // Table.el tables start at lines beginning with "+-" string and followed by plus or minus signs - if !first_line.starts_with("+-") - || first_line - .as_bytes() - .iter() - .any(|&c| c != b'+' && c != b'-') - { - // TODO: better error kind - return Err(Err::Error(make_error(input, ErrorKind::Many0))); - } - - // Table.el tables end at the first line not starting with either a vertical line or a plus 
sign. - let (input, content) = lines_while(|line| { - let line = line.trim_start(); - line.starts_with('|') || line.starts_with('+') - })(input)?; - - let (input, post_blank) = blank_lines_count(input)?; - - Ok(( - input, - Table::TableEl { - value: content.into(), - post_blank, - }, - )) - } - - pub fn into_owned(self) -> Table<'static> { - match self { - Table::Org { - tblfm, - post_blank, - has_header, - } => Table::Org { - tblfm: tblfm.map(Into::into).map(Cow::Owned), - post_blank, - has_header, - }, - Table::TableEl { value, post_blank } => Table::TableEl { - value: value.into_owned().into(), - post_blank, - }, - } - } -} - -/// Table Row Element -/// -/// # Syntax -/// -/// ```text -/// | 0 | 1 | 2 | <- TableRow::Body -/// | 0 | 1 | 2 | <- TableRow::Body -/// ``` -/// -/// ```text -/// |-----+-----+-----| <- ignores -/// | 0 | 1 | 2 | <- TableRow::Header -/// | 0 | 1 | 2 | <- TableRow::Header -/// |-----+-----+-----| <- TableRow::HeaderRule -/// | 0 | 1 | 2 | <- TableRow::Body -/// |-----+-----+-----| <- TableRow::BodyRule -/// | 0 | 1 | 2 | <- TableRow::Body -/// |-----+-----+-----| <- TableRow::BodyRule -/// |-----+-----+-----| <- TableRow::BodyRule -/// | 0 | 1 | 2 | <- TableRow::Body -/// |-----+-----+-----| <- ignores -/// ``` -/// -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[cfg_attr(feature = "ser", serde(tag = "table_row_type"))] -#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))] -pub enum TableRow { - /// This row is part of table header - Header, - /// This row is part of table body - Body, - /// This row is between table header and body - HeaderRule, - /// This row is between table body and next body - BodyRule, -} - -/// Table Cell Element -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[cfg_attr(feature = "ser", serde(tag = "table_cell_type"))] -#[cfg_attr(feature = "ser", 
serde(rename_all = "kebab-case"))] -pub enum TableCell { - /// Header cell - Header, - /// Body cell, or standard cell - Body, -} - -#[test] -fn parse_table_el_() { - assert_eq!( - Table::parse_table_el( - r#" +---+ - | | - +---+ - -"# - ), - Some(( - "", - Table::TableEl { - value: r#" +---+ - | | - +---+ -"# - .into(), - post_blank: 1 - } - )) - ); - assert!(Table::parse_table_el("").is_none()); - assert!(Table::parse_table_el("+----|---").is_none()); -} diff --git a/src/elements/target.rs b/src/elements/target.rs deleted file mode 100644 index b847b59..0000000 --- a/src/elements/target.rs +++ /dev/null @@ -1,78 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::{tag, take_while}, - combinator::verify, - sequence::delimited, - IResult, -}; - -/// Target Object -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct Target<'a> { - /// Target ID - pub target: Cow<'a, str>, -} - -impl Target<'_> { - #[inline] - pub(crate) fn parse(input: &str) -> Option<(&str, Target)> { - parse_internal(input).ok() - } - - pub fn into_owned(self) -> Target<'static> { - Target { - target: self.target.into_owned().into(), - } - } -} - -#[inline] -fn parse_internal(input: &str) -> IResult<&str, Target, ()> { - let (input, target) = delimited( - tag("<<"), - verify( - take_while(|c: char| c != '<' && c != '\n' && c != '>'), - |s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '), - ), - tag(">>"), - )(input)?; - - Ok(( - input, - Target { - target: target.into(), - }, - )) -} - -#[test] -fn parse() { - assert_eq!( - Target::parse("<>"), - Some(( - "", - Target { - target: "target".into() - } - )) - ); - assert_eq!( - Target::parse("<>"), - Some(( - "", - Target { - target: "tar get".into() - } - )) - ); - - assert!(Target::parse("<>").is_none()); - assert!(Target::parse("<< target>>").is_none()); - assert!(Target::parse("<>").is_none()); - assert!(Target::parse("<get>>").is_none()); - 
assert!(Target::parse("<>").is_none()); - assert!(Target::parse("<").is_none()); -} diff --git a/src/elements/timestamp.rs b/src/elements/timestamp.rs deleted file mode 100644 index 15f1255..0000000 --- a/src/elements/timestamp.rs +++ /dev/null @@ -1,482 +0,0 @@ -use std::borrow::Cow; - -use nom::{ - bytes::complete::{tag, take, take_till, take_while, take_while_m_n}, - character::complete::{space0, space1}, - combinator::{map, map_res, opt}, - sequence::preceded, - IResult, -}; - -/// Datetime Struct -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct Datetime<'a> { - pub year: u16, - pub month: u8, - pub day: u8, - pub dayname: Cow<'a, str>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub hour: Option, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub minute: Option, -} - -impl Datetime<'_> { - pub fn into_owned(self) -> Datetime<'static> { - Datetime { - year: self.year, - month: self.month, - day: self.day, - dayname: self.dayname.into_owned().into(), - hour: self.hour, - minute: self.minute, - } - } -} - -#[cfg(feature = "chrono")] -mod chrono { - use super::Datetime; - use chrono::*; - - impl Into for Datetime<'_> { - fn into(self) -> NaiveDate { - (&self).into() - } - } - - impl Into for Datetime<'_> { - fn into(self) -> NaiveTime { - (&self).into() - } - } - - impl Into for Datetime<'_> { - fn into(self) -> NaiveDateTime { - (&self).into() - } - } - - impl Into> for Datetime<'_> { - fn into(self) -> DateTime { - (&self).into() - } - } - - impl Into for &Datetime<'_> { - fn into(self) -> NaiveDate { - NaiveDate::from_ymd(self.year.into(), self.month.into(), self.day.into()) - } - } - - impl Into for &Datetime<'_> { - fn into(self) -> NaiveTime { - NaiveTime::from_hms( - self.hour.unwrap_or_default().into(), - self.minute.unwrap_or_default().into(), - 0, - ) - } - } - - impl Into for &Datetime<'_> { - fn 
into(self) -> NaiveDateTime { - NaiveDateTime::new(self.into(), self.into()) - } - } - - impl Into> for &Datetime<'_> { - fn into(self) -> DateTime { - DateTime::from_utc(self.into(), Utc) - } - } -} - -/// Timestamp Object -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))] -#[cfg_attr(feature = "ser", serde(tag = "timestamp_type"))] -#[derive(Debug, Clone)] -pub enum Timestamp<'a> { - Active { - start: Datetime<'a>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - repeater: Option>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - delay: Option>, - }, - Inactive { - start: Datetime<'a>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - repeater: Option>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - delay: Option>, - }, - ActiveRange { - start: Datetime<'a>, - end: Datetime<'a>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - repeater: Option>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - delay: Option>, - }, - InactiveRange { - start: Datetime<'a>, - end: Datetime<'a>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - repeater: Option>, - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - delay: Option>, - }, - Diary { - value: Cow<'a, str>, - }, -} - -impl Timestamp<'_> { - pub(crate) fn parse_active(input: &str) -> Option<(&str, Timestamp)> { - parse_active(input).ok() - } - - pub(crate) fn parse_inactive(input: &str) -> Option<(&str, Timestamp)> { - parse_inactive(input).ok() - } - - pub(crate) fn parse_diary(input: &str) -> Option<(&str, Timestamp)> { - parse_diary(input).ok() - } - - pub fn into_owned(self) -> Timestamp<'static> { - match self { - Timestamp::Active { - start, - repeater, - delay, - } 
=> Timestamp::Active { - start: start.into_owned(), - repeater: repeater.map(Into::into).map(Cow::Owned), - delay: delay.map(Into::into).map(Cow::Owned), - }, - Timestamp::Inactive { - start, - repeater, - delay, - } => Timestamp::Inactive { - start: start.into_owned(), - repeater: repeater.map(Into::into).map(Cow::Owned), - delay: delay.map(Into::into).map(Cow::Owned), - }, - Timestamp::ActiveRange { - start, - end, - repeater, - delay, - } => Timestamp::ActiveRange { - start: start.into_owned(), - end: end.into_owned(), - repeater: repeater.map(Into::into).map(Cow::Owned), - delay: delay.map(Into::into).map(Cow::Owned), - }, - Timestamp::InactiveRange { - start, - end, - repeater, - delay, - } => Timestamp::InactiveRange { - start: start.into_owned(), - end: end.into_owned(), - repeater: repeater.map(Into::into).map(Cow::Owned), - delay: delay.map(Into::into).map(Cow::Owned), - }, - Timestamp::Diary { value } => Timestamp::Diary { - value: value.into_owned().into(), - }, - } - } -} - -pub fn parse_active(input: &str) -> IResult<&str, Timestamp, ()> { - let (input, _) = tag("<")(input)?; - let (input, start) = parse_datetime(input)?; - - if input.starts_with('-') { - let (input, (hour, minute)) = parse_time(&input[1..])?; - let (input, _) = space0(input)?; - // TODO: delay-or-repeater - let (input, _) = tag(">")(input)?; - let mut end = start.clone(); - end.hour = Some(hour); - end.minute = Some(minute); - return Ok(( - input, - Timestamp::ActiveRange { - start, - end, - repeater: None, - delay: None, - }, - )); - } - - let (input, _) = space0(input)?; - // TODO: delay-or-repeater - let (input, _) = tag(">")(input)?; - - if input.starts_with("--<") { - let (input, end) = parse_datetime(&input["--<".len()..])?; - let (input, _) = space0(input)?; - // TODO: delay-or-repeater - let (input, _) = tag(">")(input)?; - Ok(( - input, - Timestamp::ActiveRange { - start, - end, - repeater: None, - delay: None, - }, - )) - } else { - Ok(( - input, - Timestamp::Active { - 
start, - repeater: None, - delay: None, - }, - )) - } -} - -pub fn parse_inactive(input: &str) -> IResult<&str, Timestamp, ()> { - let (input, _) = tag("[")(input)?; - let (input, start) = parse_datetime(input)?; - - if input.starts_with('-') { - let (input, (hour, minute)) = parse_time(&input[1..])?; - let (input, _) = space0(input)?; - // TODO: delay-or-repeater - let (input, _) = tag("]")(input)?; - let mut end = start.clone(); - end.hour = Some(hour); - end.minute = Some(minute); - return Ok(( - input, - Timestamp::InactiveRange { - start, - end, - repeater: None, - delay: None, - }, - )); - } - - let (input, _) = space0(input)?; - // TODO: delay-or-repeater - let (input, _) = tag("]")(input)?; - - if input.starts_with("--[") { - let (input, end) = parse_datetime(&input["--[".len()..])?; - let (input, _) = space0(input)?; - // TODO: delay-or-repeater - let (input, _) = tag("]")(input)?; - Ok(( - input, - Timestamp::InactiveRange { - start, - end, - repeater: None, - delay: None, - }, - )) - } else { - Ok(( - input, - Timestamp::Inactive { - start, - repeater: None, - delay: None, - }, - )) - } -} - -pub fn parse_diary(input: &str) -> IResult<&str, Timestamp, ()> { - let (input, _) = tag("<%%(")(input)?; - let (input, value) = take_till(|c| c == ')' || c == '>' || c == '\n')(input)?; - let (input, _) = tag(")>")(input)?; - - Ok(( - input, - Timestamp::Diary { - value: value.into(), - }, - )) -} - -fn parse_time(input: &str) -> IResult<&str, (u8, u8), ()> { - let (input, hour) = map_res(take_while_m_n(1, 2, |c: char| c.is_ascii_digit()), |num| { - u8::from_str_radix(num, 10) - })(input)?; - let (input, _) = tag(":")(input)?; - let (input, minute) = map_res(take(2usize), |num| u8::from_str_radix(num, 10))(input)?; - Ok((input, (hour, minute))) -} - -fn parse_datetime(input: &str) -> IResult<&str, Datetime, ()> { - let parse_u8 = |num| u8::from_str_radix(num, 10); - - let (input, year) = map_res(take(4usize), |num| u16::from_str_radix(num, 10))(input)?; - let 
(input, _) = tag("-")(input)?; - let (input, month) = map_res(take(2usize), parse_u8)(input)?; - let (input, _) = tag("-")(input)?; - let (input, day) = map_res(take(2usize), parse_u8)(input)?; - let (input, _) = space1(input)?; - let (input, dayname) = take_while(|c: char| { - !c.is_ascii_whitespace() - && !c.is_ascii_digit() - && c != '+' - && c != '-' - && c != ']' - && c != '>' - })(input)?; - let (input, (hour, minute)) = map(opt(preceded(space1, parse_time)), |time| { - (time.map(|t| t.0), time.map(|t| t.1)) - })(input)?; - - Ok(( - input, - Datetime { - year, - month, - day, - dayname: dayname.into(), - hour, - minute, - }, - )) -} - -// TODO -// #[cfg_attr(test, derive(PartialEq))] -// #[cfg_attr(feature = "ser", derive(serde::Serialize))] -// #[derive(Debug, Copy, Clone)] -// pub enum RepeaterType { -// Cumulate, -// CatchUp, -// Restart, -// } - -// #[cfg_attr(test, derive(PartialEq))] -// #[cfg_attr(feature = "ser", derive(serde::Serialize))] -// #[derive(Debug, Copy, Clone)] -// pub enum DelayType { -// All, -// First, -// } - -// #[cfg_attr(test, derive(PartialEq))] -// #[cfg_attr(feature = "ser", derive(serde::Serialize))] -// #[derive(Debug, Copy, Clone)] -// pub enum TimeUnit { -// Hour, -// Day, -// Week, -// Month, -// Year, -// } - -// #[cfg_attr(test, derive(PartialEq))] -// #[cfg_attr(feature = "ser", derive(serde::Serialize))] -// #[derive(Debug, Copy, Clone)] -// pub struct Repeater { -// pub ty: RepeaterType, -// pub value: usize, -// pub unit: TimeUnit, -// } - -// #[cfg_attr(test, derive(PartialEq))] -// #[cfg_attr(feature = "ser", derive(serde::Serialize))] -// #[derive(Debug, Copy, Clone)] -// pub struct Delay { -// pub ty: DelayType, -// pub value: usize, -// pub unit: TimeUnit, -// } - -#[test] -fn parse() { - assert_eq!( - parse_inactive("[2003-09-16 Tue]"), - Ok(( - "", - Timestamp::Inactive { - start: Datetime { - year: 2003, - month: 9, - day: 16, - dayname: "Tue".into(), - hour: None, - minute: None - }, - repeater: None, - delay: 
None, - }, - )) - ); - assert_eq!( - parse_inactive("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]"), - Ok(( - "", - Timestamp::InactiveRange { - start: Datetime { - year: 2003, - month: 9, - day: 16, - dayname: "Tue".into(), - hour: Some(9), - minute: Some(39) - }, - end: Datetime { - year: 2003, - month: 9, - day: 16, - dayname: "Tue".into(), - hour: Some(10), - minute: Some(39), - }, - repeater: None, - delay: None - }, - )) - ); - assert_eq!( - parse_active("<2003-09-16 Tue 09:39-10:39>"), - Ok(( - "", - Timestamp::ActiveRange { - start: Datetime { - year: 2003, - month: 9, - day: 16, - dayname: "Tue".into(), - hour: Some(9), - minute: Some(39), - }, - end: Datetime { - year: 2003, - month: 9, - day: 16, - dayname: "Tue".into(), - hour: Some(10), - minute: Some(39), - }, - repeater: None, - delay: None - }, - )) - ); -} diff --git a/src/elements/title.rs b/src/elements/title.rs deleted file mode 100644 index d8ccb39..0000000 --- a/src/elements/title.rs +++ /dev/null @@ -1,510 +0,0 @@ -//! 
Headline Title - -#[cfg(not(feature = "indexmap"))] -pub type PropertiesMap = std::collections::HashMap; - -#[cfg(feature = "indexmap")] -pub type PropertiesMap = indexmap::IndexMap; - -use std::borrow::Cow; - -use memchr::memrchr2; -use nom::{ - branch::alt, - bytes::complete::{tag, take_until, take_while}, - character::complete::{anychar, line_ending, space1}, - combinator::{map, opt, verify}, - error::{make_error, ErrorKind}, - multi::fold_many0, - sequence::{delimited, preceded}, - Err, IResult, -}; - -use crate::{ - config::ParseConfig, - elements::{drawer::parse_drawer_without_blank, Planning, Timestamp}, - parse::combinators::{blank_lines_count, line, one_word}, -}; - -/// Title Element -#[cfg_attr(test, derive(PartialEq))] -#[cfg_attr(feature = "ser", derive(serde::Serialize))] -#[derive(Debug, Clone)] -pub struct Title<'a> { - /// Headline level, number of stars - pub level: usize, - /// Headline priority cookie - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub priority: Option, - /// Headline title tags - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Vec::is_empty"))] - pub tags: Vec>, - /// Headline todo keyword - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub keyword: Option>, - /// Raw headline's text, without the stars and the tags - pub raw: Cow<'a, str>, - /// Planning element associated to this headline - #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] - pub planning: Option>>, - /// Property drawer associated to this headline - #[cfg_attr( - feature = "ser", - serde(skip_serializing_if = "PropertiesMap::is_empty") - )] - pub properties: PropertiesMap, Cow<'a, str>>, - /// Numbers of blank lines between last title's line and next non-blank line - /// or buffer's end - pub post_blank: usize, -} - -impl Title<'_> { - pub(crate) fn parse<'a>( - input: &'a str, - config: &ParseConfig, - ) -> Option<(&'a str, (Title<'a>, &'a str))> { - 
parse_title(input, config).ok() - } - - // TODO: fn is_quoted(&self) -> bool { } - // TODO: fn is_footnote_section(&self) -> bool { } - - /// Returns this headline's closed timestamp, or `None` if not set. - pub fn closed(&self) -> Option<&Timestamp> { - self.planning.as_ref().and_then(|p| p.closed.as_ref()) - } - - /// Returns this headline's scheduled timestamp, or `None` if not set. - pub fn scheduled(&self) -> Option<&Timestamp> { - self.planning.as_ref().and_then(|p| p.scheduled.as_ref()) - } - - /// Returns this headline's deadline timestamp, or `None` if not set. - pub fn deadline(&self) -> Option<&Timestamp> { - self.planning.as_ref().and_then(|p| p.deadline.as_ref()) - } - - /// Returns `true` if this headline is archived - pub fn is_archived(&self) -> bool { - self.tags.iter().any(|tag| tag == "ARCHIVE") - } - - /// Returns `true` if this headline is commented - pub fn is_commented(&self) -> bool { - self.raw.starts_with("COMMENT") - && (self.raw.len() == 7 || self.raw[7..].starts_with(char::is_whitespace)) - } - - pub fn into_owned(self) -> Title<'static> { - Title { - level: self.level, - priority: self.priority, - tags: self - .tags - .into_iter() - .map(|s| s.into_owned().into()) - .collect(), - keyword: self.keyword.map(Into::into).map(Cow::Owned), - raw: self.raw.into_owned().into(), - planning: self.planning.map(|p| Box::new(p.into_owned())), - properties: self - .properties - .into_iter() - .map(|(k, v)| (k.into_owned().into(), v.into_owned().into())) - .collect(), - post_blank: self.post_blank, - } - } -} - -impl Default for Title<'_> { - fn default() -> Title<'static> { - Title { - level: 1, - priority: None, - tags: Vec::new(), - keyword: None, - raw: Cow::Borrowed(""), - planning: None, - properties: PropertiesMap::new(), - post_blank: 0, - } - } -} - -fn white_spaces_or_eol(input: &str) -> IResult<&str, &str, ()> { - alt((space1, line_ending))(input) -} - -#[inline] -fn parse_title<'a>( - input: &'a str, - config: &ParseConfig, -) -> 
IResult<&'a str, (Title<'a>, &'a str), ()> { - let (input, level) = map(take_while(|c: char| c == '*'), |s: &str| s.len())(input)?; - - debug_assert!(level > 0); - - let (input, keyword) = opt(preceded( - space1, - verify(one_word, |s: &str| { - config.todo_keywords.0.iter().any(|x| x == s) - || config.todo_keywords.1.iter().any(|x| x == s) - }), - ))(input)?; - - let (input, priority) = opt(delimited( - space1, - delimited( - tag("[#"), - verify(anychar, |c: &char| c.is_ascii_uppercase()), - tag("]"), - ), - white_spaces_or_eol, - ))(input)?; - let (input, tail) = line(input)?; - let tail = tail.trim(); - - // tags can be separated by space or \t - let (raw, tags) = memrchr2(b' ', b'\t', tail.as_bytes()) - .map(|i| (tail[0..i].trim(), &tail[i + 1..])) - .filter(|(_, x)| is_tag_line(x)) - .unwrap_or((tail, "")); - - let tags = tags - .split(':') - .filter(|s| !s.is_empty()) - .map(Into::into) - .collect(); - - let (input, planning) = Planning::parse(input) - .map(|(input, planning)| (input, Some(Box::new(planning)))) - .unwrap_or((input, None)); - - let (input, properties) = opt(parse_properties_drawer)(input)?; - let (input, post_blank) = blank_lines_count(input)?; - - Ok(( - input, - ( - Title { - properties: properties.unwrap_or_default(), - level, - keyword: keyword.map(Into::into), - priority, - tags, - raw: raw.into(), - planning, - post_blank, - }, - raw, - ), - )) -} - -fn is_tag_line(input: &str) -> bool { - input.len() > 2 - && input.starts_with(':') - && input.ends_with(':') - && input.chars().all(|ch| { - ch.is_alphanumeric() || ch == '_' || ch == '@' || ch == '#' || ch == '%' || ch == ':' - }) -} - -#[inline] -fn parse_properties_drawer( - input: &str, -) -> IResult<&str, PropertiesMap, Cow<'_, str>>, ()> { - let (input, (drawer, content)) = parse_drawer_without_blank(input.trim_start())?; - if drawer.name != "PROPERTIES" { - return Err(Err::Error(make_error(input, ErrorKind::Tag))); - } - let (_, map) = fold_many0( - parse_node_property, - 
PropertiesMap::new(), - |mut acc: PropertiesMap<_, _>, (name, value)| { - acc.insert(name.into(), value.into()); - acc - }, - )(content)?; - Ok((input, map)) -} - -#[inline] -fn parse_node_property(input: &str) -> IResult<&str, (&str, &str), ()> { - let (input, _) = blank_lines_count(input)?; - let input = input.trim_start(); - let (input, name) = map(delimited(tag(":"), take_until(":"), tag(":")), |s: &str| { - s.trim_end_matches('+') - })(input)?; - let (input, value) = line(input)?; - Ok((input, (name, value.trim()))) -} - -#[test] -fn parse_title_() { - use crate::config::DEFAULT_CONFIG; - - assert_eq!( - parse_title("**** DONE [#A] COMMENT Title :tag:a2%:", &DEFAULT_CONFIG), - Ok(( - "", - ( - Title { - level: 4, - keyword: Some("DONE".into()), - priority: Some('A'), - raw: "COMMENT Title".into(), - tags: vec!["tag".into(), "a2%".into()], - planning: None, - properties: PropertiesMap::new(), - post_blank: 0, - }, - "COMMENT Title" - ) - )) - ); - assert_eq!( - parse_title("**** ToDO [#A] COMMENT Title", &DEFAULT_CONFIG), - Ok(( - "", - ( - Title { - level: 4, - keyword: None, - priority: None, - raw: "ToDO [#A] COMMENT Title".into(), - tags: vec![], - planning: None, - properties: PropertiesMap::new(), - post_blank: 0, - }, - "ToDO [#A] COMMENT Title" - ) - )) - ); - assert_eq!( - parse_title("**** T0DO [#A] COMMENT Title", &DEFAULT_CONFIG), - Ok(( - "", - ( - Title { - level: 4, - keyword: None, - priority: None, - raw: "T0DO [#A] COMMENT Title".into(), - tags: vec![], - planning: None, - properties: PropertiesMap::new(), - post_blank: 0, - }, - "T0DO [#A] COMMENT Title" - ) - )) - ); - assert_eq!( - parse_title("**** DONE [#1] COMMENT Title", &DEFAULT_CONFIG), - Ok(( - "", - ( - Title { - level: 4, - keyword: Some("DONE".into()), - priority: None, - raw: "[#1] COMMENT Title".into(), - tags: vec![], - planning: None, - properties: PropertiesMap::new(), - post_blank: 0, - }, - "[#1] COMMENT Title" - ) - )) - ); - assert_eq!( - parse_title("**** DONE [#a] 
COMMENT Title", &DEFAULT_CONFIG), - Ok(( - "", - ( - Title { - level: 4, - keyword: Some("DONE".into()), - priority: None, - raw: "[#a] COMMENT Title".into(), - tags: vec![], - planning: None, - properties: PropertiesMap::new(), - post_blank: 0, - }, - "[#a] COMMENT Title" - ) - )) - ); - - // https://github.com/PoiScript/orgize/issues/20 - assert_eq!( - parse_title("** DONE [#B]::", &DEFAULT_CONFIG), - Ok(( - "", - ( - Title { - level: 2, - keyword: Some("DONE".into()), - priority: None, - raw: "[#B]::".into(), - tags: vec![], - planning: None, - properties: PropertiesMap::new(), - post_blank: 0, - }, - "[#B]::" - ) - )) - ); - - assert_eq!( - parse_title("**** Title :tag:a2%", &DEFAULT_CONFIG), - Ok(( - "", - ( - Title { - level: 4, - keyword: None, - priority: None, - raw: "Title :tag:a2%".into(), - tags: vec![], - planning: None, - properties: PropertiesMap::new(), - post_blank: 0, - }, - "Title :tag:a2%" - ) - )) - ); - assert_eq!( - parse_title("**** Title tag:a2%:", &DEFAULT_CONFIG), - Ok(( - "", - ( - Title { - level: 4, - keyword: None, - priority: None, - raw: "Title tag:a2%:".into(), - tags: vec![], - planning: None, - properties: PropertiesMap::new(), - post_blank: 0, - }, - "Title tag:a2%:" - ) - )) - ); - - assert_eq!( - parse_title( - "**** DONE Title", - &ParseConfig { - todo_keywords: (vec![], vec![]), - ..Default::default() - } - ), - Ok(( - "", - ( - Title { - level: 4, - keyword: None, - priority: None, - raw: "DONE Title".into(), - tags: vec![], - planning: None, - properties: PropertiesMap::new(), - post_blank: 0, - }, - "DONE Title" - ) - )) - ); - assert_eq!( - parse_title( - "**** TASK [#A] Title", - &ParseConfig { - todo_keywords: (vec!["TASK".to_string()], vec![]), - ..Default::default() - } - ), - Ok(( - "", - ( - Title { - level: 4, - keyword: Some("TASK".into()), - priority: Some('A'), - raw: "Title".into(), - tags: vec![], - planning: None, - properties: PropertiesMap::new(), - post_blank: 0, - }, - "Title" - ) - )) - ); -} - -#[test] 
-fn parse_properties_drawer_() { - assert_eq!( - parse_properties_drawer(" :PROPERTIES:\n :CUSTOM_ID: id\n :END:"), - Ok(( - "", - vec![("CUSTOM_ID".into(), "id".into())] - .into_iter() - .collect::>() - )) - ) -} - -#[test] -fn preserve_properties_drawer_order() { - let mut properties = Vec::default(); - // Use a large number of properties to reduce false pass rate, since HashMap - // is non-deterministic. There are roughly 10^18 possible derangements of this sequence. - for i in 0..20 { - // Avoid alphabetic or numeric order. - let j = (i + 7) % 20; - properties.push(( - Cow::Owned(format!( - "{}{}", - if i % 3 == 0 { - "FOO" - } else if i % 3 == 1 { - "QUX" - } else { - "BAR" - }, - j - )), - Cow::Owned(i.to_string()), - )); - } - - let mut s = String::default(); - for (k, v) in &properties { - s += &format!(" :{}: {}\n", k, v); - } - let drawer = format!(" :PROPERTIES:\n{}:END:\n", &s); - let mut parsed: Vec<(_, _)> = parse_properties_drawer(&drawer) - .unwrap() - .1 - .into_iter() - .collect(); - - #[cfg(not(feature = "indexmap"))] - parsed.sort(); - #[cfg(not(feature = "indexmap"))] - properties.sort(); - - assert_eq!(parsed, properties); -} diff --git a/src/entities.rs b/src/entities.rs new file mode 100644 index 0000000..5f1faf9 --- /dev/null +++ b/src/entities.rs @@ -0,0 +1,468 @@ +// https://git.sr.ht/~bzg/org-mode/tree/bfa4f9d5aa3e5c94974cae7a459cb5e5b4b15f52/item/lisp/org-entities.el#L85 +// nil -> false +// t -> true +// \x00A0 -> \\x00A0 +#[rustfmt::skip] +pub const ENTITIES: &[(&str, &str, bool, &str, &str, &str, &str)] = &[ +// ("* Letters" +// Latin +("Agrave", "\\`{A}", false, "À", "A", "À", "À"), +("agrave", "\\`{a}", false, "à", "a", "à", "à"), +("Aacute", "\\'{A}", false, "Á", "A", "Á", "Á"), +("aacute", "\\'{a}", false, "á", "a", "á", "á"), +("Acirc", "\\^{A}", false, "Â", "A", "Â", "Â"), +("acirc", "\\^{a}", false, "â", "a", "â", "â"), +("Amacr", "\\={A}", false, "Ā", "A", "Ã", "Ã"), +("amacr", "\\={a}", false, "ā", "a", "ã", "ã"), 
+("Atilde", "\\~{A}", false, "Ã", "A", "Ã", "Ã"), +("atilde", "\\~{a}", false, "ã", "a", "ã", "ã"), +("Auml", "\\\"{A}", false, "Ä", "Ae", "Ä", "Ä"), +("auml", "\\\"{a}", false, "ä", "ae", "ä", "ä"), +("Aring", "\\AA{}", false, "Å", "A", "Å", "Å"), +("AA", "\\AA{}", false, "Å", "A", "Å", "Å"), +("aring", "\\aa{}", false, "å", "a", "å", "å"), +("AElig", "\\AE{}", false, "Æ", "AE", "Æ", "Æ"), +("aelig", "\\ae{}", false, "æ", "ae", "æ", "æ"), +("Ccedil", "\\c{C}", false, "Ç", "C", "Ç", "Ç"), +("ccedil", "\\c{c}", false, "ç", "c", "ç", "ç"), +("Egrave", "\\`{E}", false, "È", "E", "È", "È"), +("egrave", "\\`{e}", false, "è", "e", "è", "è"), +("Eacute", "\\'{E}", false, "É", "E", "É", "É"), +("eacute", "\\'{e}", false, "é", "e", "é", "é"), +("Ecirc", "\\^{E}", false, "Ê", "E", "Ê", "Ê"), +("ecirc", "\\^{e}", false, "ê", "e", "ê", "ê"), +("Euml", "\\\"{E}", false, "Ë", "E", "Ë", "Ë"), +("euml", "\\\"{e}", false, "ë", "e", "ë", "ë"), +("Igrave", "\\`{I}", false, "Ì", "I", "Ì", "Ì"), +("igrave", "\\`{i}", false, "ì", "i", "ì", "ì"), +("Iacute", "\\'{I}", false, "Í", "I", "Í", "Í"), +("iacute", "\\'{i}", false, "í", "i", "í", "í"), +("Idot", "\\.{I}", false, "&idot;", "I", "İ", "İ"), +("inodot", "\\i", false, "ı", "i", "ı", "ı"), +("Icirc", "\\^{I}", false, "Î", "I", "Î", "Î"), +("icirc", "\\^{i}", false, "î", "i", "î", "î"), +("Iuml", "\\\"{I}", false, "Ï", "I", "Ï", "Ï"), +("iuml", "\\\"{i}", false, "ï", "i", "ï", "ï"), +("Ntilde", "\\~{N}", false, "Ñ", "N", "Ñ", "Ñ"), +("ntilde", "\\~{n}", false, "ñ", "n", "ñ", "ñ"), +("Ograve", "\\`{O}", false, "Ò", "O", "Ò", "Ò"), +("ograve", "\\`{o}", false, "ò", "o", "ò", "ò"), +("Oacute", "\\'{O}", false, "Ó", "O", "Ó", "Ó"), +("oacute", "\\'{o}", false, "ó", "o", "ó", "ó"), +("Ocirc", "\\^{O}", false, "Ô", "O", "Ô", "Ô"), +("ocirc", "\\^{o}", false, "ô", "o", "ô", "ô"), +("Otilde", "\\~{O}", false, "Õ", "O", "Õ", "Õ"), +("otilde", "\\~{o}", false, "õ", "o", "õ", "õ"), +("Ouml", "\\\"{O}", false, "Ö", "Oe", "Ö", "Ö"), +("ouml", 
"\\\"{o}", false, "ö", "oe", "ö", "ö"), +("Oslash", "\\O", false, "Ø", "O", "Ø", "Ø"), +("oslash", "\\o{}", false, "ø", "o", "ø", "ø"), +("OElig", "\\OE{}", false, "Œ", "OE", "OE", "Œ"), +("oelig", "\\oe{}", false, "œ", "oe", "oe", "œ"), +("Scaron", "\\v{S}", false, "Š", "S", "S", "Š"), +("scaron", "\\v{s}", false, "š", "s", "s", "š"), +("szlig", "\\ss{}", false, "ß", "ss", "ß", "ß"), +("Ugrave", "\\`{U}", false, "Ù", "U", "Ù", "Ù"), +("ugrave", "\\`{u}", false, "ù", "u", "ù", "ù"), +("Uacute", "\\'{U}", false, "Ú", "U", "Ú", "Ú"), +("uacute", "\\'{u}", false, "ú", "u", "ú", "ú"), +("Ucirc", "\\^{U}", false, "Û", "U", "Û", "Û"), +("ucirc", "\\^{u}", false, "û", "u", "û", "û"), +("Uuml", "\\\"{U}", false, "Ü", "Ue", "Ü", "Ü"), +("uuml", "\\\"{u}", false, "ü", "ue", "ü", "ü"), +("Yacute", "\\'{Y}", false, "Ý", "Y", "Ý", "Ý"), +("yacute", "\\'{y}", false, "ý", "y", "ý", "ý"), +("Yuml", "\\\"{Y}", false, "Ÿ", "Y", "Y", "Ÿ"), +("yuml", "\\\"{y}", false, "ÿ", "y", "ÿ", "ÿ"), + +// Latin (special face) +("fnof", "\\textit{f}", false, "ƒ", "f", "f", "ƒ"), +("real", "\\Re", true, "ℜ", "R", "R", "ℜ"), +("image", "\\Im", true, "ℑ", "I", "I", "ℑ"), +("weierp", "\\wp", true, "℘", "P", "P", "℘"), +("ell", "\\ell", true, "ℓ", "ell", "ell", "ℓ"), +("imath", "\\imath", true, "ı", "[dotless i]", "dotless i", "ı"), +("jmath", "\\jmath", true, "ȷ", "[dotless j]", "dotless j", "ȷ"), + +// Greek +("Alpha", "A", false, "Α", "Alpha", "Alpha", "Α"), +("alpha", "\\alpha", true, "α", "alpha", "alpha", "α"), +("Beta", "B", false, "Β", "Beta", "Beta", "Β"), +("beta", "\\beta", true, "β", "beta", "beta", "β"), +("Gamma", "\\Gamma", true, "Γ", "Gamma", "Gamma", "Γ"), +("gamma", "\\gamma", true, "γ", "gamma", "gamma", "γ"), +("Delta", "\\Delta", true, "Δ", "Delta", "Delta", "Δ"), +("delta", "\\delta", true, "δ", "delta", "delta", "δ"), +("Epsilon", "E", false, "Ε", "Epsilon", "Epsilon", "Ε"), +("epsilon", "\\epsilon", true, "ε", "epsilon", "epsilon", "ε"), +("varepsilon", "\\varepsilon", true, 
"ε", "varepsilon", "varepsilon", "ε"), +("Zeta", "Z", false, "Ζ", "Zeta", "Zeta", "Ζ"), +("zeta", "\\zeta", true, "ζ", "zeta", "zeta", "ζ"), +("Eta", "H", false, "Η", "Eta", "Eta", "Η"), +("eta", "\\eta", true, "η", "eta", "eta", "η"), +("Theta", "\\Theta", true, "Θ", "Theta", "Theta", "Θ"), +("theta", "\\theta", true, "θ", "theta", "theta", "θ"), +("thetasym", "\\vartheta", true, "ϑ", "theta", "theta", "ϑ"), +("vartheta", "\\vartheta", true, "ϑ", "theta", "theta", "ϑ"), +("Iota", "I", false, "Ι", "Iota", "Iota", "Ι"), +("iota", "\\iota", true, "ι", "iota", "iota", "ι"), +("Kappa", "K", false, "Κ", "Kappa", "Kappa", "Κ"), +("kappa", "\\kappa", true, "κ", "kappa", "kappa", "κ"), +("Lambda", "\\Lambda", true, "Λ", "Lambda", "Lambda", "Λ"), +("lambda", "\\lambda", true, "λ", "lambda", "lambda", "λ"), +("Mu", "M", false, "Μ", "Mu", "Mu", "Μ"), +("mu", "\\mu", true, "μ", "mu", "mu", "μ"), +("nu", "\\nu", true, "ν", "nu", "nu", "ν"), +("Nu", "N", false, "Ν", "Nu", "Nu", "Ν"), +("Xi", "\\Xi", true, "Ξ", "Xi", "Xi", "Ξ"), +("xi", "\\xi", true, "ξ", "xi", "xi", "ξ"), +("Omicron", "O", false, "Ο", "Omicron", "Omicron", "Ο"), +("omicron", "\\textit{o}", false, "ο", "omicron", "omicron", "ο"), +("Pi", "\\Pi", true, "Π", "Pi", "Pi", "Π"), +("pi", "\\pi", true, "π", "pi", "pi", "π"), +("Rho", "P", false, "Ρ", "Rho", "Rho", "Ρ"), +("rho", "\\rho", true, "ρ", "rho", "rho", "ρ"), +("Sigma", "\\Sigma", true, "Σ", "Sigma", "Sigma", "Σ"), +("sigma", "\\sigma", true, "σ", "sigma", "sigma", "σ"), +("sigmaf", "\\varsigma", true, "ς", "sigmaf", "sigmaf", "ς"), +("varsigma", "\\varsigma", true, "ς", "varsigma", "varsigma", "ς"), +("Tau", "T", false, "Τ", "Tau", "Tau", "Τ"), +("Upsilon", "\\Upsilon", true, "Υ", "Upsilon", "Upsilon", "Υ"), +("upsih", "\\Upsilon", true, "ϒ", "upsilon", "upsilon", "ϒ"), +("upsilon", "\\upsilon", true, "υ", "upsilon", "upsilon", "υ"), +("Phi", "\\Phi", true, "Φ", "Phi", "Phi", "Φ"), +("phi", "\\phi", true, "φ", "phi", "phi", "ɸ"), +("varphi", "\\varphi", true, 
"ϕ", "varphi", "varphi", "φ"), +("Chi", "X", false, "Χ", "Chi", "Chi", "Χ"), +("chi", "\\chi", true, "χ", "chi", "chi", "χ"), +("acutex", "\\acute x", true, "´x", "'x", "'x", "𝑥́"), +("Psi", "\\Psi", true, "Ψ", "Psi", "Psi", "Ψ"), +("psi", "\\psi", true, "ψ", "psi", "psi", "ψ"), +("tau", "\\tau", true, "τ", "tau", "tau", "τ"), +("Omega", "\\Omega", true, "Ω", "Omega", "Omega", "Ω"), +("omega", "\\omega", true, "ω", "omega", "omega", "ω"), +("piv", "\\varpi", true, "ϖ", "omega-pi", "omega-pi", "ϖ"), +("varpi", "\\varpi", true, "ϖ", "omega-pi", "omega-pi", "ϖ"), +("partial", "\\partial", true, "∂", "[partial differential]", "[partial differential]", "∂"), + +// Hebrew +("alefsym", "\\aleph", true, "ℵ", "aleph", "aleph", "ℵ"), +("aleph", "\\aleph", true, "ℵ", "aleph", "aleph", "ℵ"), +("gimel", "\\gimel", true, "ℷ", "gimel", "gimel", "ℷ"), +("beth", "\\beth", true, "ℶ", "beth", "beth", "ב"), +("dalet", "\\daleth", true, "ℸ", "dalet", "dalet", "ד"), + +// Icelandic +("ETH", "\\DH{}", false, "Ð", "D", "Ð", "Ð"), +("eth", "\\dh{}", false, "ð", "dh", "ð", "ð"), +("THORN", "\\TH{}", false, "Þ", "TH", "Þ", "Þ"), +("thorn", "\\th{}", false, "þ", "th", "þ", "þ"), + + //, "* Punctuation", +// Dots and Marks +("dots", "\\dots{}", false, "…", "...", "...", "…"), +("cdots", "\\cdots{}", true, "⋯", "...", "...", "⋯"), +("hellip", "\\dots{}", false, "…", "...", "...", "…"), +("middot", "\\textperiodcentered{}", false, "·", ".", "·", "·"), +("iexcl", "!`", false, "¡", "!", "¡", "¡"), +("iquest", "?`", false, "¿", "?", "¿", "¿"), + +// Dash-like +("shy", "\\-", false, "­", "", "", ""), +("ndash", "--", false, "–", "-", "-", "–"), +("mdash", "---", false, "—", "--", "--", "—"), + +// Quotations +("quot", "\\textquotedbl{}", false, """, "\"", "\"", "\""), +("acute", "\\textasciiacute{}", false, "´", "'", "´", "´"), +("ldquo", "\\textquotedblleft{}", false, "“", "\"", "\"", "“"), +("rdquo", "\\textquotedblright{}", false, "”", "\"", "\"", "”"), +("bdquo", "\\quotedblbase{}", false, "„", 
"\"", "\"", "„"), +("lsquo", "\\textquoteleft{}", false, "‘", "`", "`", "‘"), +("rsquo", "\\textquoteright{}", false, "’", "'", "'", "’"), +("sbquo", "\\quotesinglbase{}", false, "‚", ", ", ", ", "‚"), +("laquo", "\\guillemotleft{}", false, "«", "<<", "«", "«"), +("raquo", "\\guillemotright{}", false, "»", ">>", "»", "»"), +("lsaquo", "\\guilsinglleft{}", false, "‹", "<", "<", "‹"), +("rsaquo", "\\guilsinglright{}", false, "›", ">", ">", "›"), + +//, "* Other", +// Misc. (often used) +("circ", "\\^{}", false, "ˆ", "^", "^", "∘"), +("vert", "\\vert{}", true, "|", "|", "|", "|"), +("vbar", "|", false, "|", "|", "|", "|"), +("brvbar", "\\textbrokenbar{}", false, "¦", "|", "¦", "¦"), +("S", "\\S", false, "§", "section", "§", "§"), +("sect", "\\S", false, "§", "section", "§", "§"), +("P", "\\P{}", false, "¶", "paragraph", "¶", "¶"), +("para", "\\P{}", false, "¶", "paragraph", "¶", "¶"), +("amp", "\\&", false, "&", "&", "&", "&"), +("lt", "\\textless{}", false, "<", "<", "<", "<"), +("gt", "\\textgreater{}", false, ">", ">", ">", ">"), +("tilde", "\\textasciitilde{}", false, "~", "~", "~", "~"), +("slash", "/", false, "/", "/", "/", "/"), +("plus", "+", false, "+", "+", "+", "+"), +("under", "\\_", false, "_", "_", "_", "_"), +("equal", "=", false, "=", "=", "=", "="), +("asciicirc", "\\textasciicircum{}", false, "^", "^", "^", "^"), +("dagger", "\\textdagger{}", false, "†", "[dagger]", "[dagger]", "†"), +("dag", "\\dag{}", false, "†", "[dagger]", "[dagger]", "†"), +("Dagger", "\\textdaggerdbl{}", false, "‡", "[doubledagger]", "[doubledagger]", "‡"), +("ddag", "\\ddag{}", false, "‡", "[doubledagger]", "[doubledagger]", "‡"), + +// Whitespace +("nbsp", "~", false, " ", ", ", "\\x00A0", "\\x00A0"), +("ensp", "\\hspace*{.5em}", false, " ", ", ", ", ", " "), +("emsp", "\\hspace*{1em}", false, " ", ", ", ", ", " "), +("thinsp", "\\hspace*{.2em}", false, " ", ", ", ", ", " "), + +// Currency +("curren", "\\textcurrency{}", false, "¤", "curr.", "¤", "¤"), +("cent", 
"\\textcent{}", false, "¢", "cent", "¢", "¢"), +("pound", "\\pounds{}", false, "£", "pound", "£", "£"), +("yen", "\\textyen{}", false, "¥", "yen", "¥", "¥"), +("euro", "\\texteuro{}", false, "€", "EUR", "EUR", "€"), +("EUR", "\\texteuro{}", false, "€", "EUR", "EUR", "€"), +("dollar", "\\$", false, "$", "$", "$", "$"), +("USD", "\\$", false, "$", "$", "$", "$"), + +// Property Marks +("copy", "\\textcopyright{}", false, "©", "(c)", "©", "©"), +("reg", "\\textregistered{}", false, "®", "(r)", "®", "®"), +("trade", "\\texttrademark{}", false, "™", "TM", "TM", "™"), + +// Science, etrueal. +("minus", "-", true, "−", "-", "-", "−"), +("pm", "\\textpm{}", false, "±", "+-", "±", "±"), +("plusmn", "\\textpm{}", false, "±", "+-", "±", "±"), +("times", "\\texttimes{}", false, "×", "*", "×", "×"), +("frasl", "/", false, "⁄", "/", "/", "⁄"), +("colon", "\\colon", true, ":", ":", ":", ":"), +("div", "\\textdiv{}", false, "÷", "/", "÷", "÷"), +("frac12", "\\textonehalf{}", false, "½", "1/2", "½", "½"), +("frac14", "\\textonequarter{}", false, "¼", "1/4", "¼", "¼"), +("frac34", "\\textthreequarters{}", false, "¾", "3/4", "¾", "¾"), +("permil", "\\textperthousand{}", false, "‰", "per thousand", "per thousand", "‰"), +("sup1", "\\textonesuperior{}", false, "¹", "^1", "¹", "¹"), +("sup2", "\\texttwosuperior{}", false, "²", "^2", "²", "²"), +("sup3", "\\textthreesuperior{}", false, "³", "^3", "³", "³"), +("radic", "\\sqrt{\\,}", true, "√", "[square root]", "[square root]", "√"), +("sum", "\\sum", true, "∑", "[sum]", "[sum]", "∑"), +("prod", "\\prod", true, "∏", "[product]", "[n-ary product]", "∏"), +("micro", "\\textmu{}", false, "µ", "micro", "µ", "µ"), +("macr", "\\textasciimacron{}", false, "¯", "[macron]", "¯", "¯"), +("deg", "\\textdegree{}", false, "°", "degree", "°", "°"), +("prime", "\\prime", true, "′", "'", "'", "′"), +("Prime", "\\prime{}\\prime", true, "″", "''", "''", "″"), +("infin", "\\infty", true, "∞", "[infinity]", "[infinity]", "∞"), +("infty", "\\infty", true, 
"∞", "[infinity]", "[infinity]", "∞"), +("prop", "\\propto", true, "∝", "[proportional to]", "[proportional to]", "∝"), +("propto", "\\propto", true, "∝", "[proportional to]", "[proportional to]", "∝"), +("not", "\\textlnot{}", false, "¬", "[angled dash]", "¬", "¬"), +("neg", "\\neg{}", true, "¬", "[angled dash]", "¬", "¬"), +("land", "\\land", true, "∧", "[logical and]", "[logical and]", "∧"), +("wedge", "\\wedge", true, "∧", "[logical and]", "[logical and]", "∧"), +("lor", "\\lor", true, "∨", "[logical or]", "[logical or]", "∨"), +("vee", "\\vee", true, "∨", "[logical or]", "[logical or]", "∨"), +("cap", "\\cap", true, "∩", "[intersection]", "[intersection]", "∩"), +("cup", "\\cup", true, "∪", "[union]", "[union]", "∪"), +("smile", "\\smile", true, "⌣", "[cup product]", "[cup product]", "⌣"), +("frown", "\\frown", true, "⌢", "[Cap product]", "[cap product]", "⌢"), +("int", "\\int", true, "∫", "[integral]", "[integral]", "∫"), +("therefore", "\\therefore", true, "∴", "[therefore]", "[therefore]", "∴"), +("there4", "\\therefore", true, "∴", "[therefore]", "[therefore]", "∴"), +("because", "\\because", true, "∵", "[because]", "[because]", "∵"), +("sim", "\\sim", true, "∼", "~", "~", "∼"), +("cong", "\\cong", true, "≅", "[approx. equal to]", "[approx. equal to]", "≅"), +("simeq", "\\simeq", true, "≅", "[approx. equal to]", "[approx. 
equal to]", "≅"), +("asymp", "\\asymp", true, "≈", "[, almostrueequal to]", "[, almostrueequal to]", "≈"), +("approx", "\\approx", true, "≈", "[, almostrueequal to]", "[, almostrueequal to]", "≈"), +("ne", "\\ne", true, "≠", "[, notrueequal to]", "[, notrueequal to]", "≠"), +("neq", "\\neq", true, "≠", "[, notrueequal to]", "[, notrueequal to]", "≠"), +("equiv", "\\equiv", true, "≡", "[identical to]", "[identical to]", "≡"), + +("triangleq", "\\triangleq", true, "≜", "[defined to]", "[defined to]", "≜"), +("le", "\\le", true, "≤", "<=", "<=", "≤"), +("leq", "\\le", true, "≤", "<=", "<=", "≤"), +("ge", "\\ge", true, "≥", ">=", ">=", "≥"), +("geq", "\\ge", true, "≥", ">=", ">=", "≥"), +("lessgtr", "\\lessgtr", true, "≶", "[less than or greater than]", "[less than or greater than]", "≶"), +("lesseqgtr", "\\lesseqgtr", true, "⋚", "[less than or equal or greater than or equal]", "[less than or equal or greater than or equal]", "⋚"), +("ll", "\\ll", true, "≪", "<<", "<<", "≪"), +("Ll", "\\lll", true, "⋘", "<<<", "<<<", "⋘"), +("lll", "\\lll", true, "⋘", "<<<", "<<<", "⋘"), +("gg", "\\gg", true, "≫", ">>", ">>", "≫"), +("Gg", "\\ggg", true, "⋙", ">>>", ">>>", "⋙"), +("ggg", "\\ggg", true, "⋙", ">>>", ">>>", "⋙"), +("prec", "\\prec", true, "≺", "[precedes]", "[precedes]", "≺"), +("preceq", "\\preceq", true, "≼", "[precedes or equal]", "[precedes or equal]", "≼"), +("preccurlyeq", "\\preccurlyeq", true, "≼", "[precedes or equal]", "[precedes or equal]", "≼"), +("succ", "\\succ", true, "≻", "[succeeds]", "[succeeds]", "≻"), +("succeq", "\\succeq", true, "≽", "[succeeds or equal]", "[succeeds or equal]", "≽"), +("succcurlyeq", "\\succcurlyeq", true, "≽", "[succeeds or equal]", "[succeeds or equal]", "≽"), +("sub", "\\subset", true, "⊂", "[, subsetrueof]", "[, subsetrueof]", "⊂"), +("subset", "\\subset", true, "⊂", "[, subsetrueof]", "[, subsetrueof]", "⊂"), +("sup", "\\supset", true, "⊃", "[, supersetrueof]", "[, supersetrueof]", "⊃"), +("supset", "\\supset", true, "⊃", "[, 
supersetrueof]", "[, supersetrueof]", "⊃"), +("nsub", "\\not\\subset", true, "⊄", "[, notruea, subsetrueof]", "[, notruea, subsetrueof", "⊄"), +("sube", "\\subseteq", true, "⊆", "[, subsetrueof or equal to]", "[, subsetrueof or equal to]", "⊆"), +("nsup", "\\not\\supset", true, "⊅", "[, notruea, supersetrueof]", "[, notruea, supersetrueof]", "⊅"), +("supe", "\\supseteq", true, "⊇", "[, supersetrueof or equal to]", "[, supersetrueof or equal to]", "⊇"), +("setminus", "\\setminus", true, "∖", "\\", "\\", "⧵"), +("forall", "\\forall", true, "∀", "[for all]", "[for all]", "∀"), +("exist", "\\exists", true, "∃", "[there exists]", "[there exists]", "∃"), +("exists", "\\exists", true, "∃", "[there exists]", "[there exists]", "∃"), +("nexist", "\\nexists", true, "∃", "[there does, notrueexists]", "[there does, notrue exists]", "∄"), +("nexists", "\\nexists", true, "∃", "[there does, notrueexists]", "[there does, notrue exists]", "∄"), +("empty", "\\emptyset", true, "∅", "[empty set]", "[empty set]", "∅"), +("emptyset", "\\emptyset", true, "∅", "[empty set]", "[empty set]", "∅"), +("isin", "\\in", true, "∈", "[, elementrueof]", "[, elementrueof]", "∈"), +("in", "\\in", true, "∈", "[, elementrueof]", "[, elementrueof]", "∈"), +("notin", "\\notin", true, "∉", "[, notruean, elementrueof]", "[, notruean, elementrueof]", "∉"), +("ni", "\\ni", true, "∋", "[contains as member]", "[contains as member]", "∋"), +("nabla", "\\nabla", true, "∇", "[nabla]", "[nabla]", "∇"), +("ang", "\\angle", true, "∠", "[angle]", "[angle]", "∠"), +("angle", "\\angle", true, "∠", "[angle]", "[angle]", "∠"), +("perp", "\\perp", true, "⊥", "[up tack]", "[up tack]", "⊥"), +("parallel", "\\parallel", true, "∥", "||", "||", "∥"), +("sdot", "\\cdot", true, "⋅", "[dot]", "[dot]", "⋅"), +("cdot", "\\cdot", true, "⋅", "[dot]", "[dot]", "⋅"), +("lceil", "\\lceil", true, "⌈", "[, leftrueceiling]", "[, leftrueceiling]", "⌈"), +("rceil", "\\rceil", true, "⌉", "[, rightrueceiling]", "[, rightrueceiling]", "⌉"), 
+("lfloor", "\\lfloor", true, "⌊", "[, leftruefloor]", "[, leftruefloor]", "⌊"), +("rfloor", "\\rfloor", true, "⌋", "[, rightruefloor]", "[, rightruefloor]", "⌋"), +("lang", "\\langle", true, "⟨", "<", "<", "⟨"), +("rang", "\\rangle", true, "⟩", ">", ">", "⟩"), +("langle", "\\langle", true, "⟨", "<", "<", "⟨"), +("rangle", "\\rangle", true, "⟩", ">", ">", "⟩"), +("hbar", "\\hbar", true, "ℏ", "hbar", "hbar", "ℏ"), +("mho", "\\mho", true, "℧", "mho", "mho", "℧"), + +// Arrows +("larr", "\\leftarrow", true, "←", "<-", "<-", "←"), +("leftarrow", "\\leftarrow", true, "←", "<-", "<-", "←"), +("gets", "\\gets", true, "←", "<-", "<-", "←"), +("lArr", "\\Leftarrow", true, "⇐", "<=", "<=", "⇐"), +("Leftarrow", "\\Leftarrow", true, "⇐", "<=", "<=", "⇐"), +("uarr", "\\uparrow", true, "↑", "[uparrow]", "[uparrow]", "↑"), +("uparrow", "\\uparrow", true, "↑", "[uparrow]", "[uparrow]", "↑"), +("uArr", "\\Uparrow", true, "⇑", "[dbluparrow]", "[dbluparrow]", "⇑"), +("Uparrow", "\\Uparrow", true, "⇑", "[dbluparrow]", "[dbluparrow]", "⇑"), +("rarr", "\\rightarrow", true, "→", "->", "->", "→"), +("to", "\\to", true, "→", "->", "->", "→"), +("rightarrow", "\\rightarrow", true, "→", "->", "->", "→"), +("rArr", "\\Rightarrow", true, "⇒", "=>", "=>", "⇒"), +("Rightarrow", "\\Rightarrow", true, "⇒", "=>", "=>", "⇒"), +("darr", "\\downarrow", true, "↓", "[downarrow]", "[downarrow]", "↓"), +("downarrow", "\\downarrow", true, "↓", "[downarrow]", "[downarrow]", "↓"), +("dArr", "\\Downarrow", true, "⇓", "[dbldownarrow]", "[dbldownarrow]", "⇓"), +("Downarrow", "\\Downarrow", true, "⇓", "[dbldownarrow]", "[dbldownarrow]", "⇓"), +("harr", "\\leftrightarrow", true, "↔", "<->", "<->", "↔"), +("leftrightarrow", "\\leftrightarrow", true, "↔", "<->", "<->", "↔"), +("hArr", "\\Leftrightarrow", true, "⇔", "<=>", "<=>", "⇔"), +("Leftrightarrow", "\\Leftrightarrow", true, "⇔", "<=>", "<=>", "⇔"), +("crarr", "\\hookleftarrow", true, "↵", "<-'", "<-'", "↵"), +("hookleftarrow", "\\hookleftarrow", true, "↵", 
"<-'", "<-'", "↵"), + +// Function names +("arccos", "\\arccos", true, "arccos", "arccos", "arccos", "arccos"), +("arcsin", "\\arcsin", true, "arcsin", "arcsin", "arcsin", "arcsin"), +("arctan", "\\arctan", true, "arctan", "arctan", "arctan", "arctan"), +("arg", "\\arg", true, "arg", "arg", "arg", "arg"), +("cos", "\\cos", true, "cos", "cos", "cos", "cos"), +("cosh", "\\cosh", true, "cosh", "cosh", "cosh", "cosh"), +("cot", "\\cot", true, "cot", "cot", "cot", "cot"), +("coth", "\\coth", true, "coth", "coth", "coth", "coth"), +("csc", "\\csc", true, "csc", "csc", "csc", "csc"), +("deg", "\\deg", true, "°", "deg", "deg", "deg"), +("det", "\\det", true, "det", "det", "det", "det"), +("dim", "\\dim", true, "dim", "dim", "dim", "dim"), +("exp", "\\exp", true, "exp", "exp", "exp", "exp"), +("gcd", "\\gcd", true, "gcd", "gcd", "gcd", "gcd"), +("hom", "\\hom", true, "hom", "hom", "hom", "hom"), +("inf", "\\inf", true, "inf", "inf", "inf", "inf"), +("ker", "\\ker", true, "ker", "ker", "ker", "ker"), +("lg", "\\lg", true, "lg", "lg", "lg", "lg"), +("lim", "\\lim", true, "lim", "lim", "lim", "lim"), +("liminf", "\\liminf", true, "liminf", "liminf", "liminf", "liminf"), +("limsup", "\\limsup", true, "limsup", "limsup", "limsup", "limsup"), +("ln", "\\ln", true, "ln", "ln", "ln", "ln"), +("log", "\\log", true, "log", "log", "log", "log"), +("max", "\\max", true, "max", "max", "max", "max"), +("min", "\\min", true, "min", "min", "min", "min"), +("Pr", "\\Pr", true, "Pr", "Pr", "Pr", "Pr"), +("sec", "\\sec", true, "sec", "sec", "sec", "sec"), +("sin", "\\sin", true, "sin", "sin", "sin", "sin"), +("sinh", "\\sinh", true, "sinh", "sinh", "sinh", "sinh"), +("sup", "\\sup", true, "⊃", "sup", "sup", "sup"), +("tan", "\\tan", true, "tan", "tan", "tan", "tan"), +("tanh", "\\tanh", true, "tanh", "tanh", "tanh", "tanh"), + +// Signs & Symbols +("bull", "\\textbullet{}", false, "•", "*", "*", "•"), +("bullet", "\\textbullet{}", false, "•", "*", "*", "•"), +("star", "\\star", true, "*", 
"*", "*", "⋆"), +("lowast", "\\ast", true, "∗", "*", "*", "∗"), +("ast", "\\ast", true, "∗", "*", "*", "*"), +("odot", "\\odot", true, "o", "[circled dot]", "[circled dot]", "ʘ"), +("oplus", "\\oplus", true, "⊕", "[circled plus]", "[circled plus]", "⊕"), +("otimes", "\\otimes", true, "⊗", "[circled times]", "[circled times]", "⊗"), +("check", "\\checkmark", true, "✓", "[checkmark]", "[checkmark]", "✓"), +("checkmark", "\\checkmark", true, "✓", "[checkmark]", "[checkmark]", "✓"), + +// Miscellaneous (seldom used) +("ordf", "\\textordfeminine{}", false, "ª", "_a_", "ª", "ª"), +("ordm", "\\textordmasculine{}", false, "º", "_o_", "º", "º"), +("cedil", "\\c{}", false, "¸", "[cedilla]", "¸", "¸"), +("oline", "\\overline{~}", true, "‾", "[overline]", "¯", "‾"), +("uml", "\\textasciidieresis{}", false, "¨", "[diaeresis]", "¨", "¨"), +("zwnj", "\\/{}", false, "‌", "", "", "‌"), +("zwj", "", false, "‍", "", "", "‍"), +("lrm", "", false, "‎", "", "", "\u{200E}"), +("rlm", "", false, "‏", "", "", "\u{200F}"), + +// Smilies +("smiley", "\\ddot\\smile", true, "☺", ":-)", ":-)", "☺"), +("blacksmile", "\\ddot\\smile", true, "☻", ":-)", ":-)", "☻"), +("sad", "\\ddot\\frown", true, "☹", ":-(", ":-(", "☹"), +("frowny", "\\ddot\\frown", true, "☹", ":-(", ":-(", "☹"), + +// Suits +("clubs", "\\clubsuit", true, "♣", "[clubs]", "[clubs]", "♣"), +("clubsuit", "\\clubsuit", true, "♣", "[clubs]", "[clubs]", "♣"), +("spades", "\\spadesuit", true, "♠", "[spades]", "[spades]", "♠"), +("spadesuit", "\\spadesuit", true, "♠", "[spades]", "[spades]", "♠"), +("hearts", "\\heartsuit", true, "♥", "[hearts]", "[hearts]", "♥"), +("heartsuit", "\\heartsuit", true, "♥", "[hearts]", "[hearts]", "♥"), +("diams", "\\diamondsuit", true, "♦", "[diamonds]", "[diamonds]", "◆"), +("diamondsuit", "\\diamondsuit", true, "♦", "[diamonds]", "[diamonds]", "◆"), +("diamond", "\\diamondsuit", true, "⋄", "[diamond]", "[diamond]", "◆"), +("Diamond", "\\diamondsuit", true, "⋄", "[diamond]", "[diamond]", "◆"), +("loz", 
"\\lozenge", true, "◊", "[lozenge]", "[lozenge]", "⧫"), + +// spaces +// fish shell: +// for i in (seq 1 20) +// echo '("'(string repeat -n $i ' ')'", "\\\\hspace*{'(math '0.5*'$i)'em}", true, "'(string repeat -n $i ' ')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i '\\\\x2002')'")' +// end +(" ", "\\hspace*{0.5em}", true, " ", " ", " ", "\\x2002"), +(" ", "\\hspace*{1em}", true, "  ", " ", " ", "\\x2002\\x2002"), +(" ", "\\hspace*{1.5em}", true, "   ", " ", " ", "\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{2em}", true, "    ", " ", " ", "\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{2.5em}", true, "     ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{3em}", true, "      ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{3.5em}", true, "       ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{4em}", true, "        ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{4.5em}", true, "         ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{5em}", true, "          ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{5.5em}", true, "           ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{6em}", true, "            ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{6.5em}", true, "             ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{7em}", true, "              ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{7.5em}", true, "               ", " ", " ", 
"\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{8em}", true, "                ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{8.5em}", true, "                 ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{9em}", true, "                  ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{9.5em}", true, "                   ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +(" ", "\\hspace*{10em}", true, "                    ", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"), +]; diff --git a/src/export/event.rs b/src/export/event.rs new file mode 100644 index 0000000..46f65c6 --- /dev/null +++ b/src/export/event.rs @@ -0,0 +1,73 @@ +use crate::ast::*; + +#[non_exhaustive] +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum Container { + Document(Document), + Section(Section), + Paragraph(Paragraph), + Headline(Headline), + + OrgTable(OrgTable), + OrgTableRow(OrgTableRow), + OrgTableCell(OrgTableCell), + TableEl(TableEl), + + List(List), + ListItem(ListItem), + Drawer(Drawer), + DynBlock(DynBlock), + + FnDef(FnDef), + Comment(Comment), + FixedWidth(FixedWidth), + SpecialBlock(SpecialBlock), + QuoteBlock(QuoteBlock), + CenterBlock(CenterBlock), + VerseBlock(VerseBlock), + CommentBlock(CommentBlock), + ExampleBlock(ExampleBlock), + ExportBlock(ExportBlock), + SourceBlock(SourceBlock), + + Link(Link), + RadioTarget(RadioTarget), + FnRef(FnRef), + Target(Target), + Bold(Bold), + 
Strike(Strike), + Italic(Italic), + Underline(Underline), + Verbatim(Verbatim), + Code(Code), + Superscript(Superscript), + Subscript(Subscript), + BabelCall(BabelCall), + PropertyDrawer(PropertyDrawer), + AffiliatedKeyword(AffiliatedKeyword), + Keyword(Keyword), +} + +#[non_exhaustive] +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum Event { + Enter(Container), + Leave(Container), + + Text(Token), + Macros(Macros), + Cookie(Cookie), + InlineCall(InlineCall), + InlineSrc(InlineSrc), + Clock(Clock), + LineBreak(LineBreak), + Snippet(Snippet), + Rule(Rule), + Timestamp(Timestamp), + LatexFragment(LatexFragment), + LatexEnvironment(LatexEnvironment), + Entity(Entity), + + #[cfg(feature = "syntax-org-fc")] + Cloze(Cloze), +} diff --git a/src/export/html.rs b/src/export/html.rs index e0b0bd1..fa31d42 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -1,10 +1,12 @@ +use rowan::NodeOrToken; +use std::cmp::min; use std::fmt; -use std::io::{Error, Result as IOResult, Write}; +use std::fmt::Write as _; -use jetscii::{bytes, BytesConst}; - -use crate::elements::{Element, Table, TableCell, TableRow, Timestamp}; -use crate::export::write_datetime; +use super::event::{Container, Event}; +use super::TraversalContext; +use super::Traverser; +use crate::{SyntaxElement, SyntaxKind, SyntaxNode}; /// A wrapper for escaping sensitive characters in html. /// @@ -26,11 +28,7 @@ impl> fmt::Display for HtmlEscape { let content = self.0.as_ref(); let bytes = content.as_bytes(); - lazy_static::lazy_static! 
{ - static ref ESCAPE_BYTES: BytesConst = bytes!(b'<', b'>', b'&', b'\'', b'"'); - } - - while let Some(off) = ESCAPE_BYTES.find(&bytes[pos..]) { + while let Some(off) = jetscii::bytes!(b'<', b'>', b'&', b'\'', b'"').find(&bytes[pos..]) { write!(f, "{}", &content[pos..pos + off])?; pos += off + 1; @@ -41,7 +39,7 @@ impl> fmt::Display for HtmlEscape { b'&' => write!(f, "&")?, b'\'' => write!(f, "'")?, b'"' => write!(f, """)?, - _ => unreachable!(), + _ => {} } } @@ -49,349 +47,295 @@ impl> fmt::Display for HtmlEscape { } } -pub trait HtmlHandler>: Default { - fn start(&mut self, w: W, element: &Element) -> Result<(), E>; - fn end(&mut self, w: W, element: &Element) -> Result<(), E>; -} - -/// Default Html Handler #[derive(Default)] -pub struct DefaultHtmlHandler; +pub struct HtmlExport { + output: String, -impl HtmlHandler for DefaultHtmlHandler { - fn start(&mut self, mut w: W, element: &Element) -> IOResult<()> { - match element { - // container elements - Element::SpecialBlock(_) => (), - Element::QuoteBlock(_) => write!(w, "
")?, - Element::CenterBlock(_) => write!(w, "
")?, - Element::VerseBlock(_) => write!(w, "

")?, - Element::Bold => write!(w, "")?, - Element::Document { .. } => write!(w, "

")?, - Element::DynBlock(_dyn_block) => (), - Element::Headline { .. } => (), - Element::List(list) => { - if list.ordered { - write!(w, "
    ")?; - } else { - write!(w, "
      ")?; - } - } - Element::Italic => write!(w, "")?, - Element::ListItem(_) => write!(w, "
    • ")?, - Element::Paragraph { .. } => write!(w, "

      ")?, - Element::Section => write!(w, "

      ")?, - Element::Strike => write!(w, "")?, - Element::Underline => write!(w, "")?, - // non-container elements - Element::CommentBlock(_) => (), - Element::ExampleBlock(block) => write!( - w, - "
      {}
      ", - HtmlEscape(&block.contents) - )?, - Element::ExportBlock(block) => { - if block.data.eq_ignore_ascii_case("HTML") { - write!(w, "{}", block.contents)? - } - } - Element::SourceBlock(block) => { - if block.language.is_empty() { - write!( - w, - "
      {}
      ", - HtmlEscape(&block.contents) - )?; - } else { - write!( - w, - "
      {}
      ", - block.language, - HtmlEscape(&block.contents) - )?; - } - } - Element::BabelCall(_) => (), - Element::InlineSrc(inline_src) => write!( - w, - "{}", - inline_src.lang, - HtmlEscape(&inline_src.body) - )?, - Element::Code { value } => write!(w, "{}", HtmlEscape(value))?, - Element::FnRef(_fn_ref) => (), - Element::InlineCall(_) => (), - Element::Link(link) => write!( - w, - "{}", - HtmlEscape(&link.path), - HtmlEscape(link.desc.as_ref().unwrap_or(&link.path)), - )?, - Element::Macros(_macros) => (), - Element::RadioTarget => (), - Element::Snippet(snippet) => { - if snippet.name.eq_ignore_ascii_case("HTML") { - write!(w, "{}", snippet.value)?; - } - } - Element::Target(_target) => (), - Element::Text { value } => write!(w, "{}", HtmlEscape(value))?, - Element::Timestamp(timestamp) => { - write!( - &mut w, - "" - )?; + in_descriptive_list: Vec, - match timestamp { - Timestamp::Active { start, .. } => { - write_datetime(&mut w, "<", start, ">")?; - } - Timestamp::Inactive { start, .. } => { - write_datetime(&mut w, "[", start, "]")?; - } - Timestamp::ActiveRange { start, end, .. } => { - write_datetime(&mut w, "<", start, ">–")?; - write_datetime(&mut w, "<", end, ">")?; - } - Timestamp::InactiveRange { start, end, .. } => { - write_datetime(&mut w, "[", start, "]–")?; - write_datetime(&mut w, "[", end, "]")?; - } - Timestamp::Diary { value } => { - write!(&mut w, "<%%({})>", HtmlEscape(value))? - } - } - - write!(&mut w, "")?; - } - Element::Verbatim { value } => write!(&mut w, "{}", HtmlEscape(value))?, - Element::FnDef(_fn_def) => (), - Element::Clock(_clock) => (), - Element::Comment(_) => (), - Element::FixedWidth(fixed_width) => write!( - w, - "
      {}
      ", - HtmlEscape(&fixed_width.value) - )?, - Element::Keyword(_keyword) => (), - Element::Drawer(_drawer) => (), - Element::Rule(_) => write!(w, "
      ")?, - Element::Cookie(cookie) => write!(w, "{}", cookie.value)?, - Element::Title(title) => { - write!(w, "", if title.level <= 6 { title.level } else { 6 })?; - } - Element::Table(Table::TableEl { .. }) => (), - Element::Table(Table::Org { has_header, .. }) => { - write!(w, "")?; - if *has_header { - write!(w, "")?; - } else { - write!(w, "")?; - } - } - Element::TableRow(row) => match row { - TableRow::Body => write!(w, "")?, - TableRow::BodyRule => write!(w, "")?, - TableRow::Header => write!(w, "")?, - TableRow::HeaderRule => write!(w, "")?, - }, - Element::TableCell(cell) => match cell { - TableCell::Body => write!(w, "
      ")?, - TableCell::Header => write!(w, "")?, - }, - } - - Ok(()) - } - - fn end(&mut self, mut w: W, element: &Element) -> IOResult<()> { - match element { - // container elements - Element::SpecialBlock(_) => (), - Element::QuoteBlock(_) => write!(w, "")?, - Element::CenterBlock(_) => write!(w, "")?, - Element::VerseBlock(_) => write!(w, "

      ")?, - Element::Bold => write!(w, "")?, - Element::Document { .. } => write!(w, "")?, - Element::DynBlock(_dyn_block) => (), - Element::Headline { .. } => (), - Element::List(list) => { - if list.ordered { - write!(w, "")?; - } else { - write!(w, "")?; - } - } - Element::Italic => write!(w, "")?, - Element::ListItem(_) => write!(w, "")?, - Element::Paragraph { .. } => write!(w, "

      ")?, - Element::Section => write!(w, "")?, - Element::Strike => write!(w, "")?, - Element::Underline => write!(w, "")?, - Element::Title(title) => { - write!(w, "", if title.level <= 6 { title.level } else { 6 })? - } - Element::Table(Table::TableEl { .. }) => (), - Element::Table(Table::Org { .. }) => { - write!(w, "
      ")?; - } - Element::TableRow(TableRow::Body) | Element::TableRow(TableRow::Header) => { - write!(w, "")?; - } - Element::TableCell(cell) => match cell { - TableCell::Body => write!(w, "")?, - TableCell::Header => write!(w, "")?, - }, - // non-container elements - _ => debug_assert!(!element.is_container()), - } - - Ok(()) - } + table_row: TableRow, } -#[cfg(feature = "syntect")] -mod syntect_handler { - use super::*; - use std::marker::PhantomData; +#[derive(Default, PartialEq, Eq)] +enum TableRow { + #[default] + HeaderRule, + Header, + BodyRule, + Body, +} - use syntect::{ - easy::HighlightLines, - highlighting::ThemeSet, - html::{styled_line_to_highlighted_html, IncludeBackground}, - parsing::SyntaxSet, - }; +impl HtmlExport { + pub fn push_str(&mut self, s: impl AsRef) { + self.output += s.as_ref(); + } - /// Syntect Html Handler - /// - /// Simple Usage: + pub fn finish(self) -> String { + self.output + } + + /// Render syntax node to html string /// /// ```rust - /// use orgize::Org; - /// use orgize::export::{DefaultHtmlHandler, SyntectHtmlHandler}; + /// use orgize::{Org, ast::Bold, export::HtmlExport, rowan::ast::AstNode}; /// - /// let mut handler = SyntectHtmlHandler::new(DefaultHtmlHandler); - /// let org = Org::parse("src_rust{println!(\"Hello\")}"); - /// - /// let mut vec = vec![]; - /// - /// org.write_html_custom(&mut vec, &mut handler).unwrap(); + /// let org = Org::parse("* /hello/ *world*"); + /// let bold = org.first_node::().unwrap(); + /// let mut html = HtmlExport::default(); + /// html.render(bold.syntax()); + /// assert_eq!(html.finish(), "world"); /// ``` - /// - /// Customize: - /// - /// ```rust,no_run - /// // orgize has re-exported the whole syntect crate - /// use orgize::syntect::parsing::SyntaxSet; - /// use orgize::export::{DefaultHtmlHandler, SyntectHtmlHandler}; - /// - /// let mut handler = SyntectHtmlHandler { - /// syntax_set: { - /// let set = SyntaxSet::load_defaults_newlines(); - /// let mut builder = 
set.into_builder(); - /// // add extra language syntax - /// builder.add_from_folder("path/to/syntax/dir", true).unwrap(); - /// builder.build() - /// }, - /// // specify theme - /// theme: String::from("Solarized (dark)"), - /// inner: DefaultHtmlHandler, - /// ..Default::default() - /// }; - /// - /// // Make sure to check if theme presents or it will panic at runtime - /// if handler.theme_set.themes.contains_key("dont-exists") { - /// - /// } - /// ``` - pub struct SyntectHtmlHandler, H: HtmlHandler> { - /// syntax set, default is `SyntaxSet::load_defaults_newlines()` - pub syntax_set: SyntaxSet, - /// theme set, default is `ThemeSet::load_defaults()` - pub theme_set: ThemeSet, - /// theme used for highlighting, default is `"InspiredGitHub"` - pub theme: String, - /// inner html handler - pub inner: H, - /// background color, default is `IncludeBackground::No` - pub background: IncludeBackground, - /// handler error type - pub error_type: PhantomData, - } - - impl, H: HtmlHandler> SyntectHtmlHandler { - pub fn new(inner: H) -> Self { - SyntectHtmlHandler { - inner, - ..Default::default() - } - } - - fn highlight(&self, language: Option<&str>, content: &str) -> String { - let mut highlighter = HighlightLines::new( - language - .and_then(|lang| self.syntax_set.find_syntax_by_token(lang)) - .unwrap_or_else(|| self.syntax_set.find_syntax_plain_text()), - &self.theme_set.themes[&self.theme], - ); - let regions = highlighter.highlight(content, &self.syntax_set); - styled_line_to_highlighted_html(®ions[..], self.background) - } - } - - impl, H: HtmlHandler> Default for SyntectHtmlHandler { - fn default() -> Self { - SyntectHtmlHandler { - syntax_set: SyntaxSet::load_defaults_newlines(), - theme_set: ThemeSet::load_defaults(), - theme: String::from("InspiredGitHub"), - inner: H::default(), - background: IncludeBackground::No, - error_type: PhantomData, - } - } - } - - impl, H: HtmlHandler> HtmlHandler for SyntectHtmlHandler { - fn start(&mut self, mut w: W, element: 
&Element) -> Result<(), E> { - match element { - Element::InlineSrc(inline_src) => write!( - w, - "{}", - self.highlight(Some(&inline_src.lang), &inline_src.body) - )?, - Element::SourceBlock(block) => { - if block.language.is_empty() { - write!(w, "
      {}
      ", block.contents)?; - } else { - write!( - w, - "
      {}
      ", - block.language, - self.highlight(Some(&block.language), &block.contents) - )?; - } - } - Element::FixedWidth(fixed_width) => write!( - w, - "
      {}
      ", - self.highlight(None, &fixed_width.value) - )?, - Element::ExampleBlock(block) => write!( - w, - "
      {}
      ", - self.highlight(None, &block.contents) - )?, - _ => self.inner.start(w, element)?, - } - Ok(()) - } - - fn end(&mut self, w: W, element: &Element) -> Result<(), E> { - self.inner.end(w, element) - } + pub fn render(&mut self, node: &SyntaxNode) { + let mut ctx = TraversalContext::default(); + self.element(SyntaxElement::Node(node.clone()), &mut ctx); } } -#[cfg(feature = "syntect")] -pub use syntect_handler::SyntectHtmlHandler; +impl Traverser for HtmlExport { + fn event(&mut self, event: Event, ctx: &mut TraversalContext) { + match event { + Event::Enter(Container::Document(_)) => self.output += "
      ", + Event::Leave(Container::Document(_)) => self.output += "
      ", + + Event::Enter(Container::Headline(headline)) => { + let level = min(headline.level(), 6); + let _ = write!(&mut self.output, ""); + for elem in headline.title() { + self.element(elem, ctx); + } + let _ = write!(&mut self.output, ""); + } + Event::Leave(Container::Headline(_)) => {} + + Event::Enter(Container::Paragraph(_)) => self.output += "

      ", + Event::Leave(Container::Paragraph(_)) => self.output += "

      ", + + Event::Enter(Container::Section(_)) => self.output += "
      ", + Event::Leave(Container::Section(_)) => self.output += "
      ", + + Event::Enter(Container::Italic(_)) => self.output += "", + Event::Leave(Container::Italic(_)) => self.output += "", + + Event::Enter(Container::Bold(_)) => self.output += "", + Event::Leave(Container::Bold(_)) => self.output += "", + + Event::Enter(Container::Strike(_)) => self.output += "", + Event::Leave(Container::Strike(_)) => self.output += "", + + Event::Enter(Container::Underline(_)) => self.output += "", + Event::Leave(Container::Underline(_)) => self.output += "", + + Event::Enter(Container::Verbatim(_)) => self.output += "", + Event::Leave(Container::Verbatim(_)) => self.output += "", + + Event::Enter(Container::Code(_)) => self.output += "", + Event::Leave(Container::Code(_)) => self.output += "", + + Event::Enter(Container::SourceBlock(block)) => { + if let Some(language) = block.language() { + let _ = write!( + &mut self.output, + r#"
      "#,
      +                        HtmlEscape(&language)
      +                    );
      +                } else {
      +                    self.output += r#"
      "#
      +                }
      +            }
      +            Event::Leave(Container::SourceBlock(_)) => self.output += "
      ", + + Event::Enter(Container::QuoteBlock(_)) => self.output += "
      ", + Event::Leave(Container::QuoteBlock(_)) => self.output += "
      ", + + Event::Enter(Container::VerseBlock(_)) => self.output += "

      ", + Event::Leave(Container::VerseBlock(_)) => self.output += "

      ", + + Event::Enter(Container::ExampleBlock(_)) => self.output += "
      ",
      +            Event::Leave(Container::ExampleBlock(_)) => self.output += "
      ", + + Event::Enter(Container::CenterBlock(_)) => self.output += "
      ", + Event::Leave(Container::CenterBlock(_)) => self.output += "
      ", + + Event::Enter(Container::CommentBlock(_)) => self.output += "", + + Event::Enter(Container::Comment(_)) => self.output += "", + + Event::Enter(Container::Subscript(_)) => self.output += "", + Event::Leave(Container::Subscript(_)) => self.output += "", + + Event::Enter(Container::Superscript(_)) => self.output += "", + Event::Leave(Container::Superscript(_)) => self.output += "", + + Event::Enter(Container::List(list)) => { + self.output += if list.is_ordered() { + self.in_descriptive_list.push(false); + "
        " + } else if list.is_descriptive() { + self.in_descriptive_list.push(true); + "
        " + } else { + self.in_descriptive_list.push(false); + "
          " + }; + } + Event::Leave(Container::List(list)) => { + self.output += if list.is_ordered() { + "
      " + } else if let Some(true) = self.in_descriptive_list.last() { + "" + } else { + "
    " + }; + self.in_descriptive_list.pop(); + } + Event::Enter(Container::ListItem(list_item)) => { + if let Some(&true) = self.in_descriptive_list.last() { + self.output += "
    "; + for elem in list_item.tag() { + self.element(elem, ctx); + } + self.output += "
    "; + } else { + self.output += "
  1. "; + } + } + Event::Leave(Container::ListItem(_)) => { + if let Some(&true) = self.in_descriptive_list.last() { + self.output += "
  2. "; + } else { + self.output += ""; + } + } + + Event::Enter(Container::OrgTable(table)) => { + self.output += ""; + self.table_row = if table.has_header() { + TableRow::HeaderRule + } else { + TableRow::BodyRule + } + } + Event::Leave(Container::OrgTable(_)) => { + match self.table_row { + TableRow::Body => self.output += "", + TableRow::Header => self.output += "", + _ => {} + } + self.output += "
    "; + } + Event::Enter(Container::OrgTableRow(row)) => { + if row.is_rule() { + match self.table_row { + TableRow::Body => { + self.output += ""; + self.table_row = TableRow::BodyRule; + } + TableRow::Header => { + self.output += ""; + self.table_row = TableRow::BodyRule; + } + _ => {} + } + ctx.skip(); + } else { + match self.table_row { + TableRow::HeaderRule => { + self.table_row = TableRow::Header; + self.output += ""; + } + TableRow::BodyRule => { + self.table_row = TableRow::Body; + self.output += ""; + } + _ => {} + } + self.output += ""; + } + } + Event::Leave(Container::OrgTableRow(row)) => { + if row.is_rule() { + match self.table_row { + TableRow::Body => { + self.output += ""; + self.table_row = TableRow::BodyRule; + } + TableRow::Header => { + self.output += ""; + self.table_row = TableRow::BodyRule; + } + _ => {} + } + ctx.skip(); + } else { + self.output += ""; + } + } + Event::Enter(Container::OrgTableCell(_)) => self.output += "", + Event::Leave(Container::OrgTableCell(_)) => self.output += "", + + Event::Enter(Container::Link(link)) => { + let path = link.path(); + let path = path.trim_start_matches("file:"); + + if link.is_image() { + let _ = write!(&mut self.output, r#""#, HtmlEscape(&path)); + return ctx.skip(); + } + + let _ = write!(&mut self.output, r#""#, HtmlEscape(&path)); + + if !link.has_description() { + let _ = write!(&mut self.output, "{}", HtmlEscape(&path)); + ctx.skip(); + } + } + Event::Leave(Container::Link(_)) => self.output += "", + + Event::Text(text) => { + let _ = write!(&mut self.output, "{}", HtmlEscape(text)); + } + + Event::LineBreak(_) => self.output += "
    ", + + Event::Snippet(snippet) => { + if snippet.backend().eq_ignore_ascii_case("html") { + self.output += &snippet.value(); + } + } + + Event::Rule(_) => self.output += "
    ", + + Event::Timestamp(timestamp) => { + self.output += r#""#; + for e in timestamp.syntax.children_with_tokens() { + match e { + NodeOrToken::Token(t) if t.kind() == SyntaxKind::MINUS2 => { + self.output += "–"; + } + NodeOrToken::Token(t) => { + self.output += t.text(); + } + _ => {} + } + } + self.output += r#""#; + } + + Event::LatexFragment(latex) => { + let _ = write!(&mut self.output, "{}", &latex.syntax); + } + Event::LatexEnvironment(latex) => { + let _ = write!(&mut self.output, "{}", &latex.syntax); + } + + // ignores keyword + Event::Enter(Container::Keyword(_)) => ctx.skip(), + + Event::Entity(entity) => self.output += entity.html(), + + _ => {} + } + } +} diff --git a/src/export/markdown.rs b/src/export/markdown.rs new file mode 100644 index 0000000..b75b1ee --- /dev/null +++ b/src/export/markdown.rs @@ -0,0 +1,186 @@ +use std::cmp::min; +use std::fmt::Write as _; + +use crate::{SyntaxElement, SyntaxNode}; + +use super::event::{Container, Event}; +use super::TraversalContext; +use super::Traverser; + +#[derive(Default)] +pub struct MarkdownExport { + output: String, + + inside_blockquote: bool, +} + +impl MarkdownExport { + pub fn push_str(&mut self, s: impl AsRef) { + self.output += s.as_ref(); + } + + /// Render syntax node to markdown string + /// + /// ```rust + /// use orgize::{Org, ast::Bold, export::MarkdownExport, rowan::ast::AstNode}; + /// + /// let org = Org::parse("* /hello/ *world*"); + /// let bold = org.first_node::().unwrap(); + /// let mut markdown = MarkdownExport::default(); + /// markdown.render(bold.syntax()); + /// assert_eq!(markdown.finish(), "**world**"); + /// ``` + pub fn render(&mut self, node: &SyntaxNode) { + let mut ctx = TraversalContext::default(); + self.element(SyntaxElement::Node(node.clone()), &mut ctx); + } + + pub fn finish(self) -> String { + self.output + } + + fn follows_newline(&mut self) { + if !self.output.is_empty() && !self.output.ends_with(['\n', '\r']) { + self.output += "\n"; + } + } +} + +impl 
Traverser for MarkdownExport { + fn event(&mut self, event: Event, ctx: &mut TraversalContext) { + match event { + Event::Enter(Container::Document(_)) => {} + Event::Leave(Container::Document(_)) => {} + + Event::Enter(Container::Headline(headline)) => { + self.follows_newline(); + let level = min(headline.level(), 6); + let _ = write!(&mut self.output, "{} ", "#".repeat(level)); + for elem in headline.title() { + self.element(elem, ctx); + } + } + Event::Leave(Container::Headline(_)) => {} + + Event::Enter(Container::Paragraph(_)) => {} + Event::Leave(Container::Paragraph(_)) => self.output += "\n", + + Event::Enter(Container::Section(_)) => self.follows_newline(), + Event::Leave(Container::Section(_)) => {} + + Event::Enter(Container::Italic(_)) => self.output += "*", + Event::Leave(Container::Italic(_)) => self.output += "*", + + Event::Enter(Container::Bold(_)) => self.output += "**", + Event::Leave(Container::Bold(_)) => self.output += "**", + + Event::Enter(Container::Strike(_)) => self.output += "~~", + Event::Leave(Container::Strike(_)) => self.output += "~~", + + Event::Enter(Container::Underline(_)) => {} + Event::Leave(Container::Underline(_)) => {} + + Event::Enter(Container::Verbatim(_)) + | Event::Leave(Container::Verbatim(_)) + | Event::Enter(Container::Code(_)) + | Event::Leave(Container::Code(_)) => self.output += "`", + + Event::Enter(Container::SourceBlock(block)) => { + self.follows_newline(); + self.output += "```"; + if let Some(language) = block.language() { + self.output += &language; + } + } + Event::Leave(Container::SourceBlock(_)) => self.output += "```\n", + + Event::Enter(Container::QuoteBlock(_)) => { + self.inside_blockquote = true; + self.follows_newline(); + self.output += "> "; + } + Event::Leave(Container::QuoteBlock(_)) => self.inside_blockquote = false, + + Event::Enter(Container::CommentBlock(_)) => self.output += "", + + Event::Enter(Container::Comment(_)) => self.output += "", + + Event::Enter(Container::Subscript(_)) => 
self.output += "", + Event::Leave(Container::Subscript(_)) => self.output += "", + + Event::Enter(Container::Superscript(_)) => self.output += "", + Event::Leave(Container::Superscript(_)) => self.output += "", + + Event::Enter(Container::List(_list)) => {} + Event::Leave(Container::List(_list)) => {} + + Event::Enter(Container::ListItem(list_item)) => { + self.follows_newline(); + self.output += &" ".repeat(list_item.indent()); + self.output += &list_item.bullet(); + } + Event::Leave(Container::ListItem(_)) => {} + + Event::Enter(Container::OrgTable(_table)) => {} + Event::Leave(Container::OrgTable(_)) => {} + Event::Enter(Container::OrgTableRow(_row)) => {} + Event::Leave(Container::OrgTableRow(_row)) => {} + Event::Enter(Container::OrgTableCell(_)) => {} + Event::Leave(Container::OrgTableCell(_)) => {} + + Event::Enter(Container::Link(link)) => { + let path = link.path(); + let path = path.trim_start_matches("file:"); + + if link.is_image() { + let _ = write!(&mut self.output, "![]({path})"); + return ctx.skip(); + } + + if !link.has_description() { + let _ = write!(&mut self.output, r#"[{}]({})"#, &path, &path); + return ctx.skip(); + } + + self.output += "["; + } + Event::Leave(Container::Link(link)) => { + let _ = write!(&mut self.output, r#"]({})"#, &*link.path()); + } + + Event::Text(text) => { + if self.inside_blockquote { + for (idx, line) in text.split('\n').enumerate() { + if idx != 0 { + self.output += "\n> "; + } + self.output += line; + } + } else { + self.output += &*text; + } + } + + Event::LineBreak(_) => {} + + Event::Snippet(_snippet) => {} + + Event::Rule(_) => self.output += "\n-----\n", + + Event::Timestamp(_timestamp) => {} + + Event::LatexFragment(latex) => { + let _ = write!(&mut self.output, "{}", &latex.syntax); + } + Event::LatexEnvironment(latex) => { + let _ = write!(&mut self.output, "{}", &latex.syntax); + } + + Event::Entity(entity) => self.output += entity.utf8(), + + _ => {} + } + } +} diff --git a/src/export/mod.rs 
b/src/export/mod.rs index 4eddfba..afada80 100644 --- a/src/export/mod.rs +++ b/src/export/mod.rs @@ -1,31 +1,11 @@ //! Export `Org` struct to various formats. +mod event; mod html; -mod org; +mod markdown; +mod traverse; -#[cfg(feature = "syntect")] -pub use html::SyntectHtmlHandler; -pub use html::{DefaultHtmlHandler, HtmlEscape, HtmlHandler}; -pub use org::{DefaultOrgHandler, OrgHandler}; - -use std::io::{Error, Write}; - -use crate::elements::Datetime; - -pub(crate) fn write_datetime( - mut w: W, - start: &str, - datetime: &Datetime, - end: &str, -) -> Result<(), Error> { - write!(w, "{}", start)?; - write!( - w, - "{}-{:02}-{:02} {}", - datetime.year, datetime.month, datetime.day, datetime.dayname - )?; - if let (Some(hour), Some(minute)) = (datetime.hour, datetime.minute) { - write!(w, " {:02}:{:02}", hour, minute)?; - } - write!(w, "{}", end) -} +pub use event::{Container, Event}; +pub use html::{HtmlEscape, HtmlExport}; +pub use markdown::MarkdownExport; +pub use traverse::{from_fn, from_fn_with_ctx, FromFn, FromFnWithCtx, TraversalContext, Traverser}; diff --git a/src/export/org.rs b/src/export/org.rs deleted file mode 100644 index 7e860d8..0000000 --- a/src/export/org.rs +++ /dev/null @@ -1,321 +0,0 @@ -use std::io::{Error, Result as IOResult, Write}; - -use crate::elements::{Clock, Element, Table, Timestamp}; -use crate::export::write_datetime; - -pub trait OrgHandler>: Default { - fn start(&mut self, w: W, element: &Element) -> Result<(), E>; - fn end(&mut self, w: W, element: &Element) -> Result<(), E>; -} - -#[derive(Default)] -pub struct DefaultOrgHandler; - -impl OrgHandler for DefaultOrgHandler { - fn start(&mut self, mut w: W, element: &Element) -> IOResult<()> { - match element { - // container elements - Element::SpecialBlock(block) => { - writeln!(w, "#+BEGIN_{}", block.name)?; - write_blank_lines(&mut w, block.pre_blank)?; - } - Element::QuoteBlock(block) => { - writeln!(&mut w, "#+BEGIN_QUOTE")?; - write_blank_lines(&mut w, block.pre_blank)?; 
- } - Element::CenterBlock(block) => { - writeln!(&mut w, "#+BEGIN_CENTER")?; - write_blank_lines(&mut w, block.pre_blank)?; - } - Element::VerseBlock(block) => { - writeln!(&mut w, "#+BEGIN_VERSE")?; - write_blank_lines(&mut w, block.pre_blank)?; - } - Element::Bold => write!(w, "*")?, - Element::Document { pre_blank } => { - write_blank_lines(w, *pre_blank)?; - } - Element::DynBlock(dyn_block) => { - write!(&mut w, "#+BEGIN: {}", dyn_block.block_name)?; - if let Some(parameters) = &dyn_block.arguments { - write!(&mut w, " {}", parameters)?; - } - write_blank_lines(&mut w, dyn_block.pre_blank + 1)?; - } - Element::Headline { .. } => (), - Element::List(_list) => (), - Element::Italic => write!(w, "/")?, - Element::ListItem(list_item) => { - for _ in 0..list_item.indent { - write!(&mut w, " ")?; - } - write!(&mut w, "{}", list_item.bullet)?; - } - Element::Paragraph { .. } => (), - Element::Section => (), - Element::Strike => write!(w, "+")?, - Element::Underline => write!(w, "_")?, - Element::Drawer(drawer) => { - writeln!(&mut w, ":{}:", drawer.name)?; - write_blank_lines(&mut w, drawer.pre_blank)?; - } - // non-container elements - Element::CommentBlock(block) => { - writeln!(&mut w, "#+BEGIN_COMMENT")?; - write!(&mut w, "{}", block.contents)?; - writeln!(&mut w, "#+END_COMMENT")?; - write_blank_lines(&mut w, block.post_blank)?; - } - Element::ExampleBlock(block) => { - writeln!(&mut w, "#+BEGIN_EXAMPLE")?; - write!(&mut w, "{}", block.contents)?; - writeln!(&mut w, "#+END_EXAMPLE")?; - write_blank_lines(&mut w, block.post_blank)?; - } - Element::ExportBlock(block) => { - writeln!(&mut w, "#+BEGIN_EXPORT {}", block.data)?; - write!(&mut w, "{}", block.contents)?; - writeln!(&mut w, "#+END_EXPORT")?; - write_blank_lines(&mut w, block.post_blank)?; - } - Element::SourceBlock(block) => { - writeln!(&mut w, "#+BEGIN_SRC {}", block.language)?; - write!(&mut w, "{}", block.contents)?; - writeln!(&mut w, "#+END_SRC")?; - write_blank_lines(&mut w, block.post_blank)?; - 
} - Element::BabelCall(call) => { - writeln!(&mut w, "#+CALL: {}", call.value)?; - write_blank_lines(w, call.post_blank)?; - } - Element::InlineSrc(inline_src) => { - write!(&mut w, "src_{}", inline_src.lang)?; - if let Some(options) = &inline_src.options { - write!(&mut w, "[{}]", options)?; - } - write!(&mut w, "{{{}}}", inline_src.body)?; - } - Element::Code { value } => write!(w, "~{}~", value)?, - Element::FnRef(fn_ref) => { - write!(&mut w, "[fn:{}", fn_ref.label)?; - if let Some(definition) = &fn_ref.definition { - write!(&mut w, ":{}", definition)?; - } - write!(&mut w, "]")?; - } - Element::InlineCall(inline_call) => { - write!(&mut w, "call_{}", inline_call.name)?; - if let Some(header) = &inline_call.inside_header { - write!(&mut w, "[{}]", header)?; - } - write!(&mut w, "({})", inline_call.arguments)?; - if let Some(header) = &inline_call.end_header { - write!(&mut w, "[{}]", header)?; - } - } - Element::Link(link) => { - write!(&mut w, "[[{}]", link.path)?; - if let Some(desc) = &link.desc { - write!(&mut w, "[{}]", desc)?; - } - write!(&mut w, "]")?; - } - Element::Macros(_macros) => (), - Element::RadioTarget => (), - Element::Snippet(snippet) => write!(w, "@@{}:{}@@", snippet.name, snippet.value)?, - Element::Target(_target) => (), - Element::Text { value } => write!(w, "{}", value)?, - Element::Timestamp(timestamp) => { - write_timestamp(&mut w, ×tamp)?; - } - Element::Verbatim { value } => write!(w, "={}=", value)?, - Element::FnDef(fn_def) => { - write_blank_lines(w, fn_def.post_blank)?; - } - Element::Clock(clock) => { - write!(w, "CLOCK: ")?; - - match clock { - Clock::Closed { - start, - end, - duration, - post_blank, - .. - } => { - write_datetime(&mut w, "[", &start, "]--")?; - write_datetime(&mut w, "[", &end, "]")?; - writeln!(&mut w, " => {}", duration)?; - write_blank_lines(&mut w, *post_blank)?; - } - Clock::Running { - start, post_blank, .. 
- } => { - write_datetime(&mut w, "[", &start, "]\n")?; - write_blank_lines(&mut w, *post_blank)?; - } - } - } - Element::Comment(comment) => { - write!(w, "{}", comment.value)?; - write_blank_lines(&mut w, comment.post_blank)?; - } - Element::FixedWidth(fixed_width) => { - write!(&mut w, "{}", fixed_width.value)?; - write_blank_lines(&mut w, fixed_width.post_blank)?; - } - Element::Keyword(keyword) => { - write!(&mut w, "#+{}", keyword.key)?; - if let Some(optional) = &keyword.optional { - write!(&mut w, "[{}]", optional)?; - } - writeln!(&mut w, ": {}", keyword.value)?; - write_blank_lines(&mut w, keyword.post_blank)?; - } - Element::Rule(rule) => { - writeln!(w, "-----")?; - write_blank_lines(&mut w, rule.post_blank)?; - } - Element::Cookie(_cookie) => (), - Element::Title(title) => { - for _ in 0..title.level { - write!(&mut w, "*")?; - } - if let Some(keyword) = &title.keyword { - write!(&mut w, " {}", keyword)?; - } - if let Some(priority) = title.priority { - write!(&mut w, " [#{}]", priority)?; - } - write!(&mut w, " ")?; - } - Element::Table(_) => (), - Element::TableRow(_) => (), - Element::TableCell(_) => (), - } - - Ok(()) - } - - fn end(&mut self, mut w: W, element: &Element) -> IOResult<()> { - match element { - // container elements - Element::SpecialBlock(block) => { - writeln!(&mut w, "#+END_{}", block.name)?; - write_blank_lines(&mut w, block.post_blank)?; - } - Element::QuoteBlock(block) => { - writeln!(&mut w, "#+END_QUOTE")?; - write_blank_lines(&mut w, block.post_blank)?; - } - Element::CenterBlock(block) => { - writeln!(&mut w, "#+END_CENTER")?; - write_blank_lines(&mut w, block.post_blank)?; - } - Element::VerseBlock(block) => { - writeln!(&mut w, "#+END_VERSE")?; - write_blank_lines(&mut w, block.post_blank)?; - } - Element::Bold => write!(w, "*")?, - Element::Document { .. } => (), - Element::DynBlock(dyn_block) => { - writeln!(w, "#+END:")?; - write_blank_lines(w, dyn_block.post_blank)?; - } - Element::Headline { .. 
} => (), - Element::List(list) => { - write_blank_lines(w, list.post_blank)?; - } - Element::Italic => write!(w, "/")?, - Element::ListItem(_) => (), - Element::Paragraph { post_blank } => { - write_blank_lines(w, post_blank + 1)?; - } - Element::Section => (), - Element::Strike => write!(w, "+")?, - Element::Underline => write!(w, "_")?, - Element::Drawer(drawer) => { - writeln!(&mut w, ":END:")?; - write_blank_lines(&mut w, drawer.post_blank)?; - } - Element::Title(title) => { - if !title.tags.is_empty() { - write!(&mut w, " :")?; - for tag in &title.tags { - write!(&mut w, "{}:", tag)?; - } - } - writeln!(&mut w)?; - if let Some(planning) = &title.planning { - if let Some(scheduled) = &planning.scheduled { - write!(&mut w, "SCHEDULED: ")?; - write_timestamp(&mut w, &scheduled)?; - } - if let Some(deadline) = &planning.deadline { - if planning.scheduled.is_some() { - write!(&mut w, " ")?; - } - write!(&mut w, "DEADLINE: ")?; - write_timestamp(&mut w, &deadline)?; - } - if let Some(closed) = &planning.closed { - if planning.deadline.is_some() { - write!(&mut w, " ")?; - } - write!(&mut w, "CLOSED: ")?; - write_timestamp(&mut w, &closed)?; - } - writeln!(&mut w)?; - } - if !title.properties.is_empty() { - writeln!(&mut w, ":PROPERTIES:")?; - for (key, value) in &title.properties { - writeln!(&mut w, ":{}: {}", key, value)?; - } - writeln!(&mut w, ":END:")?; - } - write_blank_lines(&mut w, title.post_blank)?; - } - Element::Table(Table::Org { post_blank, .. }) => { - write_blank_lines(w, *post_blank)?; - } - Element::Table(Table::TableEl { post_blank, .. 
}) => { - write_blank_lines(w, *post_blank)?; - } - Element::TableRow(_) => (), - Element::TableCell(_) => (), - // non-container elements - _ => debug_assert!(!element.is_container()), - } - - Ok(()) - } -} - -fn write_blank_lines(mut w: W, count: usize) -> Result<(), Error> { - for _ in 0..count { - writeln!(w)?; - } - Ok(()) -} - -fn write_timestamp(mut w: W, timestamp: &Timestamp) -> Result<(), Error> { - match timestamp { - Timestamp::Active { start, .. } => { - write_datetime(w, "<", start, ">")?; - } - Timestamp::Inactive { start, .. } => { - write_datetime(w, "[", start, "]")?; - } - Timestamp::ActiveRange { start, end, .. } => { - write_datetime(&mut w, "<", start, ">--")?; - write_datetime(&mut w, "<", end, ">")?; - } - Timestamp::InactiveRange { start, end, .. } => { - write_datetime(&mut w, "[", start, "]--")?; - write_datetime(&mut w, "[", end, "]")?; - } - Timestamp::Diary { value } => write!(w, "<%%({})>", value)?, - } - Ok(()) -} diff --git a/src/export/traverse.rs b/src/export/traverse.rs new file mode 100644 index 0000000..d53b7b8 --- /dev/null +++ b/src/export/traverse.rs @@ -0,0 +1,282 @@ +use crate::ast::*; +use crate::syntax::{SyntaxElement, SyntaxKind}; +use rowan::ast::AstNode; +use SyntaxKind::*; + +use super::event::{Container, Event}; + +#[derive(Default, Debug, PartialEq, Eq, Clone, Copy)] +enum TraversalControl { + Up, + Stop, + Skip, + #[default] + Continue, +} + +#[derive(Default)] +pub struct TraversalContext { + control: TraversalControl, +} + +impl TraversalContext { + /// Stops traversal completely + pub fn stop(&mut self) { + self.control = TraversalControl::Stop; + } + /// Skips traversal of the current node's siblings + pub fn up(&mut self) { + self.control = TraversalControl::Up; + } + /// Skips traversal of the current node's descendants + pub fn skip(&mut self) { + self.control = TraversalControl::Skip; + } + /// Continues traversal + pub fn r#continue(&mut self) { + self.control = TraversalControl::Continue; + } +} + +/// A 
trait for enumerating org syntax tree +/// +/// ### `TraversalContext` +/// +/// `TraversalContext` can be used to control the traversal. +/// +/// For example, `ctx.skip()` will skips the traversal for current +/// element and its descendants and improve the traversal performance. +/// +/// ```rust +/// use orgize::{ +/// export::{Container, Event, HtmlExport, TraversalContext, Traverser}, +/// Org, +/// }; +/// use slugify::slugify; +/// +/// #[derive(Default)] +/// struct Toc(HtmlExport); +/// +/// impl Traverser for Toc { +/// fn event(&mut self, event: Event, ctx: &mut TraversalContext) { +/// match event { +/// Event::Enter(Container::Headline(headline)) => { +/// let title = headline.title().map(|e| e.to_string()).collect::(); +/// self.0.push_str(&format!("", slugify!(&title))); +/// for elem in headline.title() { +/// self.element(elem, ctx); +/// } +/// self.0.push_str(""); +/// if headline.headlines().count() > 0 { +/// self.0.push_str("
      "); +/// } +/// } +/// Event::Leave(Container::Headline(headline)) => { +/// if headline.headlines().count() > 0 { +/// self.0.push_str("
    "); +/// } +/// } +/// Event::Enter(Container::Section(_)) | Event::Leave(Container::Section(_)) => ctx.skip(), +/// Event::Enter(Container::Document(_)) | Event::Leave(Container::Document(_)) => {} +/// _ => self.0.event(event, ctx), +/// } +/// } +/// } +/// +/// let org = Org::parse(r#" +/// * heading 1 +/// section 1 +/// ** heading 1.1 +/// ** heading 1.2 +/// * heading 2 +/// section 2 +/// * heading 3 +/// **** heading 3.1"#); +/// let mut toc = Toc::default(); +/// org.traverse(&mut toc); +/// assert_eq!(toc.0.finish(), "\ +/// heading 1\ +/// \ +/// heading 2\ +/// heading 3\ +/// "); +/// ``` +pub trait Traverser { + /// Handles traversal event + fn event(&mut self, event: Event, ctx: &mut TraversalContext); + + fn element(&mut self, element: SyntaxElement, ctx: &mut TraversalContext) { + macro_rules! take_control { + () => { + match ctx.control { + TraversalControl::Stop => { + ctx.control = TraversalControl::Stop; + return; + } + TraversalControl::Up => { + ctx.control = TraversalControl::Skip; + return; + } + TraversalControl::Skip => { + ctx.control = TraversalControl::Continue; + return; + } + TraversalControl::Continue => {} + } + }; + } + + match element { + SyntaxElement::Node(node) => { + macro_rules! 
walk { + ($ast:ident) => {{ + debug_assert!($ast::can_cast(node.kind())); + let node = $ast { syntax: node }; + self.event(Event::Enter(Container::$ast(node.clone())), ctx); + take_control!(); + for child in node.syntax.children_with_tokens() { + self.element(child, ctx); + take_control!(); + } + self.event(Event::Leave(Container::$ast(node.clone())), ctx); + take_control!(); + }}; + (@$ast:ident) => {{ + debug_assert!($ast::can_cast(node.kind())); + let node = $ast { syntax: node }; + self.event(Event::$ast(node), ctx); + take_control!(); + }}; + } + + match node.kind() { + DOCUMENT => walk!(Document), + HEADLINE => walk!(Headline), + SECTION => walk!(Section), + PARAGRAPH => walk!(Paragraph), + BOLD => walk!(Bold), + ITALIC => walk!(Italic), + STRIKE => walk!(Strike), + UNDERLINE => walk!(Underline), + LIST => walk!(List), + LIST_ITEM => walk!(ListItem), + CODE => walk!(Code), + INLINE_CALL => walk!(@InlineCall), + INLINE_SRC => walk!(@InlineSrc), + RULE => walk!(@Rule), + VERBATIM => walk!(Verbatim), + SPECIAL_BLOCK => walk!(SpecialBlock), + QUOTE_BLOCK => walk!(QuoteBlock), + CENTER_BLOCK => walk!(CenterBlock), + VERSE_BLOCK => walk!(VerseBlock), + COMMENT_BLOCK => walk!(CommentBlock), + EXAMPLE_BLOCK => walk!(ExampleBlock), + EXPORT_BLOCK => walk!(ExportBlock), + SOURCE_BLOCK => walk!(SourceBlock), + BABEL_CALL => walk!(BabelCall), + CLOCK => walk!(@Clock), + COOKIE => walk!(@Cookie), + RADIO_TARGET => walk!(RadioTarget), + DRAWER => walk!(Drawer), + DYN_BLOCK => walk!(DynBlock), + FN_DEF => walk!(FnDef), + FN_REF => walk!(FnRef), + MACROS => walk!(@Macros), + SNIPPET => walk!(@Snippet), + TIMESTAMP_ACTIVE | TIMESTAMP_INACTIVE | TIMESTAMP_DIARY => walk!(@Timestamp), + TARGET => walk!(Target), + COMMENT => walk!(Comment), + FIXED_WIDTH => walk!(FixedWidth), + ORG_TABLE => walk!(OrgTable), + ORG_TABLE_RULE_ROW | ORG_TABLE_STANDARD_ROW => walk!(OrgTableRow), + ORG_TABLE_CELL => walk!(OrgTableCell), + LINK => walk!(Link), + LATEX_FRAGMENT => walk!(@LatexFragment), 
+ LATEX_ENVIRONMENT => walk!(@LatexEnvironment), + ENTITY => walk!(@Entity), + LINE_BREAK => walk!(@LineBreak), + SUPERSCRIPT => walk!(Superscript), + SUBSCRIPT => walk!(Subscript), + KEYWORD => walk!(Keyword), + PROPERTY_DRAWER => walk!(PropertyDrawer), + #[cfg(feature = "syntax-org-fc")] + CLOZE => walk!(@Cloze), + BLOCK_CONTENT | LIST_ITEM_CONTENT => { + for child in node.children_with_tokens() { + self.element(child, ctx); + take_control!(); + } + } + _ => {} + } + } + SyntaxElement::Token(token) => { + if token.kind() == TEXT { + self.event(Event::Text(Token(token)), ctx); + take_control!(); + } + } + }; + } +} + +pub struct FromFn(F); + +impl Traverser for FromFn { + fn event(&mut self, event: Event, _: &mut TraversalContext) { + (self.0)(event) + } +} + +pub struct FromFnWithCtx(F); + +impl Traverser for FromFnWithCtx { + fn event(&mut self, event: Event, ctx: &mut TraversalContext) { + (self.0)(event, ctx) + } +} + +/// A helper for creating traverser +/// +/// ```rust +/// use orgize::{ +/// export::{from_fn, Container, Event, Traverser}, +/// Org, +/// }; +/// +/// let mut count = 0; +/// let mut handler = from_fn(|event| { +/// if matches!(event, Event::Enter(Container::Headline(_))) { +/// count += 1; +/// } +/// }); +/// Org::parse("* 1\n** 2\n*** 3\n****4").traverse(&mut handler); +/// assert_eq!(count, 3); +/// ``` +pub fn from_fn(f: F) -> FromFn { + FromFn(f) +} + +/// A helper for creating traverser +/// +/// ```rust +/// use orgize::{ +/// export::{from_fn_with_ctx, Container, Event, Traverser}, +/// Org, +/// }; +/// +/// let mut count = 0; +/// let mut handler = from_fn_with_ctx(|event, ctx| { +/// if let Event::Enter(Container::Headline(hdl)) = event { +/// count += 1; +/// if &hdl.title_raw() == "cow" { +/// ctx.stop(); +/// } +/// } +/// }); +/// Org::parse("* 1\n* cow\n* 3").traverse(&mut handler); +/// assert_eq!(count, 2); +/// ``` +pub fn from_fn_with_ctx(f: F) -> FromFnWithCtx { + FromFnWithCtx(f) +} diff --git a/src/headline.rs 
b/src/headline.rs deleted file mode 100644 index 49c3617..0000000 --- a/src/headline.rs +++ /dev/null @@ -1,1219 +0,0 @@ -use indextree::NodeId; -use std::borrow::Cow; -use std::ops::RangeInclusive; -use std::usize; - -use crate::{ - config::ParseConfig, - elements::{Element, Title}, - parsers::{parse_container, Container, OwnedArena}, - validate::{ValidationError, ValidationResult}, - Org, -}; - -/// Represents the document in `Org` struct. -/// -/// Each `Org` struct only has one `Document`. -#[derive(Copy, Clone, Debug)] -pub struct Document { - doc_n: NodeId, - sec_n: Option, -} - -impl Document { - pub(crate) fn from_org(org: &Org) -> Document { - let sec_n = org.arena[org.root] - .first_child() - .and_then(|n| match org[n] { - Element::Section => Some(n), - Element::Headline { .. } => None, - _ => unreachable!("Document should only contains section and headline."), - }); - - Document { - doc_n: org.root, - sec_n, - } - } - - /// Returns the ID of the section element of this document, - /// or `None` if it has no section. - pub fn section_node(self) -> Option { - self.sec_n - } - - /// Returns an iterator of this document's children. 
- /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// ** h1 - /// ** h2 - /// *** h2_1 - /// *** h2_2 - /// ** h3 - /// "#, - /// ); - /// - /// let d = org.document(); - /// - /// let mut iter = d.children(&org); - /// - /// assert_eq!(iter.next().unwrap().title(&org).raw, "h1"); - /// assert_eq!(iter.next().unwrap().title(&org).raw, "h2"); - /// assert_eq!(iter.next().unwrap().title(&org).raw, "h3"); - /// assert!(iter.next().is_none()); - /// ``` - pub fn children<'a>(self, org: &'a Org) -> impl Iterator + 'a { - self.doc_n - .children(&org.arena) - // skip section if exists - .skip(if self.sec_n.is_some() { 1 } else { 0 }) - .map(move |n| match org[n] { - Element::Headline { level } => Headline::from_node(n, level, org), - _ => unreachable!(), - }) - } - - /// Returns the first child of this document, or `None` if it has no child. - /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// ** h1 - /// ** h2 - /// *** h2_1 - /// *** h2_2 - /// ** h3 - /// "#, - /// ); - /// - /// let d = org.document(); - /// - /// assert_eq!(d.first_child(&org).unwrap().title(&org).raw, "h1"); - /// ``` - /// - /// ```rust - /// # use orgize::Org; - /// # - /// let org = Org::new(); - /// - /// assert!(org.document().first_child(&org).is_none()); - /// ``` - pub fn first_child(self, org: &Org) -> Option { - self.doc_n - .children(&org.arena) - // skip section if exists - .nth(if self.sec_n.is_some() { 1 } else { 0 }) - .map(move |n| match org[n] { - Element::Headline { level } => Headline::from_node(n, level, org), - _ => unreachable!(), - }) - } - - /// Returns the last child of this document, or `None` if it has no child. 
- /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// ** h1_1 - /// ** h1_2 - /// *** h1_2_1 - /// *** h1_2_2 - /// ** h1_3 - /// "#, - /// ); - /// - /// let d = org.document(); - /// - /// assert_eq!(d.last_child(&org).unwrap().title(&org).raw, "h1_3"); - /// ``` - /// - /// ```rust - /// # use orgize::Org; - /// # - /// let org = Org::new(); - /// - /// assert!(org.document().last_child(&org).is_none()); - /// ``` - pub fn last_child(self, org: &Org) -> Option { - org.arena[self.doc_n] - .last_child() - .and_then(|n| match org[n] { - Element::Headline { level } => Some(Headline::from_node(n, level, org)), - Element::Section => None, - _ => unreachable!("Document should only contains section and headline."), - }) - } - - /// Changes the section content of this document. - /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// ** h1_1 - /// ** h1_2 - /// "#, - /// ); - /// - /// let mut d = org.document(); - /// - /// d.set_section_content("s", &mut org); - /// - /// let mut writer = Vec::new(); - /// org.write_org(&mut writer).unwrap(); - /// assert_eq!( - /// String::from_utf8(writer).unwrap(), - /// r#" - /// s - /// ** h1_1 - /// ** h1_2 - /// "#, - /// ); - /// ``` - pub fn set_section_content<'a, S>(&mut self, content: S, org: &mut Org<'a>) - where - S: Into>, - { - if let Some(sec_n) = self.sec_n { - let children: Vec<_> = sec_n.children(&org.arena).collect(); - for child in children { - child.detach(&mut org.arena); - } - } else { - let sec_n = org.arena.new_node(Element::Section); - self.sec_n = Some(sec_n); - self.doc_n.prepend(sec_n, &mut org.arena); - } - - match content.into() { - Cow::Borrowed(content) => parse_container( - &mut org.arena, - Container::Block { - node: self.sec_n.unwrap(), - content, - }, - &ParseConfig::default(), - ), - Cow::Owned(ref content) => parse_container( - &mut OwnedArena::new(&mut org.arena), - Container::Block { - node: 
self.sec_n.unwrap(), - content, - }, - &ParseConfig::default(), - ), - } - - org.debug_validate(); - } - - /// Appends a new child to this document. - /// - /// Returns an error if the given new child was already attached, - /// or the given new child didn't meet the requirements. - /// - /// ```rust - /// # use orgize::{elements::Title, Headline, Org}; - /// # - /// let mut org = Org::parse( - /// r#" - /// ***** h1 - /// **** h2 - /// *** h3 - /// "#, - /// ); - /// - /// let d = org.document(); - /// - /// let mut h4 = Headline::new( - /// Title { - /// raw: "h4".into(), - /// ..Default::default() - /// }, - /// &mut org, - /// ); - /// - /// // level must be smaller than or equal to 3 - /// h4.set_level(4, &mut org).unwrap(); - /// assert!(d.append(h4, &mut org).is_err()); - /// - /// h4.set_level(2, &mut org).unwrap(); - /// assert!(d.append(h4, &mut org).is_ok()); - /// - /// let mut writer = Vec::new(); - /// org.write_org(&mut writer).unwrap(); - /// assert_eq!( - /// String::from_utf8(writer).unwrap(), - /// r#" - /// ***** h1 - /// **** h2 - /// *** h3 - /// ** h4 - /// "#, - /// ); - /// - /// // cannot append an attached headline - /// assert!(d.append(h4, &mut org).is_err()); - /// ``` - pub fn append(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { - hdl.check_detached(org)?; - - if let Some(last) = self.last_child(org) { - hdl.check_level(1..=last.lvl)?; - } else { - hdl.check_level(1..=usize::max_value())?; - } - - self.doc_n.append(hdl.hdl_n, &mut org.arena); - - org.debug_validate(); - - Ok(()) - } - - /// Prepends a new child to this document. - /// - /// Returns an error if the given new child was already attached, - /// or the given new child didn't meet the requirements. 
- /// - /// ```rust - /// # use orgize::{elements::Title, Headline, Org}; - /// # - /// let mut org = Org::parse( - /// r#" - /// ** h2 - /// ** h3 - /// "#, - /// ); - /// - /// let d = org.document(); - /// - /// let mut h1 = Headline::new( - /// Title { - /// raw: "h1".into(), - /// ..Default::default() - /// }, - /// &mut org, - /// ); - /// - /// // level must be greater than 2 - /// h1.set_level(1, &mut org).unwrap(); - /// assert!(d.prepend(h1, &mut org).is_err()); - /// - /// h1.set_level(4, &mut org).unwrap(); - /// assert!(d.prepend(h1, &mut org).is_ok()); - /// - /// let mut writer = Vec::new(); - /// org.write_org(&mut writer).unwrap(); - /// assert_eq!( - /// String::from_utf8(writer).unwrap(), - /// r#" - /// **** h1 - /// ** h2 - /// ** h3 - /// "#, - /// ); - /// - /// // cannot prepend an attached headline - /// assert!(d.prepend(h1, &mut org).is_err()); - /// ``` - pub fn prepend(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { - hdl.check_detached(org)?; - - if let Some(first) = self.first_child(org) { - hdl.check_level(first.lvl..=usize::MAX)?; - } else { - hdl.check_level(1..=usize::MAX)?; - } - - if let Some(sec_n) = self.sec_n { - sec_n.insert_after(hdl.hdl_n, &mut org.arena); - } else { - self.doc_n.prepend(hdl.hdl_n, &mut org.arena); - } - - org.debug_validate(); - - Ok(()) - } -} - -/// Represents a headline in `Org` struct. -/// -/// Each `Org` has zero or more `Headline`s. -#[derive(Copy, Clone, Debug)] -pub struct Headline { - lvl: usize, - hdl_n: NodeId, - ttl_n: NodeId, - sec_n: Option, -} - -impl Headline { - /// Creates a new detached Headline. 
- pub fn new<'a>(ttl: Title<'a>, org: &mut Org<'a>) -> Headline { - let lvl = ttl.level; - let hdl_n = org.arena.new_node(Element::Headline { level: ttl.level }); - let ttl_n = org.arena.new_node(Element::Document { pre_blank: 0 }); // placeholder - hdl_n.append(ttl_n, &mut org.arena); - - match ttl.raw { - Cow::Borrowed(content) => parse_container( - &mut org.arena, - Container::Inline { - node: ttl_n, - content, - }, - &ParseConfig::default(), - ), - Cow::Owned(ref content) => parse_container( - &mut OwnedArena::new(&mut org.arena), - Container::Inline { - node: ttl_n, - content, - }, - &ParseConfig::default(), - ), - } - - org[ttl_n] = Element::Title(ttl); - - Headline { - lvl, - hdl_n, - ttl_n, - sec_n: None, - } - } - - pub(crate) fn from_node(hdl_n: NodeId, lvl: usize, org: &Org) -> Headline { - let ttl_n = org.arena[hdl_n].first_child().unwrap(); - let sec_n = org.arena[ttl_n].next_sibling().and_then(|n| match org[n] { - Element::Section => Some(n), - _ => None, - }); - - Headline { - lvl, - hdl_n, - ttl_n, - sec_n, - } - } - - /// Returns the level of this headline. - pub fn level(self) -> usize { - self.lvl - } - - /// Returns the ID of the headline element of this headline. - pub fn headline_node(self) -> NodeId { - self.hdl_n - } - - /// Returns the ID of the title element of this headline. - pub fn title_node(self) -> NodeId { - self.ttl_n - } - - /// Returns the ID of the section element of this headline, or `None` if it has no section. - pub fn section_node(self) -> Option { - self.sec_n - } - - /// Returns a reference to the title element of this headline. - pub fn title<'a: 'b, 'b>(self, org: &'b Org<'a>) -> &'b Title<'a> { - match &org[self.ttl_n] { - Element::Title(title) => title, - _ => unreachable!(), - } - } - - /// Returns a mutual reference to the title element of this headline. - /// - /// Don't change the level and content of the `&mut Titile` directly. - /// Alternatively, uses [`Headline::set_level`] and [`Headline::set_title_content`]. 
- /// - /// [`Headline::set_level`]: #method.set_level - /// [`Headline::set_title_content`]: #method.set_title_content - /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse("* h1"); - /// - /// let h1 = org.headlines().nth(0).unwrap(); - /// - /// h1.title_mut(&mut org).priority = Some('A'); - /// - /// let mut writer = Vec::new(); - /// org.write_org(&mut writer).unwrap(); - /// assert_eq!( - /// String::from_utf8(writer).unwrap(), - /// "* [#A] h1\n", - /// ); - /// ``` - pub fn title_mut<'a: 'b, 'b>(self, org: &'b mut Org<'a>) -> &'b mut Title<'a> { - match &mut org[self.ttl_n] { - Element::Title(title) => title, - _ => unreachable!(), - } - } - - /// Changes the level of this headline. - /// - /// Returns an error if this headline is attached and the given new level - /// doesn't meet the requirements. - /// - /// ```rust - /// # use orgize::{elements::Title, Headline, Org}; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ****** h1_1 - /// *** h1_2 - /// ** h1_3 - /// "#, - /// ); - /// - /// let mut h1_2 = org.headlines().nth(2).unwrap(); - /// - /// // level must be greater than or equal to 2, and smaller than or equal to 6 - /// assert!(h1_2.set_level(42, &mut org).is_err()); - /// - /// assert!(h1_2.set_level(5, &mut org).is_ok()); - /// - /// let mut writer = Vec::new(); - /// org.write_org(&mut writer).unwrap(); - /// assert_eq!( - /// String::from_utf8(writer).unwrap(), - /// r#" - /// * h1 - /// ****** h1_1 - /// ***** h1_2 - /// ** h1_3 - /// "#, - /// ); - /// - /// // detached headline's levels can be changed freely - /// let mut new_headline = Headline::new( - /// Title { - /// raw: "new".into(), - /// ..Default::default() - /// }, - /// &mut org, - /// ); - /// new_headline.set_level(42, &mut org).unwrap(); - /// ``` - pub fn set_level(&mut self, lvl: usize, org: &mut Org) -> ValidationResult<()> { - if !self.is_detached(org) { - let min = self - .next(&org) - .or_else(|| self.parent(&org)) - 
.map(|hdl| hdl.lvl) - .unwrap_or(1); - let max = self - .previous(&org) - .map(|hdl| hdl.lvl) - .unwrap_or(usize::max_value()); - if !(min..=max).contains(&lvl) { - return Err(ValidationError::HeadlineLevelMismatch { - range: min..=max, - at: self.hdl_n, - }); - } - } - self.lvl = lvl; - self.title_mut(org).level = lvl; - if let Element::Headline { level } = &mut org[self.hdl_n] { - *level = lvl; - } - Ok(()) - } - - /// Changes the title content of this headline. - /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// "#, - /// ); - /// - /// let h1 = org.headlines().nth(0).unwrap(); - /// let h1_1 = org.headlines().nth(1).unwrap(); - /// - /// h1.set_title_content("H1", &mut org); - /// h1_1.set_title_content(String::from("*H1_1*"), &mut org); - /// - /// let mut writer = Vec::new(); - /// org.write_org(&mut writer).unwrap(); - /// assert_eq!( - /// String::from_utf8(writer).unwrap(), - /// r#" - /// * H1 - /// ** *H1_1* - /// "#, - /// ); - /// ``` - pub fn set_title_content<'a, S>(self, content: S, org: &mut Org<'a>) - where - S: Into>, - { - let content = content.into(); - - let children: Vec<_> = self.ttl_n.children(&org.arena).collect(); - for child in children { - child.detach(&mut org.arena); - } - - match &content { - Cow::Borrowed(content) => parse_container( - &mut org.arena, - Container::Inline { - node: self.ttl_n, - content, - }, - &ParseConfig::default(), - ), - Cow::Owned(ref content) => parse_container( - &mut OwnedArena::new(&mut org.arena), - Container::Inline { - node: self.ttl_n, - content, - }, - &ParseConfig::default(), - ), - } - - self.title_mut(org).raw = content; - - org.debug_validate(); - } - - /// Changes the section content of this headline. 
- /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// s1_1 - /// "#, - /// ); - /// - /// let mut h1 = org.headlines().nth(0).unwrap(); - /// let mut h1_1 = org.headlines().nth(1).unwrap(); - /// - /// h1.set_section_content("s1", &mut org); - /// h1_1.set_section_content(String::from("*s1_1*"), &mut org); - /// - /// let mut writer = Vec::new(); - /// org.write_org(&mut writer).unwrap(); - /// assert_eq!( - /// String::from_utf8(writer).unwrap(), - /// r#" - /// * h1 - /// s1 - /// ** h1_1 - /// *s1_1* - /// "#, - /// ); - /// ``` - pub fn set_section_content<'a, S>(&mut self, content: S, org: &mut Org<'a>) - where - S: Into>, - { - if let Some(sec_n) = self.sec_n { - let children: Vec<_> = sec_n.children(&org.arena).collect(); - for child in children { - child.detach(&mut org.arena); - } - } else { - let sec_n = org.arena.new_node(Element::Section); - self.sec_n = Some(sec_n); - self.ttl_n.insert_after(sec_n, &mut org.arena); - } - - match content.into() { - Cow::Borrowed(content) => parse_container( - &mut org.arena, - Container::Block { - node: self.sec_n.unwrap(), - content, - }, - &ParseConfig::default(), - ), - Cow::Owned(ref content) => parse_container( - &mut OwnedArena::new(&mut org.arena), - Container::Block { - node: self.sec_n.unwrap(), - content, - }, - &ParseConfig::default(), - ), - } - - org.debug_validate(); - } - - /// Returns the parent of this headline, or `None` if it is detached or attached to the document. 
- /// - /// ```rust - /// # use orgize::{elements::Title, Headline, Org}; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// ** h1_2 - /// *** h1_2_1 - /// *** h1_2_2 - /// ** h1_3 - /// "#, - /// ); - /// - /// let h1 = org.headlines().nth(0).unwrap(); - /// let h1_1 = org.headlines().nth(1).unwrap(); - /// let h1_2_1 = org.headlines().nth(3).unwrap(); - /// - /// assert_eq!(h1_1.parent(&org).unwrap().title(&org).raw, "h1"); - /// assert_eq!(h1_2_1.parent(&org).unwrap().title(&org).raw, "h1_2"); - /// - /// assert!(h1.parent(&org).is_none()); - /// - /// // detached headline have no parent - /// assert!(Headline::new(Title::default(), &mut org).parent(&org).is_none()); - /// ``` - pub fn parent(self, org: &Org) -> Option { - org.arena[self.hdl_n].parent().and_then(|n| match org[n] { - Element::Headline { level } => Some(Headline::from_node(n, level, org)), - Element::Document { .. } => None, - _ => unreachable!(), - }) - } - - /// Returns an iterator of this headline's children. - /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// ** h1_2 - /// *** h1_2_1 - /// *** h1_2_2 - /// ** h1_3 - /// "#, - /// ); - /// - /// let h1 = org.headlines().nth(0).unwrap(); - /// - /// let mut iter = h1.children(&org); - /// - /// assert_eq!(iter.next().unwrap().title(&org).raw, "h1_1"); - /// assert_eq!(iter.next().unwrap().title(&org).raw, "h1_2"); - /// assert_eq!(iter.next().unwrap().title(&org).raw, "h1_3"); - /// assert!(iter.next().is_none()); - /// ``` - pub fn children<'a>(self, org: &'a Org) -> impl Iterator + 'a { - self.hdl_n - .children(&org.arena) - // skip title and section - .skip(if self.sec_n.is_some() { 2 } else { 1 }) - .filter_map(move |n| match org[n] { - Element::Headline { level } => Some(Headline::from_node(n, level, org)), - _ => unreachable!(), - }) - } - - /// Returns the first child of this headline, or `None` if it has no child. 
- /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// ** h1_2 - /// *** h1_2_1 - /// *** h1_2_2 - /// ** h1_3 - /// "#, - /// ); - /// - /// let h1_1 = org.headlines().nth(1).unwrap(); - /// let h1_2 = org.headlines().nth(2).unwrap(); - /// let h1_3 = org.headlines().nth(5).unwrap(); - /// - /// assert_eq!(h1_2.first_child(&org).unwrap().title(&org).raw, "h1_2_1"); - /// - /// assert!(h1_1.first_child(&org).is_none()); - /// assert!(h1_3.first_child(&org).is_none()); - /// ``` - pub fn first_child(self, org: &Org) -> Option { - self.hdl_n - .children(&org.arena) - // skip title and section - .nth(if self.sec_n.is_some() { 2 } else { 1 }) - .map(|n| match org[n] { - Element::Headline { level } => Headline::from_node(n, level, org), - _ => unreachable!(), - }) - } - - /// Returns the last child of this headline, or `None` if it has no child. - /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// ** h1_2 - /// *** h1_2_1 - /// *** h1_2_2 - /// ** h1_3 - /// "#, - /// ); - /// - /// let h1_1 = org.headlines().nth(1).unwrap(); - /// let h1_2 = org.headlines().nth(2).unwrap(); - /// let h1_3 = org.headlines().nth(5).unwrap(); - /// - /// assert_eq!(h1_2.last_child(&org).unwrap().title(&org).raw, "h1_2_2"); - /// - /// assert!(h1_1.last_child(&org).is_none()); - /// assert!(h1_3.last_child(&org).is_none()); - /// ``` - pub fn last_child(self, org: &Org) -> Option { - org.arena[self.hdl_n] - .last_child() - .and_then(|n| match org[n] { - Element::Headline { level } => Some(Headline::from_node(n, level, org)), - Element::Section | Element::Title(_) => None, - _ => unreachable!("Headline should only contains section and headline."), - }) - } - - /// Returns the previous sibling of this headline, or `None` if it is a first child. 
- /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// ** h1_2 - /// *** h1_2_1 - /// *** h1_2_2 - /// ** h1_3 - /// "#, - /// ); - /// - /// let h1_1 = org.headlines().nth(1).unwrap(); - /// let h1_2 = org.headlines().nth(2).unwrap(); - /// let h1_2_1 = org.headlines().nth(3).unwrap(); - /// - /// assert_eq!(h1_2.previous(&org).unwrap().title(&org).raw, "h1_1"); - /// - /// assert!(h1_1.previous(&org).is_none()); - /// assert!(h1_2_1.previous(&org).is_none()); - /// ``` - pub fn previous(self, org: &Org) -> Option { - org.arena[self.hdl_n] - .previous_sibling() - .and_then(|n| match org[n] { - Element::Headline { level } => Some(Headline::from_node(n, level, org)), - Element::Title(_) | Element::Section => None, - _ => unreachable!(), - }) - } - - /// Returns the next sibling of this headline, or `None` if it is a last child. - /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// ** h1_2 - /// *** h1_2_1 - /// *** h1_2_2 - /// ** h1_3 - /// "#, - /// ); - /// - /// let h1_2 = org.headlines().nth(2).unwrap(); - /// let h1_2_2 = org.headlines().nth(4).unwrap(); - /// let h1_3 = org.headlines().nth(5).unwrap(); - /// - /// assert_eq!(h1_2.next(&org).unwrap().title(&org).raw, "h1_3"); - /// - /// assert!(h1_3.next(&org).is_none()); - /// assert!(h1_2_2.next(&org).is_none()); - /// ``` - pub fn next(self, org: &Org) -> Option { - org.arena[self.hdl_n].next_sibling().map(|n| match org[n] { - Element::Headline { level } => Headline::from_node(n, level, org), - _ => unreachable!(), - }) - } - - /// Detaches this headline from arena. 
- /// - /// ```rust - /// # use orgize::Org; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// ** h1_2 - /// *** h1_2_1 - /// *** h1_2_2 - /// ** h1_3 - /// "#, - /// ); - /// - /// let h1_2 = org.headlines().nth(2).unwrap(); - /// - /// h1_2.detach(&mut org); - /// - /// let mut writer = Vec::new(); - /// org.write_org(&mut writer).unwrap(); - /// assert_eq!( - /// String::from_utf8(writer).unwrap(), - /// r#" - /// * h1 - /// ** h1_1 - /// ** h1_3 - /// "#, - /// ); - /// ``` - pub fn detach(self, org: &mut Org) { - self.hdl_n.detach(&mut org.arena); - } - - /// Returns `true` if this headline is detached. - pub fn is_detached(self, org: &Org) -> bool { - org.arena[self.hdl_n].parent().is_none() - } - - /// Appends a new child to this headline. - /// - /// Returns an error if the given new child was already attached, or - /// the given new child didn't meet the requirements. - /// - /// ```rust - /// # use orgize::{elements::Title, Headline, Org}; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// ***** h1_1_1 - /// "#, - /// ); - /// - /// let h1_1 = org.headlines().nth(1).unwrap(); - /// - /// let mut h1_1_2 = Headline::new( - /// Title { - /// raw: "h1_1_2".into(), - /// ..Default::default() - /// }, - /// &mut org, - /// ); - /// - /// // level must be greater than 2, and smaller than or equal to 5 - /// h1_1_2.set_level(2, &mut org).unwrap(); - /// assert!(h1_1.append(h1_1_2, &mut org).is_err()); - /// h1_1_2.set_level(6, &mut org).unwrap(); - /// assert!(h1_1.append(h1_1_2, &mut org).is_err()); - /// - /// h1_1_2.set_level(4, &mut org).unwrap(); - /// assert!(h1_1.append(h1_1_2, &mut org).is_ok()); - /// - /// let mut writer = Vec::new(); - /// org.write_org(&mut writer).unwrap(); - /// assert_eq!( - /// String::from_utf8(writer).unwrap(), - /// r#" - /// * h1 - /// ** h1_1 - /// ***** h1_1_1 - /// **** h1_1_2 - /// "#, - /// ); - /// - /// // cannot append an attached headline - /// 
assert!(h1_1.append(h1_1_2, &mut org).is_err()); - /// ``` - pub fn append(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { - hdl.check_detached(org)?; - - if let Some(last) = self.last_child(org) { - hdl.check_level(self.lvl + 1..=last.lvl)?; - } else { - hdl.check_level(self.lvl + 1..=usize::MAX)?; - } - - self.hdl_n.append(hdl.hdl_n, &mut org.arena); - - org.debug_validate(); - - Ok(()) - } - - /// Prepends a new child to this headline. - /// - /// Returns an error if the given new child was already attached, or - /// the given new child didn't meet the requirements. - /// - /// ```rust - /// # use orgize::{elements::Title, Headline, Org}; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// ***** h1_1_1 - /// "#, - /// ); - /// - /// let h1_1 = org.headlines().nth(1).unwrap(); - /// - /// let mut h1_1_2 = Headline::new( - /// Title { - /// raw: "h1_1_2".into(), - /// ..Default::default() - /// }, - /// &mut org, - /// ); - /// - /// // level must be greater than or equal to 5 - /// h1_1_2.set_level(2, &mut org).unwrap(); - /// assert!(h1_1.prepend(h1_1_2, &mut org).is_err()); - /// - /// h1_1_2.set_level(5, &mut org).unwrap(); - /// assert!(h1_1.prepend(h1_1_2, &mut org).is_ok()); - /// - /// // cannot prepend an attached headline - /// assert!(h1_1.prepend(h1_1_2, &mut org).is_err()); - /// ``` - pub fn prepend(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { - hdl.check_detached(org)?; - - if let Some(first) = self.first_child(org) { - hdl.check_level(first.lvl..=usize::MAX)?; - } else { - hdl.check_level(self.lvl + 1..=usize::MAX)?; - } - - self.sec_n - .unwrap_or(self.ttl_n) - .insert_after(hdl.hdl_n, &mut org.arena); - - org.debug_validate(); - - Ok(()) - } - - /// Inserts a new sibling before this headline. - /// - /// Returns an error if the given new child was already attached, or - /// the given new child didn't meet the requirements. 
- /// - /// ```rust - /// # use orgize::{elements::Title, Headline, Org}; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// **** h1_1_1 - /// *** h1_1_3 - /// "#, - /// ); - /// - /// let h1_1_3 = org.headlines().nth(3).unwrap(); - /// - /// let mut h1_1_2 = Headline::new( - /// Title { - /// raw: "h1_1_2".into(), - /// ..Default::default() - /// }, - /// &mut org, - /// ); - /// - /// // level must be greater than or equal to 3, but smaller than or equal to 4 - /// h1_1_2.set_level(2, &mut org).unwrap(); - /// assert!(h1_1_3.insert_before(h1_1_2, &mut org).is_err()); - /// h1_1_2.set_level(5, &mut org).unwrap(); - /// assert!(h1_1_3.insert_before(h1_1_2, &mut org).is_err()); - /// - /// h1_1_2.set_level(4, &mut org).unwrap(); - /// assert!(h1_1_3.insert_before(h1_1_2, &mut org).is_ok()); - /// - /// let mut writer = Vec::new(); - /// org.write_org(&mut writer).unwrap(); - /// assert_eq!( - /// String::from_utf8(writer).unwrap(), - /// r#" - /// * h1 - /// ** h1_1 - /// **** h1_1_1 - /// **** h1_1_2 - /// *** h1_1_3 - /// "#, - /// ); - /// - /// // cannot insert an attached headline - /// assert!(h1_1_3.insert_before(h1_1_2, &mut org).is_err()); - /// ``` - pub fn insert_before(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { - hdl.check_detached(org)?; - - if let Some(previous) = self.previous(org) { - hdl.check_level(self.lvl..=previous.lvl)?; - } else { - hdl.check_level(self.lvl..=usize::MAX)?; - } - - self.hdl_n.insert_before(hdl.hdl_n, &mut org.arena); - - org.debug_validate(); - - Ok(()) - } - - /// Inserts a new sibling after this headline. - /// - /// Returns an error if the given new child was already attached, or - /// the given new child didn't meet the requirements. 
- /// - /// ```rust - /// # use orgize::{elements::Title, Headline, Org}; - /// # - /// let mut org = Org::parse( - /// r#" - /// * h1 - /// ** h1_1 - /// **** h1_1_1 - /// *** h1_1_3 - /// "#, - /// ); - /// - /// let h1_1_1 = org.headlines().nth(2).unwrap(); - /// - /// let mut h1_1_2 = Headline::new( - /// Title { - /// raw: "h1_1_2".into(), - /// ..Default::default() - /// }, - /// &mut org, - /// ); - /// - /// // level must be greater than or equal to 3, but smaller than or equal to 4 - /// h1_1_2.set_level(2, &mut org).unwrap(); - /// assert!(h1_1_1.insert_after(h1_1_2, &mut org).is_err()); - /// h1_1_2.set_level(5, &mut org).unwrap(); - /// assert!(h1_1_1.insert_after(h1_1_2, &mut org).is_err()); - /// - /// h1_1_2.set_level(4, &mut org).unwrap(); - /// assert!(h1_1_1.insert_after(h1_1_2, &mut org).is_ok()); - /// - /// let mut writer = Vec::new(); - /// org.write_org(&mut writer).unwrap(); - /// assert_eq!( - /// String::from_utf8(writer).unwrap(), - /// r#" - /// * h1 - /// ** h1_1 - /// **** h1_1_1 - /// **** h1_1_2 - /// *** h1_1_3 - /// "#, - /// ); - /// - /// // cannot insert an attached headline - /// assert!(h1_1_1.insert_after(h1_1_2, &mut org).is_err()); - /// ``` - pub fn insert_after(self, hdl: Headline, org: &mut Org) -> ValidationResult<()> { - hdl.check_detached(org)?; - - if let Some(next) = self.next(org) { - hdl.check_level(next.lvl..=self.lvl)?; - } else if let Some(parent) = self.parent(org) { - hdl.check_level(parent.lvl + 1..=self.lvl)?; - } else { - hdl.check_level(1..=self.lvl)?; - } - - self.hdl_n.insert_after(hdl.hdl_n, &mut org.arena); - - org.debug_validate(); - - Ok(()) - } - - fn check_detached(self, org: &Org) -> ValidationResult<()> { - if !self.is_detached(org) { - Err(ValidationError::ExpectedDetached { at: self.hdl_n }) - } else { - Ok(()) - } - } - - fn check_level(self, range: RangeInclusive) -> ValidationResult<()> { - if !range.contains(&self.lvl) { - Err(ValidationError::HeadlineLevelMismatch { - range, - at: 
self.hdl_n, - }) - } else { - Ok(()) - } - } -} - -impl Org<'_> { - /// Returns the `Document`. - pub fn document(&self) -> Document { - Document::from_org(self) - } - - /// Returns an iterator of `Headline`s. - pub fn headlines(&self) -> impl Iterator + '_ { - self.root - .descendants(&self.arena) - .skip(1) - .filter_map(move |node| match self[node] { - Element::Headline { level } => Some(Headline::from_node(node, level, self)), - _ => None, - }) - } -} diff --git a/src/lib.rs b/src/lib.rs index f8e81f0..c1fd96a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,242 +1,23 @@ -//! A Rust library for parsing orgmode files. -//! -//! [Live demo](https://orgize.herokuapp.com/) -//! -//! # Parse -//! -//! To parse a orgmode string, simply invoking the [`Org::parse`] function: -//! -//! [`Org::parse`]: struct.Org.html#method.parse -//! -//! ```rust -//! use orgize::Org; -//! -//! Org::parse("* DONE Title :tag:"); -//! ``` -//! -//! or [`Org::parse_custom`]: -//! -//! [`Org::parse_custom`]: struct.Org.html#method.parse_custom -//! -//! ```rust -//! use orgize::{Org, ParseConfig}; -//! -//! Org::parse_custom( -//! "* TASK Title 1", -//! &ParseConfig { -//! // custom todo keywords -//! todo_keywords: (vec!["TASK".to_string()], vec![]), -//! ..Default::default() -//! }, -//! ); -//! ``` -//! -//! # Iter -//! -//! [`Org::iter`] function will returns an iterator of [`Event`]s, which is -//! a simple wrapper of [`Element`]. -//! -//! [`Org::iter`]: struct.Org.html#method.iter -//! [`Event`]: enum.Event.html -//! [`Element`]: elements/enum.Element.html -//! -//! ```rust -//! use orgize::Org; -//! -//! for event in Org::parse("* DONE Title :tag:").iter() { -//! // handling the event -//! } -//! ``` -//! -//! **Note**: whether an element is container or not, it will appears twice in one loop. -//! One as [`Event::Start(element)`], one as [`Event::End(element)`]. -//! -//! [`Event::Start(element)`]: enum.Event.html#variant.Start -//! 
[`Event::End(element)`]: enum.Event.html#variant.End -//! -//! # Render html -//! -//! You can call the [`Org::write_html`] function to generate html directly, which -//! uses the [`DefaultHtmlHandler`] internally: -//! -//! [`Org::write_html`]: struct.Org.html#method.write_html -//! [`DefaultHtmlHandler`]: export/struct.DefaultHtmlHandler.html -//! -//! ```rust -//! use orgize::Org; -//! -//! let mut writer = Vec::new(); -//! Org::parse("* title\n*section*").write_html(&mut writer).unwrap(); -//! -//! assert_eq!( -//! String::from_utf8(writer).unwrap(), -//! "

    title

    section

    " -//! ); -//! ``` -//! -//! # Render html with custom `HtmlHandler` -//! -//! To customize html rendering, simply implementing [`HtmlHandler`] trait and passing -//! it to the [`Org::write_html_custom`] function. -//! -//! [`HtmlHandler`]: export/trait.HtmlHandler.html -//! [`Org::write_html_custom`]: struct.Org.html#method.write_html_custom -//! -//! The following code demonstrates how to add a id for every headline and return -//! own error type while rendering. -//! -//! ```rust -//! use std::convert::From; -//! use std::io::{Error as IOError, Write}; -//! use std::string::FromUtf8Error; -//! -//! use orgize::export::{DefaultHtmlHandler, HtmlHandler}; -//! use orgize::{Element, Org}; -//! use slugify::slugify; -//! -//! #[derive(Debug)] -//! enum MyError { -//! IO(IOError), -//! Heading, -//! Utf8(FromUtf8Error), -//! } -//! -//! // From trait is required for custom error type -//! impl From for MyError { -//! fn from(err: IOError) -> Self { -//! MyError::IO(err) -//! } -//! } -//! -//! impl From for MyError { -//! fn from(err: FromUtf8Error) -> Self { -//! MyError::Utf8(err) -//! } -//! } -//! -//! #[derive(Default)] -//! struct MyHtmlHandler(DefaultHtmlHandler); -//! -//! impl HtmlHandler for MyHtmlHandler { -//! fn start(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { -//! if let Element::Title(title) = element { -//! if title.level > 6 { -//! return Err(MyError::Heading); -//! } else { -//! write!( -//! w, -//! "", -//! title.level, -//! slugify!(&title.raw), -//! )?; -//! } -//! } else { -//! // fallthrough to default handler -//! self.0.start(w, element)?; -//! } -//! Ok(()) -//! } -//! -//! fn end(&mut self, mut w: W, element: &Element) -> Result<(), MyError> { -//! if let Element::Title(title) = element { -//! write!(w, "", title.level)?; -//! } else { -//! self.0.end(w, element)?; -//! } -//! Ok(()) -//! } -//! } -//! -//! fn main() -> Result<(), MyError> { -//! let mut writer = Vec::new(); -//! 
let mut handler = MyHtmlHandler::default(); -//! Org::parse("* title\n*section*").write_html_custom(&mut writer, &mut handler)?; -//! -//! assert_eq!( -//! String::from_utf8(writer)?, -//! "

    title

    \ -//!

    section

    " -//! ); -//! -//! Ok(()) -//! } -//! ``` -//! -//! **Note**: as I mentioned above, each element will appears two times while iterating. -//! And handler will silently ignores all end events from non-container elements. -//! -//! So if you want to change how a non-container element renders, just redefine the `start` -//! function and leave the `end` function unchanged. -//! -//! # Serde -//! -//! `Org` struct have already implemented serde's `Serialize` trait. It means you can -//! serialize it into any format supported by serde, such as json: -//! -//! ```rust -//! use orgize::Org; -//! use serde_json::{json, to_string}; -//! -//! let org = Org::parse("I 'm *bold*."); -//! #[cfg(feature = "ser")] -//! println!("{}", to_string(&org).unwrap()); -//! -//! // { -//! // "type": "document", -//! // "children": [{ -//! // "type": "section", -//! // "children": [{ -//! // "type": "paragraph", -//! // "children":[{ -//! // "type": "text", -//! // "value":"I 'm " -//! // }, { -//! // "type": "bold", -//! // "children":[{ -//! // "type": "text", -//! // "value": "bold" -//! // }] -//! // }, { -//! // "type":"text", -//! // "value":"." -//! // }] -//! // }] -//! // }] -//! // } -//! ``` -//! -//! # Features -//! -//! By now, orgize provides three features: -//! -//! + `ser`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default. -//! -//! + `chrono`: adds the ability to convert `Datetime` into `chrono` structs, disabled by default. -//! -//! + `syntect`: provides [`SyntectHtmlHandler`] for highlighting code block, disabled by default. -//! -//! [`SyntectHtmlHandler`]: export/struct.SyntectHtmlHandler.html -//! -//! # License -//! -//! MIT +#![doc = include_str!("../README.md")] -mod config; -pub mod elements; +pub mod ast; +pub mod config; +mod entities; pub mod export; -mod headline; mod org; -mod parse; -mod parsers; -mod validate; +mod replace; +mod syntax; +#[cfg(test)] +mod tests; -// Re-export of the indextree crate. 
-pub use indextree; -#[cfg(feature = "syntect")] -pub use syntect; +// Re-export of the rowan crate. +pub use rowan; pub use config::ParseConfig; -pub use elements::Element; -pub use headline::{Document, Headline}; -pub use org::{Event, Org}; -pub use validate::ValidationError; +pub use org::Org; +pub use rowan::{TextRange, TextSize}; +pub use syntax::{ + SyntaxElement, SyntaxElementChildren, SyntaxKind, SyntaxNode, SyntaxNodeChildren, SyntaxToken, +}; + +pub(crate) use syntax::combinator::lossless_parser; diff --git a/src/org.rs b/src/org.rs index 37c06fc..f0e736b 100644 --- a/src/org.rs +++ b/src/org.rs @@ -1,193 +1,106 @@ -use indextree::{Arena, NodeEdge, NodeId}; -use std::io::{Error, Write}; -use std::ops::{Index, IndexMut}; +use rowan::ast::AstNode; +use rowan::{GreenNode, TextSize}; -use crate::{ - config::{ParseConfig, DEFAULT_CONFIG}, - elements::{Element, Keyword}, - export::{DefaultHtmlHandler, DefaultOrgHandler, HtmlHandler, OrgHandler}, - parsers::{blank_lines_count, parse_container, Container, OwnedArena}, -}; - -pub struct Org<'a> { - pub(crate) arena: Arena>, - pub(crate) root: NodeId, -} +use crate::ast::Document; +use crate::config::ParseConfig; +use crate::export::{HtmlExport, TraversalContext, Traverser}; +use crate::syntax::{OrgLanguage, SyntaxNode}; +use crate::SyntaxElement; #[derive(Debug)] -pub enum Event<'a, 'b> { - Start(&'b Element<'a>), - End(&'b Element<'a>), +pub struct Org { + pub(crate) green: GreenNode, + pub(crate) config: ParseConfig, } -impl<'a> Org<'a> { - /// Creates a new empty `Org` struct. - pub fn new() -> Org<'static> { - let mut arena = Arena::new(); - let root = arena.new_node(Element::Document { pre_blank: 0 }); - Org { arena, root } +impl Org { + /// Parse input string to Org element tree using default parse config + pub fn parse(input: impl AsRef) -> Org { + ParseConfig::default().parse(input) } - /// Parses string `text` into `Org` struct. 
- pub fn parse(text: &'a str) -> Org<'a> { - Org::parse_custom(text, &DEFAULT_CONFIG) + pub fn green(&self) -> &GreenNode { + &self.green } - /// Likes `parse`, but accepts `String`. - pub fn parse_string(text: String) -> Org<'static> { - Org::parse_string_custom(text, &DEFAULT_CONFIG) + pub fn config(&self) -> &ParseConfig { + &self.config } - /// Parses string `text` into `Org` struct with custom `ParseConfig`. - pub fn parse_custom(text: &'a str, config: &ParseConfig) -> Org<'a> { - let mut arena = Arena::new(); - let (text, pre_blank) = blank_lines_count(text); - let root = arena.new_node(Element::Document { pre_blank }); - let mut org = Org { arena, root }; + /// Returns the document + pub fn document(&self) -> Document { + Document { + syntax: SyntaxNode::new_root(self.green.clone()), + } + } - parse_container( - &mut org.arena, - Container::Document { - content: text, - node: org.root, - }, - config, + /// Returns org-mode string + pub fn to_org(&self) -> String { + self.green.to_string() + } + + /// Convert org element tree to html-format using default html handler + pub fn to_html(&self) -> String { + let mut handler = HtmlExport::default(); + self.traverse(&mut handler); + handler.finish() + } + + /// Walk through org element tree using given traverser + pub fn traverse(&self, t: &mut T) { + let mut ctx = TraversalContext::default(); + t.element( + SyntaxElement::Node(SyntaxNode::new_root(self.green.clone())), + &mut ctx, ); - - org.debug_validate(); - - org } - /// Likes `parse_custom`, but accepts `String`. 
- pub fn parse_string_custom(text: String, config: &ParseConfig) -> Org<'static> { - let mut arena = Arena::new(); - let (text, pre_blank) = blank_lines_count(&text); - let root = arena.new_node(Element::Document { pre_blank }); - let mut org = Org { arena, root }; - - parse_container( - &mut OwnedArena::new(&mut org.arena), - Container::Document { - content: text, - node: org.root, - }, - config, - ); - - org.debug_validate(); - - org - } - - /// Returns a reference to the underlay arena. - pub fn arena(&self) -> &Arena> { - &self.arena - } - - /// Returns a mutual reference to the underlay arena. - pub fn arena_mut(&mut self) -> &mut Arena> { - &mut self.arena - } - - /// Returns an iterator of `Event`s. - pub fn iter<'b>(&'b self) -> impl Iterator> + 'b { - self.root.traverse(&self.arena).map(move |edge| match edge { - NodeEdge::Start(node) => Event::Start(&self[node]), - NodeEdge::End(node) => Event::End(&self[node]), - }) - } - - /// Returns an iterator of `Keyword`s. - pub fn keywords(&self) -> impl Iterator> { - self.root - .descendants(&self.arena) - .skip(1) - .filter_map(move |node| match &self[node] { - Element::Keyword(kw) => Some(kw), - _ => None, - }) - } - - /// Writes an `Org` struct as html format. - pub fn write_html(&self, writer: W) -> Result<(), Error> - where - W: Write, - { - self.write_html_custom(writer, &mut DefaultHtmlHandler) - } - - /// Writes an `Org` struct as html format with custom `HtmlHandler`. 
- pub fn write_html_custom(&self, mut writer: W, handler: &mut H) -> Result<(), E> - where - W: Write, - E: From, - H: HtmlHandler, - { - for event in self.iter() { - match event { - Event::Start(element) => handler.start(&mut writer, element)?, - Event::End(element) => handler.end(&mut writer, element)?, + /// Returns the first node in org element tree in depth first order + pub fn first_node>(&self) -> Option { + fn find>(node: SyntaxNode) -> Option { + if N::can_cast(node.kind()) { + N::cast(node) + } else { + node.children().find_map(find) } } - - Ok(()) + find(SyntaxNode::new_root(self.green.clone())) } - /// Writes an `Org` struct as org format. - pub fn write_org(&self, writer: W) -> Result<(), Error> - where - W: Write, - { - self.write_org_custom(writer, &mut DefaultOrgHandler) - } - - /// Writes an `Org` struct as org format with custom `OrgHandler`. - pub fn write_org_custom(&self, mut writer: W, handler: &mut H) -> Result<(), E> - where - W: Write, - E: From, - H: OrgHandler, - { - for event in self.iter() { - match event { - Event::Start(element) => handler.start(&mut writer, element)?, - Event::End(element) => handler.end(&mut writer, element)?, + /// Returns node in given offset + /// + /// ```rust + /// use orgize::{Org, ast::Headline}; + /// + /// let org = Org::parse("\n\n* foo\n* bar"); + /// + /// assert!(org.node_at_offset::(0).is_none()); + /// + /// let hdl = org.node_at_offset::(2).unwrap(); + /// assert_eq!(hdl.title_raw(), "foo"); + /// + /// let hdl = org.node_at_offset::(9).unwrap(); + /// assert_eq!(hdl.title_raw(), "bar"); + /// + /// assert!(org.node_at_offset::(999).is_none()); + /// ``` + pub fn node_at_offset>( + &self, + offset: impl Into, + ) -> Option { + let offset = offset.into(); + fn find>( + node: SyntaxNode, + offset: TextSize, + ) -> Option { + if !node.text_range().contains(offset) { + None + } else if N::can_cast(node.kind()) { + N::cast(node) + } else { + node.children().find_map(|node| find(node, offset)) } } - - 
Ok(()) - } -} - -impl Default for Org<'static> { - fn default() -> Self { - Org::new() - } -} - -impl<'a> Index for Org<'a> { - type Output = Element<'a>; - - fn index(&self, node_id: NodeId) -> &Self::Output { - self.arena[node_id].get() - } -} - -impl<'a> IndexMut for Org<'a> { - fn index_mut(&mut self, node_id: NodeId) -> &mut Self::Output { - self.arena[node_id].get_mut() - } -} - -#[cfg(feature = "ser")] -use serde::{ser::Serializer, Serialize}; - -#[cfg(feature = "ser")] -impl Serialize for Org<'_> { - fn serialize(&self, serializer: S) -> Result { - use serde_indextree::Node; - - serializer.serialize_newtype_struct("Org", &Node::new(self.root, &self.arena)) + find(SyntaxNode::new_root(self.green.clone()), offset) } } diff --git a/src/parse/combinators.rs b/src/parse/combinators.rs deleted file mode 100644 index f5d518e..0000000 --- a/src/parse/combinators.rs +++ /dev/null @@ -1,136 +0,0 @@ -//! Parsers combinators - -use memchr::memchr; -use nom::{ - bytes::complete::take_while1, - combinator::verify, - error::{make_error, ErrorKind}, - Err, IResult, -}; - -// read until the first line_ending, if line_ending is not present, return the input directly -pub fn line(input: &str) -> IResult<&str, &str, ()> { - if let Some(i) = memchr(b'\n', input.as_bytes()) { - if i > 0 && input.as_bytes()[i - 1] == b'\r' { - Ok((&input[i + 1..], &input[0..i - 1])) - } else { - Ok((&input[i + 1..], &input[0..i])) - } - } else { - Ok(("", input)) - } -} - -pub fn lines_till(predicate: F) -> impl Fn(&str) -> IResult<&str, &str, ()> -where - F: Fn(&str) -> bool, -{ - move |i| { - let mut input = i; - - loop { - // TODO: better error kind - if input.is_empty() { - return Err(Err::Error(make_error(input, ErrorKind::Many0))); - } - - let (input_, line_) = line(input)?; - - debug_assert_ne!(input, input_); - - if predicate(line_) { - let offset = i.len() - input.len(); - return Ok((input_, &i[0..offset])); - } - - input = input_; - } - } -} - -pub fn lines_while(predicate: F) -> impl 
Fn(&str) -> IResult<&str, &str, ()> -where - F: Fn(&str) -> bool, -{ - move |i| { - let mut input = i; - - loop { - // unlike lines_till, line_while won't return error - if input.is_empty() { - return Ok(("", i)); - } - - let (input_, line_) = line(input)?; - - debug_assert_ne!(input, input_); - - if !predicate(line_) { - let offset = i.len() - input.len(); - return Ok((input, &i[0..offset])); - } - - input = input_; - } - } -} - -#[test] -fn test_lines_while() { - assert_eq!(lines_while(|line| line == "foo")("foo"), Ok(("", "foo"))); - assert_eq!(lines_while(|line| line == "foo")("bar"), Ok(("bar", ""))); - assert_eq!( - lines_while(|line| line == "foo")("foo\n\n"), - Ok(("\n", "foo\n")) - ); - assert_eq!( - lines_while(|line| line.trim().is_empty())("\n\n\n"), - Ok(("", "\n\n\n")) - ); -} - -pub fn eol(input: &str) -> IResult<&str, &str, ()> { - verify(line, |s: &str| { - s.as_bytes().iter().all(u8::is_ascii_whitespace) - })(input) -} - -pub fn one_word(input: &str) -> IResult<&str, &str, ()> { - take_while1(|c: char| !c.is_ascii_whitespace())(input) -} - -pub fn blank_lines_count(input: &str) -> IResult<&str, usize, ()> { - let mut count = 0; - let mut input = input; - - loop { - if input.is_empty() { - return Ok(("", count)); - } - - let (input_, line_) = line(input)?; - - debug_assert_ne!(input, input_); - - if !line_.chars().all(char::is_whitespace) { - return Ok((input, count)); - } - - count += 1; - - input = input_; - } -} - -#[test] -fn test_blank_lines_count() { - assert_eq!(blank_lines_count("foo"), Ok(("foo", 0))); - assert_eq!(blank_lines_count(" foo"), Ok((" foo", 0))); - assert_eq!(blank_lines_count(" \t\nfoo\n"), Ok(("foo\n", 1))); - assert_eq!(blank_lines_count("\n \r\n\nfoo\n"), Ok(("foo\n", 3))); - assert_eq!( - blank_lines_count("\r\n \n \r\n foo\n"), - Ok((" foo\n", 3)) - ); - assert_eq!(blank_lines_count("\r\n \n \r\n \n"), Ok(("", 4))); -} diff --git a/src/parse/mod.rs b/src/parse/mod.rs deleted file mode 100644 index 0c49327..0000000 --- 
a/src/parse/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod combinators; diff --git a/src/parsers.rs b/src/parsers.rs deleted file mode 100644 index d277057..0000000 --- a/src/parsers.rs +++ /dev/null @@ -1,657 +0,0 @@ -use std::iter::once; -use std::marker::PhantomData; - -use indextree::{Arena, NodeId}; -use jetscii::{bytes, BytesConst}; -use memchr::{memchr, memchr_iter}; -use nom::bytes::complete::take_while1; - -use crate::config::ParseConfig; -use crate::elements::{ - block::RawBlock, emphasis::Emphasis, keyword::RawKeyword, radio_target::parse_radio_target, - Clock, Comment, Cookie, Drawer, DynBlock, Element, FixedWidth, FnDef, FnRef, InlineCall, - InlineSrc, Link, List, ListItem, Macros, Rule, Snippet, Table, TableCell, TableRow, Target, - Timestamp, Title, -}; -use crate::parse::combinators::lines_while; - -pub trait ElementArena<'a> { - fn append(&mut self, element: T, parent: NodeId) -> NodeId - where - T: Into>; - fn insert_before_last_child(&mut self, element: T, parent: NodeId) -> NodeId - where - T: Into>; - fn set(&mut self, node: NodeId, element: T) - where - T: Into>; -} - -pub type BorrowedArena<'a> = Arena>; - -impl<'a> ElementArena<'a> for BorrowedArena<'a> { - fn append(&mut self, element: T, parent: NodeId) -> NodeId - where - T: Into>, - { - let node = self.new_node(element.into()); - parent.append(node, self); - node - } - - fn insert_before_last_child(&mut self, element: T, parent: NodeId) -> NodeId - where - T: Into>, - { - if let Some(child) = self[parent].last_child() { - let node = self.new_node(element.into()); - child.insert_before(node, self); - node - } else { - self.append(element, parent) - } - } - - fn set(&mut self, node: NodeId, element: T) - where - T: Into>, - { - *self[node].get_mut() = element.into(); - } -} - -pub struct OwnedArena<'a, 'b, 'c> { - arena: &'b mut Arena>, - phantom: PhantomData<&'a ()>, -} - -impl<'a, 'b, 'c> OwnedArena<'a, 'b, 'c> { - pub fn new(arena: &'b mut Arena>) -> OwnedArena<'a, 'b, 'c> { - OwnedArena { 
- arena, - phantom: PhantomData, - } - } -} - -impl<'a> ElementArena<'a> for OwnedArena<'a, '_, '_> { - fn append(&mut self, element: T, parent: NodeId) -> NodeId - where - T: Into>, - { - self.arena.append(element.into().into_owned(), parent) - } - - fn insert_before_last_child(&mut self, element: T, parent: NodeId) -> NodeId - where - T: Into>, - { - self.arena - .insert_before_last_child(element.into().into_owned(), parent) - } - - fn set(&mut self, node: NodeId, element: T) - where - T: Into>, - { - self.arena.set(node, element.into().into_owned()); - } -} - -#[derive(Debug)] -pub enum Container<'a> { - // Block, List Item - Block { content: &'a str, node: NodeId }, - // Paragraph, Inline Markup - Inline { content: &'a str, node: NodeId }, - // Headline - Headline { content: &'a str, node: NodeId }, - // Document - Document { content: &'a str, node: NodeId }, -} - -pub fn parse_container<'a, T: ElementArena<'a>>( - arena: &mut T, - container: Container<'a>, - config: &ParseConfig, -) { - let containers = &mut vec![container]; - - while let Some(container) = containers.pop() { - match container { - Container::Document { content, node } => { - parse_section_and_headlines(arena, content, node, containers); - } - Container::Headline { content, node } => { - parse_headline_content(arena, content, node, containers, config); - } - Container::Block { content, node } => { - parse_blocks(arena, content, node, containers); - } - Container::Inline { content, node } => { - parse_inlines(arena, content, node, containers); - } - } - } -} - -pub fn parse_headline_content<'a, T: ElementArena<'a>>( - arena: &mut T, - content: &'a str, - parent: NodeId, - containers: &mut Vec>, - config: &ParseConfig, -) { - let (tail, (title, content)) = Title::parse(content, config).unwrap(); - let node = arena.append(title, parent); - containers.push(Container::Inline { content, node }); - parse_section_and_headlines(arena, tail, parent, containers); -} - -pub fn 
parse_section_and_headlines<'a, T: ElementArena<'a>>( - arena: &mut T, - content: &'a str, - parent: NodeId, - containers: &mut Vec>, -) { - let content = blank_lines_count(content).0; - - if content.is_empty() { - return; - } - - let mut last_end = 0; - for i in memchr_iter(b'\n', content.as_bytes()).chain(once(content.len())) { - if let Some((mut tail, (headline_content, level))) = parse_headline(&content[last_end..]) { - if last_end != 0 { - let node = arena.append(Element::Section, parent); - let content = &content[0..last_end]; - containers.push(Container::Block { content, node }); - } - - let node = arena.append(Element::Headline { level }, parent); - containers.push(Container::Headline { - content: headline_content, - node, - }); - - while let Some((new_tail, (content, level))) = parse_headline(tail) { - debug_assert_ne!(tail, new_tail); - let node = arena.append(Element::Headline { level }, parent); - containers.push(Container::Headline { content, node }); - tail = new_tail; - } - return; - } - last_end = i + 1; - } - - let node = arena.append(Element::Section, parent); - containers.push(Container::Block { content, node }); -} - -pub fn parse_blocks<'a, T: ElementArena<'a>>( - arena: &mut T, - content: &'a str, - parent: NodeId, - containers: &mut Vec>, -) { - let mut tail = blank_lines_count(content).0; - - if let Some(new_tail) = parse_block(content, arena, parent, containers) { - tail = blank_lines_count(new_tail).0; - } - - let mut text = tail; - let mut pos = 0; - - while !tail.is_empty() { - let i = memchr(b'\n', tail.as_bytes()) - .map(|i| i + 1) - .unwrap_or_else(|| tail.len()); - if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) { - let (tail_, blank) = blank_lines_count(&tail[i..]); - debug_assert_ne!(tail, tail_); - tail = tail_; - - let node = arena.append( - Element::Paragraph { - // including the current line (&tail[0..i]) - post_blank: blank + 1, - }, - parent, - ); - - containers.push(Container::Inline { - content: 
&text[0..pos].trim_end(), - node, - }); - - pos = 0; - text = tail; - } else if let Some(new_tail) = parse_block(tail, arena, parent, containers) { - if pos != 0 { - let node = - arena.insert_before_last_child(Element::Paragraph { post_blank: 0 }, parent); - - containers.push(Container::Inline { - content: &text[0..pos].trim_end(), - node, - }); - - pos = 0; - } - debug_assert_ne!(tail, blank_lines_count(new_tail).0); - tail = blank_lines_count(new_tail).0; - text = tail; - } else { - debug_assert_ne!(tail, &tail[i..]); - tail = &tail[i..]; - pos += i; - } - } - - if !text.is_empty() { - let node = arena.append(Element::Paragraph { post_blank: 0 }, parent); - - containers.push(Container::Inline { - content: &text[0..pos].trim_end(), - node, - }); - } -} - -pub fn parse_block<'a, T: ElementArena<'a>>( - contents: &'a str, - arena: &mut T, - parent: NodeId, - containers: &mut Vec>, -) -> Option<&'a str> { - match contents - .as_bytes() - .iter() - .find(|c| !c.is_ascii_whitespace())? - { - b'[' => { - let (tail, (fn_def, content)) = FnDef::parse(contents)?; - let node = arena.append(fn_def, parent); - containers.push(Container::Block { content, node }); - Some(tail) - } - b'0'..=b'9' | b'*' => { - let tail = parse_list(arena, contents, parent, containers)?; - Some(tail) - } - b'C' => { - let (tail, clock) = Clock::parse(contents)?; - arena.append(clock, parent); - Some(tail) - } - b'\'' => { - // TODO: LaTeX environment - None - } - b'-' => { - if let Some((tail, rule)) = Rule::parse(contents) { - arena.append(rule, parent); - Some(tail) - } else { - let tail = parse_list(arena, contents, parent, containers)?; - Some(tail) - } - } - b':' => { - if let Some((tail, (drawer, content))) = Drawer::parse(contents) { - let node = arena.append(drawer, parent); - containers.push(Container::Block { content, node }); - Some(tail) - } else { - let (tail, fixed_width) = FixedWidth::parse(contents)?; - arena.append(fixed_width, parent); - Some(tail) - } - } - b'|' => { - let tail 
= parse_org_table(arena, contents, containers, parent); - Some(tail) - } - b'+' => { - if let Some((tail, table)) = Table::parse_table_el(contents) { - arena.append(table, parent); - Some(tail) - } else { - let tail = parse_list(arena, contents, parent, containers)?; - Some(tail) - } - } - b'#' => { - if let Some((tail, block)) = RawBlock::parse(contents) { - let (element, content) = block.into_element(); - // avoid use after free - let is_block_container = match element { - Element::CenterBlock(_) - | Element::QuoteBlock(_) - | Element::VerseBlock(_) - | Element::SpecialBlock(_) => true, - _ => false, - }; - let node = arena.append(element, parent); - if is_block_container { - containers.push(Container::Block { content, node }); - } - Some(tail) - } else if let Some((tail, (dyn_block, content))) = DynBlock::parse(contents) { - let node = arena.append(dyn_block, parent); - containers.push(Container::Block { content, node }); - Some(tail) - } else if let Some((tail, keyword)) = RawKeyword::parse(contents) { - arena.append(keyword.into_element(), parent); - Some(tail) - } else { - let (tail, comment) = Comment::parse(contents)?; - arena.append(comment, parent); - Some(tail) - } - } - _ => None, - } -} - -struct InlinePositions<'a> { - bytes: &'a [u8], - pos: usize, - next: Option, -} - -impl InlinePositions<'_> { - fn new(bytes: &[u8]) -> InlinePositions { - InlinePositions { - bytes, - pos: 0, - next: Some(0), - } - } -} - -impl Iterator for InlinePositions<'_> { - type Item = usize; - - fn next(&mut self) -> Option { - lazy_static::lazy_static! 
{ - static ref PRE_BYTES: BytesConst = - bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n'); - } - - self.next.take().or_else(|| { - PRE_BYTES.find(&self.bytes[self.pos..]).map(|i| { - self.pos += i + 1; - - match self.bytes[self.pos - 1] { - b'{' => { - self.next = Some(self.pos); - self.pos - 1 - } - b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos, - _ => self.pos - 1, - } - }) - }) - } -} - -pub fn parse_inlines<'a, T: ElementArena<'a>>( - arena: &mut T, - content: &'a str, - parent: NodeId, - containers: &mut Vec>, -) { - let mut tail = content; - - if let Some(tail_) = parse_inline(tail, arena, containers, parent) { - tail = tail_; - } - - while let Some((tail_, i)) = InlinePositions::new(tail.as_bytes()) - .filter_map(|i| parse_inline(&tail[i..], arena, containers, parent).map(|tail| (tail, i))) - .next() - { - if i != 0 { - arena.insert_before_last_child( - Element::Text { - value: tail[0..i].into(), - }, - parent, - ); - } - tail = tail_; - } - - if !tail.is_empty() { - arena.append(Element::Text { value: tail.into() }, parent); - } -} - -pub fn parse_inline<'a, T: ElementArena<'a>>( - contents: &'a str, - arena: &mut T, - containers: &mut Vec>, - parent: NodeId, -) -> Option<&'a str> { - if contents.len() < 3 { - return None; - } - - let byte = contents.as_bytes()[0]; - - match byte { - b'@' => { - let (tail, snippet) = Snippet::parse(contents)?; - arena.append(snippet, parent); - Some(tail) - } - b'{' => { - let (tail, macros) = Macros::parse(contents)?; - arena.append(macros, parent); - Some(tail) - } - b'<' => { - if let Some((tail, _content)) = parse_radio_target(contents) { - arena.append(Element::RadioTarget, parent); - Some(tail) - } else if let Some((tail, target)) = Target::parse(contents) { - arena.append(target, parent); - Some(tail) - } else if let Some((tail, timestamp)) = Timestamp::parse_active(contents) { - arena.append(timestamp, parent); - Some(tail) - } else { - let (tail, timestamp) = Timestamp::parse_diary(contents)?; - 
arena.append(timestamp, parent); - Some(tail) - } - } - b'[' => { - if let Some((tail, fn_ref)) = FnRef::parse(contents) { - arena.append(fn_ref, parent); - Some(tail) - } else if let Some((tail, link)) = Link::parse(contents) { - arena.append(link, parent); - Some(tail) - } else if let Some((tail, cookie)) = Cookie::parse(contents) { - arena.append(cookie, parent); - Some(tail) - } else { - let (tail, timestamp) = Timestamp::parse_inactive(contents)?; - arena.append(timestamp, parent); - Some(tail) - } - } - b'*' | b'+' | b'/' | b'_' | b'=' | b'~' => { - let (tail, emphasis) = Emphasis::parse(contents, byte)?; - let (element, content) = emphasis.into_element(); - let is_inline_container = match element { - Element::Bold | Element::Strike | Element::Italic | Element::Underline => true, - _ => false, - }; - let node = arena.append(element, parent); - if is_inline_container { - containers.push(Container::Inline { content, node }); - } - Some(tail) - } - b's' => { - let (tail, inline_src) = InlineSrc::parse(contents)?; - arena.append(inline_src, parent); - Some(tail) - } - b'c' => { - let (tail, inline_call) = InlineCall::parse(contents)?; - arena.append(inline_call, parent); - Some(tail) - } - _ => None, - } -} - -pub fn parse_list<'a, T: ElementArena<'a>>( - arena: &mut T, - contents: &'a str, - parent: NodeId, - containers: &mut Vec>, -) -> Option<&'a str> { - let (mut tail, (first_item, content)) = ListItem::parse(contents)?; - let first_item_indent = first_item.indent; - let first_item_ordered = first_item.ordered; - - let parent = arena.append(Element::Document { pre_blank: 0 }, parent); // placeholder - - let node = arena.append(first_item, parent); - containers.push(Container::Block { content, node }); - - while let Some((tail_, (item, content))) = ListItem::parse(tail) { - if item.indent == first_item_indent { - let node = arena.append(item, parent); - containers.push(Container::Block { content, node }); - debug_assert_ne!(tail, tail_); - tail = tail_; - } 
else { - break; - } - } - - let (tail, post_blank) = blank_lines_count(tail); - - arena.set( - parent, - List { - indent: first_item_indent, - ordered: first_item_ordered, - post_blank, - }, - ); - - Some(tail) -} - -pub fn parse_org_table<'a, T: ElementArena<'a>>( - arena: &mut T, - contents: &'a str, - containers: &mut Vec>, - parent: NodeId, -) -> &'a str { - let (tail, contents) = - lines_while(|line| line.trim_start().starts_with('|'))(contents).unwrap_or((contents, "")); - let (tail, post_blank) = blank_lines_count(tail); - - let mut iter = contents.trim_end().lines().peekable(); - - let mut lines = vec![]; - - let mut has_header = false; - - // TODO: merge contiguous rules - - if let Some(line) = iter.next() { - let line = line.trim_start(); - if !line.starts_with("|-") { - lines.push(line); - } - } - - while let Some(line) = iter.next() { - let line = line.trim_start(); - if iter.peek().is_none() && line.starts_with("|-") { - break; - } else if line.starts_with("|-") { - has_header = true; - } - lines.push(line); - } - - let parent = arena.append( - Table::Org { - tblfm: None, - post_blank, - has_header, - }, - parent, - ); - - for line in lines { - if line.starts_with("|-") { - if has_header { - arena.append(Element::TableRow(TableRow::HeaderRule), parent); - has_header = false; - } else { - arena.append(Element::TableRow(TableRow::BodyRule), parent); - } - } else { - if has_header { - let parent = arena.append(Element::TableRow(TableRow::Header), parent); - for content in line.split_terminator('|').skip(1) { - let node = arena.append(Element::TableCell(TableCell::Header), parent); - containers.push(Container::Inline { - content: content.trim(), - node, - }); - } - } else { - let parent = arena.append(Element::TableRow(TableRow::Body), parent); - for content in line.split_terminator('|').skip(1) { - let node = arena.append(Element::TableCell(TableCell::Body), parent); - containers.push(Container::Inline { - content: content.trim(), - node, - }); - } - } - 
/// How an edited `TextRange` sits relative to the headlines of the current syntax tree.
///
/// `RangeShape::new` walks down through nested headlines, so the headline carried by a
/// variant is the innermost one that matched.
#[derive(Debug)]
enum RangeShape {
    /// The range lies inside `headline`, past its leading stars and the one byte that
    /// follows them, and is not exactly the range of one of its child headlines.
    /// `level` is `headline.level()`.
    InsideHeadline { headline: Headline, level: usize },
    /// The range is exactly the text range of `headline`. `level` is `headline.level()`.
    ExactHeadline { headline: Headline, level: usize },
    /// The range matches no headline in either of the ways above.
    Other,
}
continue 'l; + } + } + break; + } + + result + } +} + +#[derive(Debug, PartialEq)] +enum ReplaceWithShape { + IncludeHeadline { level: usize }, + ExactHeadline { level: usize }, + Other, +} + +impl ReplaceWithShape { + fn new(text: &str) -> Self { + let mut result = ReplaceWithShape::Other; + + for start in line_starts_iter(text) { + let level = text[start..].bytes().take_while(|&c| c == b'*').count(); + + if level == 0 { + continue; + } + + if !matches!(text[start..].as_bytes().get(level), Some(b' ')) { + continue; + } + + match result { + ReplaceWithShape::IncludeHeadline { level: l } => { + if level < l { + result = ReplaceWithShape::IncludeHeadline { level } + } + } + ReplaceWithShape::ExactHeadline { level: l } => { + if level <= l { + result = ReplaceWithShape::IncludeHeadline { level } + } + } + ReplaceWithShape::Other => { + if start == 0 { + result = ReplaceWithShape::ExactHeadline { level } + } else { + result = ReplaceWithShape::IncludeHeadline { level } + } + } + } + } + + result + } +} + +impl Org { + /// Replace specified range with given text, and reparse the syntax tree with current config + /// + /// This method optimizes parsing by analyzing the selected range and given text, and reducing + /// the amount of data processed by parser. 
+ /// + /// ```rust + /// use orgize::{Org, ast::Headline, TextRange, TextSize}; + /// + /// let mut org = Org::parse("** hello"); + /// let hdl = org.first_node::().unwrap(); + /// assert_eq!(hdl.level(), 2); + /// + /// // replace '**' with '*****' + /// org.replace_range(TextRange::new(0.into(), 2.into()), "*****"); + /// // since the syntax tree is changed, we have to query again + /// let hdl = org.first_node::().unwrap(); + /// assert_eq!(hdl.level(), 5); + /// ``` + pub fn replace_range(&mut self, range: TextRange, replace_with: impl AsRef) { + let replace_with = replace_with.as_ref(); + match ( + RangeShape::new(self.document().syntax, range), + ReplaceWithShape::new(replace_with), + ) { + ( + RangeShape::ExactHeadline { headline, level }, + ReplaceWithShape::IncludeHeadline { level: new_level }, + ) + | ( + RangeShape::InsideHeadline { headline, level }, + ReplaceWithShape::IncludeHeadline { level: new_level }, + ) if level < new_level => self.replace_headline(headline, range, replace_with), + + ( + RangeShape::ExactHeadline { headline, level }, + ReplaceWithShape::ExactHeadline { level: new_level }, + ) if level <= new_level + // non-last headline must ends with a newline + && (headline.end() == self.document().end() + || replace_with.ends_with(&['\n', '\r'])) => + { + self.replace_headline(headline, range, replace_with) + } + + ( + RangeShape::InsideHeadline { headline, level }, + ReplaceWithShape::ExactHeadline { level: new_level }, + ) if level <= new_level && follows_newline(headline.syntax(), range.start()) => { + self.replace_headline(headline, range, replace_with) + } + + _ => self.full_parse(range, replace_with), + } + } + + fn full_parse(&mut self, range: TextRange, replace_with: &str) { + if self.document().syntax().text_range() == range { + let input = (replace_with, &self.config).into(); + self.green = document_node(input).unwrap().1.into_node().unwrap(); + } else { + let start: usize = range.start().into(); + let end: usize = 
range.end().into(); + let mut text = self.green.to_string(); + text.replace_range(start..end, replace_with); + let input = (text.as_ref(), &self.config).into(); + self.green = document_node(input).unwrap().1.into_node().unwrap(); + } + } + + fn replace_headline(&mut self, headline: Headline, range: TextRange, replace_with: &str) { + if headline.syntax().text_range() == range { + let input = (replace_with, &self.config).into(); + + self.green = headline + .syntax + .replace_with(headline_node(input).unwrap().1.into_node().unwrap()); + } else { + let offset: usize = headline.syntax.text_range().start().into(); + let start: usize = range.start().into(); + let end: usize = range.end().into(); + + let mut text = headline.syntax.to_string(); + text.replace_range((start - offset)..(end - offset), replace_with); + + let input = (text.as_ref(), &self.config).into(); + + self.green = headline + .syntax + .replace_with(headline_node(input).unwrap().1.into_node().unwrap()); + } + } +} + +fn follows_newline(syntax: &SyntaxNode, offset: TextSize) -> bool { + match syntax.token_at_offset(offset) { + TokenAtOffset::None => false, + TokenAtOffset::Single(t) => { + let offset: usize = (offset - t.text_range().start()).into(); + t.text()[offset..].ends_with('\n') || t.text()[offset..].ends_with('\r') + } + TokenAtOffset::Between(t, _) => t.text().ends_with('\n') || t.text().ends_with('\r'), + } +} + +#[test] +fn replace() { + assert!(follows_newline( + Org::parse("\n*a*").document().syntax(), + TextSize::new(1) + )); + assert!(follows_newline( + Org::parse(" \na").document().syntax(), + TextSize::new(1) + )); + assert!(follows_newline( + Org::parse(" \ra").document().syntax(), + TextSize::new(1) + )); + assert!(!follows_newline( + Org::parse(" *a*").document().syntax(), + TextSize::new(1) + )); + assert!(!follows_newline( + Org::parse(" a").document().syntax(), + TextSize::new(1) + )); + + assert_eq!(ReplaceWithShape::new(""), ReplaceWithShape::Other); + 
assert_eq!(ReplaceWithShape::new(" ** a"), ReplaceWithShape::Other); + assert_eq!( + ReplaceWithShape::new("\n** a"), + ReplaceWithShape::IncludeHeadline { level: 2 } + ); + assert_eq!( + ReplaceWithShape::new("** a"), + ReplaceWithShape::ExactHeadline { level: 2 } + ); + assert_eq!( + ReplaceWithShape::new("** a\n* 1"), + ReplaceWithShape::IncludeHeadline { level: 1 } + ); + assert_eq!( + ReplaceWithShape::new("* a\n** 1"), + ReplaceWithShape::ExactHeadline { level: 1 } + ); + assert_eq!( + ReplaceWithShape::new("** a\n** 1"), + ReplaceWithShape::IncludeHeadline { level: 2 } + ); + + assert!(matches!( + RangeShape::new( + Org::parse("** abc\n** b").document().syntax, + TextRange::new(0.into(), 7.into()) + ), + RangeShape::ExactHeadline { level: 2, .. } + )); + assert!(matches!( + RangeShape::new( + Org::parse("** abc\n** b").document().syntax, + TextRange::new(3.into(), 7.into()) + ), + RangeShape::InsideHeadline { level: 2, .. } + )); + assert!(matches!( + RangeShape::new( + Org::parse("** abc\n** b").document().syntax, + TextRange::new(2.into(), 7.into()) + ), + RangeShape::Other + )); + assert!(matches!( + RangeShape::new( + Org::parse("* abc\n** b").document().syntax, + TextRange::new(4.into(), 7.into()) + ), + RangeShape::InsideHeadline { level: 1, .. } + )); + + macro_rules! t { + ($input:literal, $replace:literal) => { + let start = $input.find('|').unwrap(); + let end = $input.rfind('|').unwrap(); + + let input = format!( + "{}{}{}", + &$input[0..start], + &$input[start + 1..end], + &$input[end + 1..] 
+ ); + let output = format!("{}{}{}", &$input[0..start], $replace, &$input[end + 1..]); + + let mut org = Org::parse(input); + org.replace_range( + TextRange::new((start as u32).into(), (end as u32 - 1).into()), + $replace, + ); + + debug_assert_eq!( + format!("{:#?}", org.document().syntax), + format!("{:#?}", Org::parse(output).document().syntax), + ); + }; + } + + t!("||", ""); + t!("||", "** abc"); + t!("*** abc |edf|", "fde"); + t!("*|** abc edf|", "fde"); + t!("* abc \n|** edf|", "** abc"); + t!("* ab|c \n*| edf", "** abc"); + + t!("* abc \n|** edf|", "** abc"); + t!("* abc \n|** edf|", "** eee\n** eee"); + t!("* abc \n|** edf|", "*** abc"); + t!("* abc \n*|* edf|", "*** abc"); + t!("* abc \n**| edf|", "*** abc"); + t!("* abc \n**| |edf", "*** abc"); + t!("* abc \n** |edf|", "*** abc"); + t!("* abc \n** |edf|", "\n*** abc"); + t!("* abc \n** |edf|", "\n** abc"); + t!("* abc \n** |edf|", "\n* abc"); + t!("* abc \n** \n|edf|", "* abc"); + t!("* abc \n** \n|edf|", "* abc\n* abc"); + t!("* abc \n** |edf|", "* abc"); + t!("* abc \n** |edf|", "* abc\n* abc"); + t!("* abc \n|* edf\n|* gh", "* hg"); + t!("* abc \n|* edf\n|* gh", "* hg\n"); + t!("* abc \n* edf\n|* gh|", "* hg"); +} diff --git a/src/syntax/block.rs b/src/syntax/block.rs new file mode 100644 index 0000000..4f5ec75 --- /dev/null +++ b/src/syntax/block.rs @@ -0,0 +1,305 @@ +use nom::{ + branch::alt, + bytes::complete::{tag, tag_no_case, take_while, take_while1}, + character::complete::{alpha1, space0, space1}, + combinator::{cond, opt}, + sequence::{separated_pair, tuple}, + IResult, InputTake, +}; + +use super::{ + combinator::{ + blank_lines, eol_or_eof, line_starts_iter, node, token, trim_line_end, GreenElement, + NodeBuilder, + }, + element::element_nodes, + input::Input, + keyword::affiliated_keyword_nodes, + SyntaxKind::*, +}; + +fn block_node_base(input: Input) -> IResult { + let (input, affiliated_keywords) = affiliated_keyword_nodes(input)?; + let (input, (block_begin, name)) = 
block_begin_node(input)?; + let (input, pre_blank) = blank_lines(input)?; + + let kind = match name { + s if s.eq_ignore_ascii_case("COMMENT") => COMMENT_BLOCK, + s if s.eq_ignore_ascii_case("EXAMPLE") => EXAMPLE_BLOCK, + s if s.eq_ignore_ascii_case("EXPORT") => EXPORT_BLOCK, + s if s.eq_ignore_ascii_case("SRC") => SOURCE_BLOCK, + s if s.eq_ignore_ascii_case("CENTER") => CENTER_BLOCK, + s if s.eq_ignore_ascii_case("QUOTE") => QUOTE_BLOCK, + s if s.eq_ignore_ascii_case("VERSE") => VERSE_BLOCK, + _ => SPECIAL_BLOCK, + }; + + for (input, contents) in line_starts_iter(&input).map(|i| input.take_split(i)) { + if let Ok((input, block_end)) = block_end_node(input, name) { + let (input, post_blank) = blank_lines(input)?; + + let mut children = vec![]; + children.extend(affiliated_keywords); + children.push(block_begin); + children.extend(pre_blank); + if kind.is_greater_element() { + children.push(node(BLOCK_CONTENT, element_nodes(contents)?)); + } else { + children.push(node(BLOCK_CONTENT, comma_quoted_text_nodes(contents))); + } + children.push(block_end); + children.extend(post_blank); + return Ok((input, node(kind, children))); + } + } + + Err(nom::Err::Error(())) +} + +fn block_begin_node(input: Input) -> IResult { + let (input, (ws1, begin, name)) = tuple((space0, tag_no_case("#+BEGIN_"), alpha1))(input)?; + + let mut b = NodeBuilder::new(); + b.ws(ws1); + b.text(begin); + b.text(name); + + if name.eq_ignore_ascii_case("SRC") { + let (input, language) = opt(tuple(( + space1, + take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'), + )))(input)?; + let (input, switches) = opt(tuple((space1, source_block_switches)))(input)?; + let (input, ws1) = space0(input)?; + let (input, (parameters, ws2, nl)) = trim_line_end(input)?; + + if let Some((ws, language)) = language { + b.ws(ws); + b.token(SRC_BLOCK_LANGUAGE, language); + } + if let Some((ws, switches)) = switches { + b.ws(ws); + b.token(SRC_BLOCK_SWITCHES, switches); + } + b.ws(ws1); + if 
!parameters.is_empty() { + b.token(SRC_BLOCK_PARAMETERS, parameters); + } + b.ws(ws2); + b.nl(nl); + Ok((input, (b.finish(BLOCK_BEGIN), name.as_str()))) + } else if name.eq_ignore_ascii_case("EXPORT") { + let (input, ty) = opt(tuple(( + space1, + take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'), + )))(input)?; + let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?; + let (input, nl) = eol_or_eof(input)?; + + if let Some((ws, ty)) = ty { + b.ws(ws); + b.token(EXPORT_BLOCK_TYPE, ty); + } + b.text(data); + b.nl(nl); + Ok((input, (b.finish(BLOCK_BEGIN), name.as_str()))) + } else { + let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?; + let (input, nl) = eol_or_eof(input)?; + + b.text(data); + b.nl(nl); + Ok((input, (b.finish(BLOCK_BEGIN), name.as_str()))) + } +} + +fn source_block_switches(input: Input) -> IResult { + let mut i = input; + + while !i.is_empty() { + match tuple::<_, _, (), _>(( + cond(i.len() != input.len(), space1), + alt(( + separated_pair( + alt((tag("-l"), tag("-n"))), + space1, + take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'), + ), + tuple((tag("+"), alpha1)), + tuple((tag("-"), alpha1)), + )), + ))(i) + { + Ok((i_, _)) => i = i_, + _ => break, + } + } + + let len = input.len() - i.len(); + + if len == 0 { + Err(nom::Err::Error(())) + } else { + Ok(input.take_split(len)) + } +} + +fn block_end_node<'a>(input: Input<'a>, name: &str) -> IResult, GreenElement, ()> { + let (input, (ws, end, name, ws_, nl)) = + tuple((space0, tag_no_case("#+END_"), tag(name), space0, eol_or_eof))(input)?; + + let mut b = NodeBuilder::new(); + b.ws(ws); + b.text(end); + b.text(name); + b.ws(ws_); + b.nl(nl); + + Ok((input, b.finish(BLOCK_END))) +} + +fn comma_quoted_text_nodes(input: Input) -> Vec { + let mut nodes = vec![]; + + let s = input.as_str(); + + let mut start = 0; + for i in line_starts_iter(s) { + // line must start with either ",*" or ",#+" + if s.get(i..i + 2) != 
Some(",*") && s.get(i..i + 3) != Some(",#+") { + continue; + } + + let text = &s[start..i]; + if !text.is_empty() { + nodes.push(token(TEXT, text)); + } + + nodes.push(token(COMMA, ",")); + start = i + 1; + } + + if !s[start..].is_empty() { + nodes.push(token(TEXT, &s[start..])); + } + + nodes +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn block_node(input: Input) -> IResult { + crate::lossless_parser!(block_node_base, input) +} + +#[test] +fn test_parse() { + use crate::ast::{ExampleBlock, SourceBlock}; + use crate::tests::to_ast; + + let to_src_block = to_ast::(block_node); + let to_example_block = to_ast::(block_node); + + insta::assert_debug_snapshot!( + to_example_block( +r#"#+BEGIN_EXAMPLE +,* headline +,#+block +text + #+END_EXAMPLE"# + ).syntax, + @r###" + EXAMPLE_BLOCK@0..59 + BLOCK_BEGIN@0..16 + TEXT@0..8 "#+BEGIN_" + TEXT@8..15 "EXAMPLE" + NEW_LINE@15..16 "\n" + BLOCK_CONTENT@16..42 + COMMA@16..17 "," + TEXT@17..28 "* headline\n" + COMMA@28..29 "," + TEXT@29..42 "#+block\ntext\n" + BLOCK_END@42..59 + WHITESPACE@42..46 " " + TEXT@46..52 "#+END_" + TEXT@52..59 "EXAMPLE" + "### + ); + + insta::assert_debug_snapshot!( + to_src_block( +r#"#+BEGIN_SRC + + + #+END_SRC"# + ).syntax, + @r###" + SOURCE_BLOCK@0..27 + BLOCK_BEGIN@0..12 + TEXT@0..8 "#+BEGIN_" + TEXT@8..11 "SRC" + NEW_LINE@11..12 "\n" + BLANK_LINE@12..13 "\n" + BLANK_LINE@13..14 "\n" + BLOCK_CONTENT@14..14 + BLOCK_END@14..27 + WHITESPACE@14..18 " " + TEXT@18..24 "#+END_" + TEXT@24..27 "SRC" + "### + ); + + insta::assert_debug_snapshot!( + to_src_block( +r#"#+begin_src + #+end_src"# + ).syntax, + @r###" + SOURCE_BLOCK@0..25 + BLOCK_BEGIN@0..12 + TEXT@0..8 "#+begin_" + TEXT@8..11 "src" + NEW_LINE@11..12 "\n" + BLOCK_CONTENT@12..12 + BLOCK_END@12..25 + WHITESPACE@12..16 " " + TEXT@16..22 "#+end_" + TEXT@22..25 "src" + "### + ); + + insta::assert_debug_snapshot!( + to_src_block( +r#"#+BEGIN_SRC javascript -n 20 -r :var n=0, 
l=2 :foo=bar +alert('Hello World!'); + #+END_SRC + + "#).syntax, + @r###" + SOURCE_BLOCK@0..100 + BLOCK_BEGIN@0..58 + TEXT@0..8 "#+BEGIN_" + TEXT@8..11 "SRC" + WHITESPACE@11..12 " " + SRC_BLOCK_LANGUAGE@12..22 "javascript" + WHITESPACE@22..24 " " + SRC_BLOCK_SWITCHES@24..32 "-n 20 -r" + WHITESPACE@32..34 " " + SRC_BLOCK_PARAMETERS@34..57 ":var n=0, l=2 :foo=bar" + NEW_LINE@57..58 "\n" + BLOCK_CONTENT@58..81 + TEXT@58..81 "alert('Hello World!');\n" + BLOCK_END@81..95 + WHITESPACE@81..85 " " + TEXT@85..91 "#+END_" + TEXT@91..94 "SRC" + NEW_LINE@94..95 "\n" + BLANK_LINE@95..96 "\n" + BLANK_LINE@96..100 " " + "### + ); + + // TODO: more testing +} diff --git a/src/syntax/clock.rs b/src/syntax/clock.rs new file mode 100644 index 0000000..92b0557 --- /dev/null +++ b/src/syntax/clock.rs @@ -0,0 +1,134 @@ +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{digit1, space0}, + combinator::{map, opt, recognize}, + sequence::tuple, + IResult, +}; + +use super::{ + combinator::{ + blank_lines, colon_token, double_arrow_token, eol_or_eof, GreenElement, NodeBuilder, + }, + input::Input, + timestamp::{timestamp_active_node, timestamp_inactive_node}, + SyntaxKind, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn clock_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + space0, + tag("CLOCK:"), + space0, + alt((timestamp_inactive_node, timestamp_active_node)), + opt(tuple(( + space0, + double_arrow_token, + space0, + recognize(tuple((digit1, colon_token, digit1))), + ))), + space0, + eol_or_eof, + blank_lines, + )), + |(ws, clock, ws_, timestamp, duration, ws__, nl, post_blank)| { + let mut b = NodeBuilder::new(); + + b.ws(ws); + b.text(clock); + b.ws(ws_); + b.push(timestamp); + if let Some((ws, double_arrow, ws_, time)) = duration { + b.ws(ws); + b.push(double_arrow); + b.ws(ws_); + b.text(time); + } + b.ws(ws__); + b.nl(nl); + b.children.extend(post_blank); + 
b.finish(SyntaxKind::CLOCK) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::ast::Clock; + use crate::tests::to_ast; + + let to_clock = to_ast::(clock_node); + + insta::assert_debug_snapshot!( + to_clock("CLOCK: [2003-09-16 Tue 09:39]").syntax, + @r###" + CLOCK@0..29 + TEXT@0..6 "CLOCK:" + WHITESPACE@6..7 " " + TIMESTAMP_INACTIVE@7..29 + L_BRACKET@7..8 "[" + TIMESTAMP_YEAR@8..12 "2003" + MINUS@12..13 "-" + TIMESTAMP_MONTH@13..15 "09" + MINUS@15..16 "-" + TIMESTAMP_DAY@16..18 "16" + WHITESPACE@18..19 " " + TIMESTAMP_DAYNAME@19..22 "Tue" + WHITESPACE@22..23 " " + TIMESTAMP_HOUR@23..25 "09" + COLON@25..26 ":" + TIMESTAMP_MINUTE@26..28 "39" + R_BRACKET@28..29 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_clock("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00\n\n").syntax, + @r###" + CLOCK@0..64 + TEXT@0..6 "CLOCK:" + WHITESPACE@6..7 " " + TIMESTAMP_INACTIVE@7..53 + L_BRACKET@7..8 "[" + TIMESTAMP_YEAR@8..12 "2003" + MINUS@12..13 "-" + TIMESTAMP_MONTH@13..15 "09" + MINUS@15..16 "-" + TIMESTAMP_DAY@16..18 "16" + WHITESPACE@18..19 " " + TIMESTAMP_DAYNAME@19..22 "Tue" + WHITESPACE@22..23 " " + TIMESTAMP_HOUR@23..25 "09" + COLON@25..26 ":" + TIMESTAMP_MINUTE@26..28 "39" + R_BRACKET@28..29 "]" + MINUS2@29..31 "--" + L_BRACKET@31..32 "[" + TIMESTAMP_YEAR@32..36 "2003" + MINUS@36..37 "-" + TIMESTAMP_MONTH@37..39 "09" + MINUS@39..40 "-" + TIMESTAMP_DAY@40..42 "16" + WHITESPACE@42..43 " " + TIMESTAMP_DAYNAME@43..46 "Tue" + WHITESPACE@46..47 " " + TIMESTAMP_HOUR@47..49 "10" + COLON@49..50 ":" + TIMESTAMP_MINUTE@50..52 "39" + R_BRACKET@52..53 "]" + WHITESPACE@53..54 " " + DOUBLE_ARROW@54..56 "=>" + WHITESPACE@56..58 " " + TEXT@58..62 "1:00" + NEW_LINE@62..63 "\n" + BLANK_LINE@63..64 "\n" + "### + ); +} diff --git a/src/syntax/cloze.rs b/src/syntax/cloze.rs new file mode 100644 index 0000000..20645a4 --- /dev/null +++ b/src/syntax/cloze.rs @@ -0,0 +1,162 @@ +use nom::{bytes::complete::take_until, combinator::opt, 
sequence::tuple, IResult, InputTake}; + +use crate::syntax::{ + combinator::{at_token, l_curly2_token, l_curly_token, r_curly_token}, + object::standard_object_nodes, +}; + +use super::{ + combinator::{GreenElement, NodeBuilder}, + input::Input, + SyntaxKind, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn cloze_node(input: Input) -> IResult { + crate::lossless_parser!(cloze_node_base, input) +} + +fn cloze_node_base(input: Input) -> IResult { + let (input, l_curly2) = l_curly2_token(input)?; + + let mut inside_latex = false; + let mut text_end = 0; + for (index, byte) in input.bytes().enumerate() { + match byte { + b'}' if !inside_latex => { + text_end = index; + break; + } + b'$' => { + inside_latex = !inside_latex; + } + _ => {} + } + } + + if text_end == 0 { + return Err(nom::Err::Error(())); + } + + let (input, text) = input.take_split(text_end); + + let (input, r_curly) = r_curly_token(input)?; + + let (input, hint) = opt(tuple((l_curly_token, take_until("}"), r_curly_token)))(input)?; + + let (input, id) = opt(tuple((at_token, take_until("}"))))(input)?; + + let (input, r_curly_) = r_curly_token(input)?; + + let mut b = NodeBuilder::new(); + + b.push(l_curly2); + b.children.extend(standard_object_nodes(text)); + b.push(r_curly); + + if let Some((l_curly, hint, r_curly)) = hint { + b.push(l_curly); + b.token(SyntaxKind::TEXT, hint); + b.push(r_curly); + } + + if let Some((at, id)) = id { + b.push(at); + b.token(SyntaxKind::TEXT, id); + } + + b.push(r_curly_); + + Ok((input, b.finish(SyntaxKind::CLOZE))) +} + +#[test] +fn parse() { + use crate::ast::Cloze; + use crate::config::ParseConfig; + use crate::tests::to_ast; + + let to_cloze = to_ast::(cloze_node); + + insta::assert_debug_snapshot!( + to_cloze("{{text}}").syntax, + @r###" + CLOZE@0..8 + L_CURLY2@0..2 "{{" + TEXT@2..6 "text" + R_CURLY@6..7 "}" + R_CURLY@7..8 "}" + "### + ); + + insta::assert_debug_snapshot!( + 
to_cloze("{{text}@id}").syntax, + @r###" + CLOZE@0..11 + L_CURLY2@0..2 "{{" + TEXT@2..6 "text" + R_CURLY@6..7 "}" + AT@7..8 "@" + TEXT@8..10 "id" + R_CURLY@10..11 "}" + "### + ); + + insta::assert_debug_snapshot!( + to_cloze("{{text}{hint}}").syntax, + @r###" + CLOZE@0..14 + L_CURLY2@0..2 "{{" + TEXT@2..6 "text" + R_CURLY@6..7 "}" + L_CURLY@7..8 "{" + TEXT@8..12 "hint" + R_CURLY@12..13 "}" + R_CURLY@13..14 "}" + "### + ); + + insta::assert_debug_snapshot!( + to_cloze("{{text}{hint}@id}").syntax, + @r###" + CLOZE@0..17 + L_CURLY2@0..2 "{{" + TEXT@2..6 "text" + R_CURLY@6..7 "}" + L_CURLY@7..8 "{" + TEXT@8..12 "hint" + R_CURLY@12..13 "}" + AT@13..14 "@" + TEXT@14..16 "id" + R_CURLY@16..17 "}" + "### + ); + + insta::assert_debug_snapshot!( + to_cloze("{{$\\frac{a}{b}$}{fractions}}").syntax, + @r###" + CLOZE@0..28 + L_CURLY2@0..2 "{{" + LATEX_FRAGMENT@2..15 + DOLLAR@2..3 "$" + TEXT@3..14 "\\frac{a}{b}" + DOLLAR@14..15 "$" + R_CURLY@15..16 "}" + L_CURLY@16..17 "{" + TEXT@17..26 "fractions" + R_CURLY@26..27 "}" + R_CURLY@27..28 "}" + "### + ); + + let config = &ParseConfig::default(); + + assert!(cloze_node(("{{}}", config).into()).is_err()); + assert!(cloze_node(("{{text}", config).into()).is_err()); + assert!(cloze_node(("{text}}", config).into()).is_err()); + assert!(cloze_node(("{{text}{}", config).into()).is_err()); + assert!(cloze_node(("{{text}a}", config).into()).is_err()); +} diff --git a/src/syntax/combinator.rs b/src/syntax/combinator.rs new file mode 100644 index 0000000..bd69797 --- /dev/null +++ b/src/syntax/combinator.rs @@ -0,0 +1,327 @@ +use memchr::{memchr2, memchr2_iter, Memchr2}; +use nom::{bytes::complete::tag, IResult, InputTake, Slice}; +use rowan::{GreenNode, GreenToken, Language, NodeOrToken}; +use std::iter::once; + +use super::{input::Input, OrgLanguage, SyntaxKind, SyntaxKind::*}; + +pub type GreenElement = NodeOrToken; + +#[inline] +pub fn token(kind: SyntaxKind, input: &str) -> GreenElement { + 
GreenElement::Token(GreenToken::new(OrgLanguage::kind_to_raw(kind), input)) +} + +#[inline] +pub fn node(kind: SyntaxKind, children: I) -> GreenElement +where + I: IntoIterator, + I::IntoIter: ExactSizeIterator, +{ + GreenElement::Node(GreenNode::new(OrgLanguage::kind_to_raw(kind), children)) +} + +macro_rules! token_parser { + ($name:ident, $token:literal, $kind:ident) => { + #[doc = "Recognizes `"] + #[doc = $token] + #[doc = "` and returns GreenToken"] + pub fn $name(input: Input) -> IResult { + let (i, o) = tag($token)(input)?; + Ok((i, token($kind, o.as_str()))) + } + }; +} + +token_parser!(l_bracket_token, "[", L_BRACKET); +token_parser!(r_bracket_token, "]", R_BRACKET); +token_parser!(l_bracket2_token, "[[", L_BRACKET2); +token_parser!(r_bracket2_token, "]]", R_BRACKET2); +token_parser!(l_parens_token, "(", L_PARENS); +token_parser!(r_parens_token, ")", R_PARENS); +token_parser!(l_angle_token, "<", L_ANGLE); +token_parser!(r_angle_token, ">", R_ANGLE); +token_parser!(l_curly_token, "{", L_CURLY); +#[cfg(feature = "syntax-org-fc")] +token_parser!(l_curly2_token, "{{", L_CURLY2); +token_parser!(r_curly_token, "}", R_CURLY); +token_parser!(l_curly3_token, "{{{", L_CURLY3); +token_parser!(r_curly3_token, "}}}", R_CURLY3); +token_parser!(l_angle2_token, "<<", L_ANGLE2); +token_parser!(r_angle2_token, ">>", R_ANGLE2); +token_parser!(l_angle3_token, "<<<", L_ANGLE3); +token_parser!(r_angle3_token, ">>>", R_ANGLE3); +token_parser!(at_token, "@", AT); +token_parser!(at2_token, "@@", AT2); +token_parser!(minus2_token, "--", MINUS2); +// token_parser!(percent_token, "%", PERCENT); +token_parser!(percent2_token, "%%", PERCENT2); +// token_parser!(slash_token, "/", SLASH); +token_parser!(backslash_token, "\\", BACKSLASH); +token_parser!(underscore_token, "_", UNDERSCORE); +// token_parser!(star_token, "*", STAR); +// token_parser!(plus_token, "+", PLUS); +token_parser!(minus_token, "-", MINUS); +token_parser!(colon_token, ":", COLON); +token_parser!(colon2_token, "::", 
COLON2); +token_parser!(pipe_token, "|", PIPE); +token_parser!(dollar_token, "$", DOLLAR); +token_parser!(dollar2_token, "$$", DOLLAR2); +// token_parser!(equal_token, "=", EQUAL); +// token_parser!(tilde_token, "~", TILDE); +token_parser!(hash_plus_token, "#+", HASH_PLUS); +token_parser!(caret_token, "^", CARET); +token_parser!(hash_token, "#", HASH); +token_parser!(double_arrow_token, "=>", DOUBLE_ARROW); + +macro_rules! lossless_parser { + ($parser:expr, $input:expr) => {{ + let i_ = $input; + let (i, o) = $parser($input)?; + cfg_if::cfg_if! { + if #[cfg(feature = "tracing")] { + tracing::trace!(consumed = o.to_string()); + } + } + debug_assert_eq!( + &i_.as_str()[0..(i_.len() - i.len())], + &o.to_string(), + stringify!("parser must be lossless") + ); + Ok((i, o)) + }}; +} + +pub(crate) use lossless_parser; + +/// Takes all blank lines +pub fn blank_lines(input: Input) -> IResult, ()> { + if input.is_empty() { + return Ok((input, vec![])); + } + + let mut lines = vec![]; + let mut start = 0; + let bytes = input.as_bytes(); + + for index in line_ends_iter(input.as_str()) { + if start != index && bytes[start..index].iter().all(|b| b.is_ascii_whitespace()) { + lines.push(token(BLANK_LINE, &input.as_str()[start..index])); + start = index; + } else { + break; + } + } + + Ok((input.slice(start..), lines)) +} + +#[test] +fn test_blank_lines() { + use crate::config::ParseConfig; + let config = &ParseConfig::default(); + let (input, output) = blank_lines(("", config).into()).unwrap(); + assert_eq!(input.as_str(), ""); + assert_eq!(output, vec![]); + + let (input, output) = blank_lines(("\n", config).into()).unwrap(); + assert_eq!(input.as_str(), ""); + assert_eq!(output.len(), 1); + assert_eq!(output[0].to_string(), "\n"); + + let (input, output) = blank_lines((" t", config).into()).unwrap(); + assert_eq!(input.as_str(), " t"); + assert_eq!(output, vec![]); + + let (input, output) = blank_lines((" \r\n\n\t\t\r\n \n ", config).into()).unwrap(); + 
assert_eq!(input.as_str(), ""); + assert_eq!(output.len(), 5); + assert_eq!(output[0].to_string(), " \r\n"); + assert_eq!(output[1].to_string(), "\n"); + assert_eq!(output[2].to_string(), "\t\t\r\n"); + assert_eq!(output[3].to_string(), " \n"); + assert_eq!(output[4].to_string(), " "); + + let (input, output) = + blank_lines(("\r\n\n\t\t\r\n \n\r \r t\n ", config).into()).unwrap(); + assert_eq!(input.as_str(), " t\n "); + assert_eq!(output.len(), 6); + assert_eq!(output[0].to_string(), "\r\n"); + assert_eq!(output[1].to_string(), "\n"); + assert_eq!(output[2].to_string(), "\t\t\r\n"); + assert_eq!(output[3].to_string(), " \n"); + assert_eq!(output[4].to_string(), "\r"); + assert_eq!(output[5].to_string(), " \r"); +} + +/// Returns 1. anything before trailing whitespace, 2. whitespace itself, 3. line feeding +pub fn trim_line_end(input: Input) -> IResult { + let bytes = input.as_bytes(); + + let (input, contents, nl) = match memchr2(b'\r', b'\n', bytes) { + Some(i) if bytes[i] == b'\r' && matches!(bytes.get(i + 1), Some(b'\n')) => ( + input.slice(i + 2..), + input.slice(0..i), + input.slice(i..i + 2), + ), + Some(i) => ( + input.slice(i + 1..), + input.slice(0..i), + input.slice(i..i + 1), + ), + _ => (input.of(""), input, input.of("")), + }; + + let (contents, ws) = match contents.bytes().rposition(|u| !u.is_ascii_whitespace()) { + Some(i) => (contents.slice(0..i + 1), contents.slice(i + 1..)), + None => (contents.of(""), contents), + }; + + Ok((input, (contents, ws, nl))) +} + +#[test] +fn test_trim_line_end() { + use crate::config::ParseConfig; + let config = &ParseConfig::default(); + let (input, output) = trim_line_end(("", config).into()).unwrap(); + assert_eq!(input.as_str(), ""); + assert_eq!(output.0.as_str(), ""); + assert_eq!(output.1.as_str(), ""); + assert_eq!(output.2.as_str(), ""); + + let (input, output) = trim_line_end(("* hello, world :abc:", config).into()).unwrap(); + assert_eq!(input.as_str(), ""); + assert_eq!(output.0.as_str(), "* hello, world 
:abc:"); + assert_eq!(output.1.as_str(), ""); + assert_eq!(output.2.as_str(), ""); + + let (input, output) = + trim_line_end(("* hello, world :abc: \r\nrest\n", config).into()).unwrap(); + assert_eq!(input.as_str(), "rest\n"); + assert_eq!(output.0.as_str(), "* hello, world :abc:"); + assert_eq!(output.1.as_str(), " "); + assert_eq!(output.2.as_str(), "\r\n"); + + let (input, output) = trim_line_end((" \rr", config).into()).unwrap(); + assert_eq!(input.as_str(), "r"); + assert_eq!(output.0.as_str(), ""); + assert_eq!(output.1.as_str(), " "); + assert_eq!(output.2.as_str(), "\r"); +} + +/// Recognizes a line ending \r, \n, \r\n or end of file +pub fn eol_or_eof(input: Input) -> IResult { + let mut bytes = input.bytes(); + + let count = match bytes.next() { + Some(b'\n') => 1, + Some(b'\r') => { + if matches!(bytes.next(), Some(b'\n')) { + 2 + } else { + 1 + } + } + None => 0, + _ => return Err(nom::Err::Error(())), + }; + + Ok(input.take_split(count)) +} + +struct LineStart<'a> { + bytes: &'a [u8], + iter: Memchr2<'a>, +} + +impl<'a> LineStart<'a> { + fn new(input: &'a str) -> Self { + let bytes = input.as_bytes(); + LineStart { + bytes, + iter: memchr2_iter(b'\r', b'\n', bytes), + } + } +} + +impl<'a> Iterator for LineStart<'a> { + type Item = usize; + + fn next(&mut self) -> Option { + let i = self.iter.next()?; + if self.bytes[i] == b'\r' && self.bytes.get(i + 1) == Some(&b'\n') { + let ii = self.iter.next(); + debug_assert_eq!(i + 1, ii.unwrap()); + Some(i + 2) + } else { + Some(i + 1) + } + } +} + +/// Returns an iterator of positions of line start, including zero +pub fn line_starts_iter(s: &str) -> impl Iterator + '_ { + once(0).chain(LineStart::new(s)) +} + +/// Returns an iterator of positions of line end, including eof +pub fn line_ends_iter(s: &str) -> impl Iterator + '_ { + LineStart::new(s).chain(once(s.len())) +} + +pub struct NodeBuilder { + pub children: Vec, +} + +impl NodeBuilder { + pub fn new() -> NodeBuilder { + NodeBuilder { children: vec![] } 
+ } + + pub fn ws(&mut self, i: Input) { + if !i.is_empty() { + debug_assert!(i.bytes().all(|c| c.is_ascii_whitespace())); + self.children.push(i.ws_token()) + } + } + + pub fn nl(&mut self, i: Input) { + if !i.is_empty() { + debug_assert!( + i.s == "\n" || i.s == "\r\n" || i.s == "\r", + "{:?} should be a new line", + i.s + ); + self.children.push(i.nl_token()) + } + } + + pub fn text(&mut self, i: Input) { + if !i.is_empty() { + self.children.push(i.text_token()) + } + } + + pub fn token(&mut self, kind: SyntaxKind, i: Input) { + self.children.push(i.token(kind)) + } + + pub fn push(&mut self, elem: GreenElement) { + self.children.push(elem) + } + + pub fn push_opt(&mut self, elem: Option) { + if let Some(elem) = elem { + self.children.push(elem) + } + } + + pub fn len(&self) -> usize { + self.children.len() + } + + pub fn finish(self, kind: SyntaxKind) -> GreenElement { + GreenElement::Node(GreenNode::new(kind.into(), self.children)) + } +} diff --git a/src/syntax/comment.rs b/src/syntax/comment.rs new file mode 100644 index 0000000..33c7805 --- /dev/null +++ b/src/syntax/comment.rs @@ -0,0 +1,115 @@ +use nom::{ + bytes::complete::{tag, take_while}, + character::complete::{space0, space1}, + combinator::{iterator, opt}, + sequence::tuple, + IResult, +}; + +use super::{ + combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder}, + input::Input, + SyntaxKind, +}; + +fn comment_node_base(input: Input) -> IResult { + let mut b = NodeBuilder::new(); + + let mut iter = iterator( + input, + opt(tuple(( + space0, + tag("#"), + opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))), + eol_or_eof, + ))), + ); + + for (idx, option) in iter.enumerate() { + match option { + Some((ws, common, content, eol)) => { + b.ws(ws); + b.token(SyntaxKind::HASH, common); + if let Some((ws, text)) = content { + b.ws(ws); + b.text(text); + } + b.text(eol); + } + _ if idx == 0 => return Err(nom::Err::Error(())), + _ => break, + } + } + + let (input, _) = iter.finish()?; + + 
let (input, post_blank) = blank_lines(input)?; + + b.children.extend(post_blank); + + Ok((input, b.finish(SyntaxKind::COMMENT))) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn comment_node(input: Input) -> IResult { + crate::lossless_parser!(comment_node_base, input) +} + +#[test] +fn parse() { + use crate::{ + syntax::{comment::comment_node, input::Input, SyntaxNode}, + ParseConfig, + }; + + let t = |input: &str| { + SyntaxNode::new_root( + comment_node(Input { + s: input, + c: &ParseConfig::default(), + }) + .unwrap() + .1 + .into_node() + .unwrap(), + ) + }; + + insta::assert_debug_snapshot!( + t("#"), + @r###" + COMMENT@0..1 + HASH@0..1 "#" + "### + ); + + insta::assert_debug_snapshot!( + t("#\n # a\n #\n\n"), + @r###" + COMMENT@0..12 + HASH@0..1 "#" + TEXT@1..2 "\n" + WHITESPACE@2..4 " " + HASH@4..5 "#" + WHITESPACE@5..6 " " + TEXT@6..7 "a" + TEXT@7..8 "\n" + WHITESPACE@8..9 " " + HASH@9..10 "#" + TEXT@10..11 "\n" + BLANK_LINE@11..12 "\n" + "### + ); + + insta::assert_debug_snapshot!( + t("#\na\n #\n\n"), + @r###" + COMMENT@0..2 + HASH@0..1 "#" + TEXT@1..2 "\n" + "### + ); +} diff --git a/src/syntax/cookie.rs b/src/syntax/cookie.rs new file mode 100644 index 0000000..f54cb0e --- /dev/null +++ b/src/syntax/cookie.rs @@ -0,0 +1,147 @@ +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::digit0, + combinator::map, + sequence::{pair, separated_pair, tuple}, + IResult, +}; + +use super::{ + combinator::{l_bracket_token, node, r_bracket_token, token, GreenElement}, + input::Input, + SyntaxKind::*, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn cookie_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + l_bracket_token, + alt(( + separated_pair(digit0, tag("/"), digit0), + pair(digit0, tag("%")), + )), + r_bracket_token, + )), + |(l_bracket, value, r_bracket)| { + let 
mut children = vec![l_bracket]; + + children.push(token(TEXT, value.0.as_str())); + match value.1.as_str() { + "%" => { + children.push(token(PERCENT, value.1.as_str())); + } + _ => { + children.push(token(SLASH, "/")); + children.push(token(TEXT, value.1.as_str())); + } + } + children.push(r_bracket); + + node(COOKIE, children) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::ast::Cookie; + use crate::tests::to_ast; + use crate::ParseConfig; + + let to_cookie = to_ast::(cookie_node); + + insta::assert_debug_snapshot!( + to_cookie("[1/10]").syntax, + @r###" + COOKIE@0..6 + L_BRACKET@0..1 "[" + TEXT@1..2 "1" + SLASH@2..3 "/" + TEXT@3..5 "10" + R_BRACKET@5..6 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_cookie("[1/1000]").syntax, + @r###" + COOKIE@0..8 + L_BRACKET@0..1 "[" + TEXT@1..2 "1" + SLASH@2..3 "/" + TEXT@3..7 "1000" + R_BRACKET@7..8 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_cookie("[10%]").syntax, + @r###" + COOKIE@0..5 + L_BRACKET@0..1 "[" + TEXT@1..3 "10" + PERCENT@3..4 "%" + R_BRACKET@4..5 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_cookie("[%]").syntax, + @r###" + COOKIE@0..3 + L_BRACKET@0..1 "[" + TEXT@1..1 "" + PERCENT@1..2 "%" + R_BRACKET@2..3 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_cookie("[/]").syntax, + @r###" + COOKIE@0..3 + L_BRACKET@0..1 "[" + TEXT@1..1 "" + SLASH@1..2 "/" + TEXT@2..2 "" + R_BRACKET@2..3 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_cookie("[100/]").syntax, + @r###" + COOKIE@0..6 + L_BRACKET@0..1 "[" + TEXT@1..4 "100" + SLASH@4..5 "/" + TEXT@5..5 "" + R_BRACKET@5..6 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_cookie("[/100]").syntax, + @r###" + COOKIE@0..6 + L_BRACKET@0..1 "[" + TEXT@1..1 "" + SLASH@1..2 "/" + TEXT@2..5 "100" + R_BRACKET@5..6 "]" + "### + ); + + let config = &ParseConfig::default(); + + assert!(cookie_node(("[10% ]", config).into()).is_err()); + assert!(cookie_node(("[1//100]", 
config).into()).is_err()); + assert!(cookie_node(("[1\\100]", config).into()).is_err()); + assert!(cookie_node(("[10%%]", config).into()).is_err()); +} diff --git a/src/syntax/document.rs b/src/syntax/document.rs new file mode 100644 index 0000000..b32fc08 --- /dev/null +++ b/src/syntax/document.rs @@ -0,0 +1,139 @@ +use nom::{combinator::opt, IResult}; + +use super::{ + combinator::{blank_lines, node, GreenElement}, + drawer::property_drawer_node, + headline::{headline_node, section_node}, + input::Input, + SyntaxKind::*, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn document_node(input: Input) -> IResult { + crate::lossless_parser!(document_node_base, input) +} + +fn document_node_base(input: Input) -> IResult { + if input.is_empty() { + return Ok((input, node(DOCUMENT, []))); + } + + let mut children = vec![]; + + let (input, property_drawer) = opt(property_drawer_node)(input)?; + if let Some(property_drawer) = property_drawer { + children.push(property_drawer); + } + + let (input, pre_blank) = blank_lines(input)?; + + children.extend(pre_blank); + + if input.is_empty() { + return Ok((input, node(DOCUMENT, children))); + } + + let (input, section) = opt(section_node)(input)?; + if let Some(section) = section { + children.push(section); + } + + let mut i = input; + while !i.is_empty() { + let (input, headline) = headline_node(i)?; + debug_assert!(i.len() > input.len(), "{} > {}", i.len(), input.len(),); + i = input; + children.push(headline); + } + + Ok((i, node(DOCUMENT, children))) +} + +#[test] +fn parse() { + use crate::ast::Document; + use crate::tests::to_ast; + + let to_document = to_ast::(document_node); + + insta::assert_debug_snapshot!( + to_document("").syntax, + @r###" + DOCUMENT@0..0 + "### + ); + + insta::assert_debug_snapshot!( + to_document("\n \n\n").syntax, + @r###" + DOCUMENT@0..5 + BLANK_LINE@0..1 "\n" + BLANK_LINE@1..4 " \n" + BLANK_LINE@4..5 "\n" + "### + ); 
+ + insta::assert_debug_snapshot!( + to_document("section").syntax, + @r###" + DOCUMENT@0..7 + SECTION@0..7 + PARAGRAPH@0..7 + TEXT@0..7 "section" + "### + ); + + insta::assert_debug_snapshot!( + to_document("\n* section").syntax, + @r###" + DOCUMENT@0..10 + BLANK_LINE@0..1 "\n" + HEADLINE@1..10 + HEADLINE_STARS@1..2 "*" + WHITESPACE@2..3 " " + HEADLINE_TITLE@3..10 + TEXT@3..10 "section" + "### + ); + + insta::assert_debug_snapshot!( + to_document("\n** heading 2\n* heading 1").syntax, + @r###" + DOCUMENT@0..25 + BLANK_LINE@0..1 "\n" + HEADLINE@1..14 + HEADLINE_STARS@1..3 "**" + WHITESPACE@3..4 " " + HEADLINE_TITLE@4..13 + TEXT@4..13 "heading 2" + NEW_LINE@13..14 "\n" + HEADLINE@14..25 + HEADLINE_STARS@14..15 "*" + WHITESPACE@15..16 " " + HEADLINE_TITLE@16..25 + TEXT@16..25 "heading 1" + "### + ); + + insta::assert_debug_snapshot!( + to_document("section\n** heading 2\n*heading 1").syntax, + @r###" + DOCUMENT@0..31 + SECTION@0..8 + PARAGRAPH@0..8 + TEXT@0..8 "section\n" + HEADLINE@8..31 + HEADLINE_STARS@8..10 "**" + WHITESPACE@10..11 " " + HEADLINE_TITLE@11..20 + TEXT@11..20 "heading 2" + NEW_LINE@20..21 "\n" + SECTION@21..31 + PARAGRAPH@21..31 + TEXT@21..31 "*heading 1" + "### + ); +} diff --git a/src/syntax/drawer.rs b/src/syntax/drawer.rs new file mode 100644 index 0000000..a44cbb0 --- /dev/null +++ b/src/syntax/drawer.rs @@ -0,0 +1,275 @@ +use nom::{ + bytes::complete::{tag_no_case, take_while1}, + character::complete::{space0, space1}, + combinator::{iterator, map, verify}, + sequence::tuple, + IResult, InputTake, +}; + +use super::{ + combinator::{ + blank_lines, colon_token, eol_or_eof, line_starts_iter, node, trim_line_end, GreenElement, + NodeBuilder, + }, + element::element_nodes, + input::Input, + SyntaxKind::*, +}; + +fn drawer_begin_node(input: Input) -> IResult { + let mut b = NodeBuilder::new(); + + let (input, (ws, colon, name, colon_, ws_, nl)) = tuple(( + space0, + colon_token, + take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == 
'_'), + colon_token, + space0, + eol_or_eof, + ))(input)?; + + b.ws(ws); + b.push(colon); + b.text(name); + b.push(colon_); + b.ws(ws_); + b.nl(nl); + + Ok((input, (b.finish(DRAWER_BEGIN), name.as_str()))) +} + +fn drawer_end_node(input: Input) -> IResult { + let (input, (ws, colon, end, colon_, ws_, nl)) = tuple(( + space0, + colon_token, + tag_no_case("END"), + colon_token, + space0, + eol_or_eof, + ))(input)?; + + let mut b = NodeBuilder::new(); + b.ws(ws); + b.push(colon); + b.text(end); + b.push(colon_); + b.ws(ws_); + b.nl(nl); + + Ok((input, b.finish(DRAWER_END))) +} + +fn drawer_node_base(input: Input) -> IResult { + let (input, (begin, _)) = drawer_begin_node(input)?; + + let (input, pre_blank) = blank_lines(input)?; + + for (input, contents) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) { + if let Ok((input, end)) = drawer_end_node(input) { + let (input, post_blank) = blank_lines(input)?; + let mut children = vec![begin]; + children.extend(pre_blank); + if !contents.is_empty() { + children.push(node(DRAWER_CONTENT, element_nodes(contents)?)); + } else { + children.push(node(DRAWER_CONTENT, [])); + } + children.push(end); + children.extend(post_blank); + + return Ok((input, node(DRAWER, children))); + } + } + + Err(nom::Err::Error(())) +} + +fn property_drawer_node_base(input: Input) -> IResult { + let (input, (begin, name)) = drawer_begin_node(input)?; + + if !name.eq_ignore_ascii_case("properties") { + return Err(nom::Err::Error(())); + } + + let mut children = vec![begin]; + + let mut it = iterator(input, node_property_node); + children.extend(&mut it); + let (input, _) = it.finish()?; + let (input, end) = drawer_end_node(input)?; + let (input, post_blank) = blank_lines(input)?; + + children.push(end); + children.extend(post_blank); + + Ok((input, node(PROPERTY_DRAWER, children))) +} + +fn node_property_node(input: Input) -> IResult { + let (input, ws1) = space0(input)?; + let (input, colon1) = colon_token(input)?; + let (input, 
(colon2, name)) = map( + verify( + take_while1(|c| c != ' ' && c != '\t' && c != '\n' && c != '\r'), + |i: &Input| i.ends_with(':'), + ), + |input: Input| input.take_split(input.len() - 1), + )(input)?; + let (input, ws2) = space1(input)?; + let (input, (value, ws3, nl)) = trim_line_end(input)?; + + let mut b = NodeBuilder::new(); + + b.ws(ws1); + b.push(colon1); + + if name.ends_with('+') { + let (plus, name) = name.take_split(name.len() - 1); + b.text(name); + b.token(PLUS, plus); + } else { + b.text(name); + } + + b.token(COLON, colon2); + b.ws(ws2); + b.text(value); + b.ws(ws3); + b.nl(nl); + + Ok((input, b.finish(NODE_PROPERTY))) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn property_drawer_node(input: Input) -> IResult { + debug_assert!(!input.is_empty()); + crate::lossless_parser!(property_drawer_node_base, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn drawer_node(input: Input) -> IResult { + crate::lossless_parser!(drawer_node_base, input) +} + +#[test] +fn parse() { + use crate::{ + ast::{Drawer, PropertyDrawer}, + tests::to_ast, + ParseConfig, + }; + + let to_drawer = to_ast::(drawer_node); + let to_property_drawer = to_ast::(property_drawer_node); + + insta::assert_debug_snapshot!( + to_drawer( + r#":DRAWER: + :CUSTOM_ID: id + :END:"# + ).syntax, + @r###" + DRAWER@0..33 + DRAWER_BEGIN@0..9 + COLON@0..1 ":" + TEXT@1..7 "DRAWER" + COLON@7..8 ":" + NEW_LINE@8..9 "\n" + DRAWER_CONTENT@9..26 + PARAGRAPH@9..26 + TEXT@9..18 " :CUSTOM" + SUBSCRIPT@18..21 + UNDERSCORE@18..19 "_" + TEXT@19..21 "ID" + TEXT@21..26 ": id\n" + DRAWER_END@26..33 + WHITESPACE@26..28 " " + COLON@28..29 ":" + TEXT@29..32 "END" + COLON@32..33 ":" + "### + ); + + insta::assert_debug_snapshot!( + to_drawer( + r#":DRAWER: + + :END: + +"# + ).syntax, + @r###" + DRAWER@0..19 + DRAWER_BEGIN@0..9 + COLON@0..1 ":" + TEXT@1..7 
"DRAWER" + COLON@7..8 ":" + NEW_LINE@8..9 "\n" + BLANK_LINE@9..10 "\n" + DRAWER_CONTENT@10..10 + DRAWER_END@10..18 + WHITESPACE@10..12 " " + COLON@12..13 ":" + TEXT@13..16 "END" + COLON@16..17 ":" + NEW_LINE@17..18 "\n" + BLANK_LINE@18..19 "\n" + "### + ); + + // https://github.com/PoiScript/orgize/issues/70#issuecomment-2099671563 + insta::assert_debug_snapshot!( + to_property_drawer(r#":PROPERTIES: +:header-args:clojure: :session *clojure-1* +:NAME: VALUE +:NAME+: VALUE +:END:"#).syntax, + @r###" + PROPERTY_DRAWER@0..91 + DRAWER_BEGIN@0..13 + COLON@0..1 ":" + TEXT@1..11 "PROPERTIES" + COLON@11..12 ":" + NEW_LINE@12..13 "\n" + NODE_PROPERTY@13..59 + COLON@13..14 ":" + TEXT@14..33 "header-args:clojure" + COLON@33..34 ":" + WHITESPACE@34..38 " " + TEXT@38..58 ":session *clojure-1*" + NEW_LINE@58..59 "\n" + NODE_PROPERTY@59..72 + COLON@59..60 ":" + TEXT@60..64 "NAME" + COLON@64..65 ":" + WHITESPACE@65..66 " " + TEXT@66..71 "VALUE" + NEW_LINE@71..72 "\n" + NODE_PROPERTY@72..86 + COLON@72..73 ":" + TEXT@73..77 "NAME" + PLUS@77..78 "+" + COLON@78..79 ":" + WHITESPACE@79..80 " " + TEXT@80..85 "VALUE" + NEW_LINE@85..86 "\n" + DRAWER_END@86..91 + COLON@86..87 ":" + TEXT@87..90 "END" + COLON@90..91 ":" + "### + ); + + let config = &ParseConfig::default(); + + // https://github.com/PoiScript/orgize/issues/9 + assert!(drawer_node((":SPAGHETTI:\n", config).into()).is_err()); + + assert!(property_drawer_node((":PROPERTIES:\n:NAME:VALUE\n:END:", config).into()).is_err()); +} diff --git a/src/syntax/dyn_block.rs b/src/syntax/dyn_block.rs new file mode 100644 index 0000000..b2b4a09 --- /dev/null +++ b/src/syntax/dyn_block.rs @@ -0,0 +1,107 @@ +use nom::{ + bytes::complete::tag_no_case, + character::complete::{alpha1, space0, space1}, + sequence::tuple, + IResult, InputTake, +}; + +use super::{ + combinator::{ + blank_lines, eol_or_eof, line_starts_iter, node, trim_line_end, GreenElement, NodeBuilder, + }, + input::Input, + SyntaxKind::*, +}; + +fn dyn_block_node_base(input: Input) 
-> IResult { + let (input, begin) = dyn_block_begin_node(input)?; + let (input, pre_blank) = blank_lines(input)?; + + for (input, contents) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) { + if let Ok((input, end)) = dyn_block_end_node(input) { + let (input, post_blank) = blank_lines(input)?; + let mut children = vec![begin]; + children.extend(pre_blank); + children.push(contents.text_token()); + children.push(end); + children.extend(post_blank); + + return Ok((input, node(DYN_BLOCK, children))); + } + } + + Err(nom::Err::Error(())) +} + +fn dyn_block_begin_node(input: Input) -> IResult { + let (input, (ws, begin, ws_, name, (args, ws__, nl))) = tuple(( + space0, + tag_no_case("#+BEGIN:"), + space1, + alpha1, + trim_line_end, + ))(input)?; + + let mut b = NodeBuilder::new(); + b.ws(ws); + b.text(begin); + b.ws(ws_); + b.text(name); + b.text(args); + b.ws(ws__); + b.nl(nl); + + Ok((input, b.finish(DYN_BLOCK_BEGIN))) +} + +fn dyn_block_end_node(input: Input) -> IResult { + let (input, (ws, end, ws_, nl)) = + tuple((space0, tag_no_case("#+END:"), space0, eol_or_eof))(input)?; + + let mut b = NodeBuilder::new(); + b.ws(ws); + b.text(end); + b.ws(ws_); + b.nl(nl); + + Ok((input, b.finish(DYN_BLOCK_END))) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn dyn_block_node(input: Input) -> IResult { + crate::lossless_parser!(dyn_block_node_base, input) +} + +#[test] +fn parse() { + use crate::{ast::DynBlock, tests::to_ast}; + + let to_dyn_block = to_ast::(dyn_block_node); + + insta::assert_debug_snapshot!( + to_dyn_block( + r#"#+BEGIN: clocktable :scope file + +CONTENTS +#+END: + "#).syntax, + @r###" + DYN_BLOCK@0..53 + DYN_BLOCK_BEGIN@0..32 + TEXT@0..8 "#+BEGIN:" + WHITESPACE@8..9 " " + TEXT@9..19 "clocktable" + TEXT@19..31 " :scope file" + NEW_LINE@31..32 "\n" + BLANK_LINE@32..33 "\n" + TEXT@33..42 "CONTENTS\n" + DYN_BLOCK_END@42..49 + TEXT@42..48 "#+END:" + NEW_LINE@48..49 
"\n" + BLANK_LINE@49..53 " " + "### + ); +} diff --git a/src/syntax/element.rs b/src/syntax/element.rs new file mode 100644 index 0000000..aa4b88a --- /dev/null +++ b/src/syntax/element.rs @@ -0,0 +1,339 @@ +use std::iter::once; + +use memchr::memchr2_iter; +use nom::{IResult, InputTake}; + +use super::{ + block::block_node, + clock::clock_node, + combinator::GreenElement, + comment::comment_node, + drawer::drawer_node, + dyn_block::dyn_block_node, + fixed_width::fixed_width_node, + fn_def::fn_def_node, + input::Input, + keyword::{affiliated_keyword_nodes, keyword_node}, + latex_environment::latex_environment_node, + list::list_node, + paragraph::{paragraph_node, paragraph_nodes}, + rule::rule_node, + table::{org_table_node, table_el_node}, +}; + +/// Recognizes multiple org-mode elements +/// +/// input must not contains blank line in the beginning +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn element_nodes(input: Input) -> Result, nom::Err<()>> { + debug_assert!(!input.is_empty()); + // TODO: + // debug_assert!( + // blank_lines(input).unwrap().1.is_empty(), + // "input must not starts with blank lines: {:?}", + // input.s + // ); + + let mut i = input; + let mut nodes = vec![]; + + 'l: while !i.is_empty() { + for (input, head) in ElementPositions::new(i) { + if let Ok((input, element)) = element_node(input) { + if !head.is_empty() { + nodes.extend(paragraph_nodes(head)?); + } + nodes.push(element); + debug_assert!(input.len() < i.len(), "{} < {}", input.len(), i.len()); + i = input; + continue 'l; + } + } + nodes.extend(paragraph_nodes(i)?); + break; + } + + debug_assert_eq!( + input.as_str(), + nodes.iter().fold(String::new(), |s, n| s + &n.to_string()), + "parser must be lossless" + ); + + Ok(nodes) +} + +/// Recognizes an org-mode element expect paragraph +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn 
element_node(input: Input) -> IResult { + // skip affiliated keyword first + let (i, nodes) = affiliated_keyword_nodes(input)?; + + let has_affiliated_keyword = !nodes.is_empty(); + + // find first non-whitespace character + let byte = i.bytes().find(|&b| b != b' ' && b != b'\t'); + + debug_assert!( + !(has_affiliated_keyword && matches!(byte, None | Some(b'\n') | Some(b'\r'))), + "affiliated_keyword must not followed by blank lines: {:?}", + input.s + ); + + let result = match byte { + Some(b'[') => fn_def_node(input), + Some(b'0'..=b'9') | Some(b'*') => list_node(input), + // clock doesn't have affiliated keywords + Some(b'C') if !has_affiliated_keyword => clock_node(input), + Some(b'-') => rule_node(input).or_else(|_| list_node(input)), + Some(b':') => drawer_node(input).or_else(|_| fixed_width_node(input)), + Some(b'|') => org_table_node(input), + Some(b'+') => table_el_node(input).or_else(|_| list_node(input)), + Some(b'#') => block_node(input) + .or_else(|_| keyword_node(input)) + .or_else(|_| dyn_block_node(input)) + .or_else(|_| comment_node(input)), + Some(b'\\') => latex_environment_node(input), + _ => Err(nom::Err::Error(())), + }; + + if has_affiliated_keyword { + result.or_else(|_| paragraph_node(input)) + } else { + result + } +} + +struct ElementPositions<'a> { + input: Input<'a>, + pos: usize, +} + +impl<'a> ElementPositions<'a> { + fn new(input: Input<'a>) -> Self { + ElementPositions { input, pos: 0 } + } +} + +impl<'a> Iterator for ElementPositions<'a> { + type Item = (Input<'a>, Input<'a>); + + fn next(&mut self) -> Option { + if self.pos >= self.input.s.len() { + return None; + } + + let bytes = &self.input.as_bytes()[self.pos..]; + + let mut iter = once(0).chain(memchr2_iter(b'\r', b'\n', bytes).map(|i| i + 1)); + + while let Some(i) = iter.next() { + let b = *bytes[i..].iter().find(|&&b| b != b' ' && b != b'\t')?; + + if matches!( + b, + b'[' | b'0'..=b'9' | b'*' | b'C' | b'-' | b':' | b'|' | b'+' | b'#' | b'\\' + ) { + let previous = 
self.pos; + self.pos = iter + .next() + .map_or_else(|| self.input.s.len(), |i| i + self.pos); + + debug_assert!( + previous < self.pos && self.pos <= self.input.s.len(), + "{} < {} < {}", + previous, + self.pos, + self.input.s.len() + ); + + let (input, head) = self.input.take_split(i + previous); + + return Some((input, head)); + } + } + + None + } +} + +#[test] +fn positions() { + let config = crate::ParseConfig::default(); + let s = "+\n\n C\n \r\n-\n\t\t[\n: \r\n"; + let vec = ElementPositions::new((s, &config).into()).collect::>(); + assert_eq!(vec.len(), 5); + assert_eq!(vec[0].0.s, "+\n\n C\n \r\n-\n\t\t[\n: \r\n"); + assert_eq!(vec[1].0.s, " C\n \r\n-\n\t\t[\n: \r\n"); + assert_eq!(vec[2].0.s, "-\n\t\t[\n: \r\n"); + assert_eq!(vec[3].0.s, "\t\t[\n: \r\n"); + assert_eq!(vec[4].0.s, ": \r\n"); +} + +#[test] +fn parse() { + use crate::syntax::{SyntaxKind, SyntaxNode}; + use crate::{syntax::combinator::node, ParseConfig}; + + let t = |input: &str| { + let config = &ParseConfig::default(); + let children = element_nodes((input, config).into()).unwrap(); + SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap()) + }; + + // paragraph stops at blank lines + insta::assert_debug_snapshot!( + t(r#"a + +b"#), + @r###" + SECTION@0..4 + PARAGRAPH@0..3 + TEXT@0..2 "a\n" + BLANK_LINE@2..3 "\n" + PARAGRAPH@3..4 + TEXT@3..4 "b" + "### + ); + + // paragraph followed by special element + insta::assert_debug_snapshot!( + t("Table:\n|cell"), + @r###" + SECTION@0..12 + PARAGRAPH@0..7 + TEXT@0..7 "Table:\n" + ORG_TABLE@7..12 + ORG_TABLE_STANDARD_ROW@7..12 + PIPE@7..8 "|" + ORG_TABLE_CELL@8..12 + TEXT@8..12 "cell" + "### + ); +} + +#[test] +fn affiliated_keywords() { + use crate::syntax::{SyntaxKind, SyntaxNode}; + use crate::{syntax::combinator::node, ParseConfig}; + + let t = |input: &str| { + let config = &ParseConfig::default(); + let children = element_nodes((input, config).into()).unwrap(); + SyntaxNode::new_root(node(SyntaxKind::SECTION, 
children).into_node().unwrap()) + }; + + // affiliated keywords + paragraph + insta::assert_debug_snapshot!( + t("#+ATTR_HTML: :width 300px\n[[./img/a.jpg]]"), + @r###" + SECTION@0..41 + PARAGRAPH@0..41 + AFFILIATED_KEYWORD@0..26 + HASH_PLUS@0..2 "#+" + TEXT@2..11 "ATTR_HTML" + COLON@11..12 ":" + TEXT@12..25 " :width 300px" + NEW_LINE@25..26 "\n" + LINK@26..41 + L_BRACKET2@26..28 "[[" + LINK_PATH@28..39 "./img/a.jpg" + R_BRACKET2@39..41 "]]" + "### + ); + + // affiliated keywords + blank lines, fallback to normal keyword + insta::assert_debug_snapshot!( + t("#+ATTR_HTML: :width 300px\n#+CAPTION: abc\n\n[[./img/a.jpg]]"), + @r###" + SECTION@0..57 + KEYWORD@0..26 + HASH_PLUS@0..2 "#+" + TEXT@2..11 "ATTR_HTML" + COLON@11..12 ":" + TEXT@12..25 " :width 300px" + NEW_LINE@25..26 "\n" + KEYWORD@26..42 + HASH_PLUS@26..28 "#+" + TEXT@28..35 "CAPTION" + COLON@35..36 ":" + TEXT@36..40 " abc" + NEW_LINE@40..41 "\n" + BLANK_LINE@41..42 "\n" + PARAGRAPH@42..57 + LINK@42..57 + L_BRACKET2@42..44 "[[" + LINK_PATH@44..55 "./img/a.jpg" + R_BRACKET2@55..57 "]]" + "### + ); + + // affiliated keywords + special element + insta::assert_debug_snapshot!( + t("#+CAPTION: a footnote def\n[fn:WORD] https://orgmode.org"), + @r###" + SECTION@0..55 + FN_DEF@0..55 + AFFILIATED_KEYWORD@0..26 + HASH_PLUS@0..2 "#+" + TEXT@2..9 "CAPTION" + COLON@9..10 ":" + TEXT@10..25 " a footnote def" + NEW_LINE@25..26 "\n" + L_BRACKET@26..27 "[" + TEXT@27..29 "fn" + COLON@29..30 ":" + TEXT@30..34 "WORD" + R_BRACKET@34..35 "]" + TEXT@35..55 " https://orgmode.org" + "### + ); + + // affiliated keywords + clock + insta::assert_debug_snapshot!( + t("#+CAPTION: a footnote def\nCLOCK: [2003-09-16 Tue 09:39]"), + @r###" + SECTION@0..55 + PARAGRAPH@0..55 + AFFILIATED_KEYWORD@0..26 + HASH_PLUS@0..2 "#+" + TEXT@2..9 "CAPTION" + COLON@9..10 ":" + TEXT@10..25 " a footnote def" + NEW_LINE@25..26 "\n" + TEXT@26..33 "CLOCK: " + TIMESTAMP_INACTIVE@33..55 + L_BRACKET@33..34 "[" + TIMESTAMP_YEAR@34..38 "2003" + MINUS@38..39 "-" + 
TIMESTAMP_MONTH@39..41 "09" + MINUS@41..42 "-" + TIMESTAMP_DAY@42..44 "16" + WHITESPACE@44..45 " " + TIMESTAMP_DAYNAME@45..48 "Tue" + WHITESPACE@48..49 " " + TIMESTAMP_HOUR@49..51 "09" + COLON@51..52 ":" + TIMESTAMP_MINUTE@52..54 "39" + R_BRACKET@54..55 "]" + "### + ); + + // affiliated keywords + eof + insta::assert_debug_snapshot!( + t("#+CAPTION: Longer caption."), + @r###" + SECTION@0..26 + KEYWORD@0..26 + HASH_PLUS@0..2 "#+" + TEXT@2..9 "CAPTION" + COLON@9..10 ":" + TEXT@10..26 " Longer caption." + "### + ); +} diff --git a/src/syntax/emphasis.rs b/src/syntax/emphasis.rs new file mode 100644 index 0000000..d3fb710 --- /dev/null +++ b/src/syntax/emphasis.rs @@ -0,0 +1,186 @@ +use bytecount::count; +use memchr::memchr_iter; +use nom::{combinator::map, IResult, Slice}; + +use super::{ + combinator::{node, token, GreenElement}, + input::Input, + object::standard_object_nodes, + SyntaxKind::*, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn bold_node(input: Input) -> IResult { + let mut parser = map(emphasis(b'*'), |contents| { + let mut children = vec![token(STAR, "*")]; + children.extend(standard_object_nodes(contents)); + children.push(token(STAR, "*")); + node(BOLD, children) + }); + crate::lossless_parser!(parser, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn code_node(input: Input) -> IResult { + let mut parser = map(emphasis(b'~'), |contents| { + node( + CODE, + [token(TILDE, "~"), contents.text_token(), token(TILDE, "~")], + ) + }); + crate::lossless_parser!(parser, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn strike_node(input: Input) -> IResult { + let mut parser = map(emphasis(b'+'), |contents| { + let mut children = vec![token(PLUS, "+")]; + children.extend(standard_object_nodes(contents)); + 
children.push(token(PLUS, "+")); + node(STRIKE, children) + }); + crate::lossless_parser!(parser, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn verbatim_node(input: Input) -> IResult { + let mut parser = map(emphasis(b'='), |contents| { + node( + VERBATIM, + [token(EQUAL, "="), contents.text_token(), token(EQUAL, "=")], + ) + }); + crate::lossless_parser!(parser, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn underline_node(input: Input) -> IResult { + let mut parser = map(emphasis(b'_'), |contents| { + let mut children = vec![token(UNDERSCORE, "_")]; + children.extend(standard_object_nodes(contents)); + children.push(token(UNDERSCORE, "_")); + node(UNDERLINE, children) + }); + crate::lossless_parser!(parser, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn italic_node(input: Input) -> IResult { + let mut parser = map(emphasis(b'/'), |contents| { + let mut children = vec![token(SLASH, "/")]; + children.extend(standard_object_nodes(contents)); + children.push(token(SLASH, "/")); + node(ITALIC, children) + }); + crate::lossless_parser!(parser, input) +} + +fn emphasis(marker: u8) -> impl Fn(Input) -> IResult { + move |input: Input| { + let bytes = input.as_bytes(); + + if bytes.len() < 3 || bytes[0] != marker || bytes[1].is_ascii_whitespace() { + return Err(nom::Err::Error(())); + } + + for idx in memchr_iter(marker, bytes).skip(1) { + // contains at least one character + if idx == 1 { + continue; + } else if count(&bytes[1..idx], b'\n') >= 2 { + break; + } else if validate_marker(idx, input) { + return Ok((input.slice(idx + 1..), input.slice(1..idx))); + } + } + + Err(nom::Err::Error(())) + } +} + +fn validate_marker(pos: usize, text: Input) -> bool { + if text.as_bytes()[pos - 1].is_ascii_whitespace() 
{ + false + } else if let Some(post) = text.as_bytes().get(pos + 1) { + [ + b' ', b'\t', b'\r', b'\n', b'-', b'.', b',', b';', b':', b'!', b'?', b'\'', b')', b'}', + b'[', + ] + .contains(post) + } else { + true + } +} + +pub fn verify_pre(input: &str) -> bool { + if input.is_empty() { + return true; + } + matches!( + input.as_bytes()[input.len() - 1], + b'\t' | b' ' | b'-' | b'(' | b'{' | b'\\' | b'"' | b'\r' | b'\n' + ) +} + +#[test] +fn parse() { + use crate::{ast::Bold, tests::to_ast, ParseConfig}; + + let to_bold = to_ast::(bold_node); + + insta::assert_debug_snapshot!( + to_bold("*bold*").syntax, + @r###" + BOLD@0..6 + STAR@0..1 "*" + TEXT@1..5 "bold" + STAR@5..6 "*" + "### + ); + + insta::assert_debug_snapshot!( + to_bold("*bo*ld*").syntax, + @r###" + BOLD@0..7 + STAR@0..1 "*" + TEXT@1..6 "bo*ld" + STAR@6..7 "*" + "### + ); + + insta::assert_debug_snapshot!( + to_bold("*bo\nld*").syntax, + @r###" + BOLD@0..7 + STAR@0..1 "*" + TEXT@1..6 "bo\nld" + STAR@6..7 "*" + "### + ); + + let config = &ParseConfig::default(); + + assert!(bold_node(("*bold*a", config).into()).is_err()); + assert!(bold_node(("*bold *", config).into()).is_err()); + assert!(bold_node(("* bold*", config).into()).is_err()); + assert!(bold_node(("*b\nol\nd*", config).into()).is_err()); + assert!(italic_node(("*bold*", config).into()).is_err()); +} diff --git a/src/syntax/entity.rs b/src/syntax/entity.rs new file mode 100644 index 0000000..056126e --- /dev/null +++ b/src/syntax/entity.rs @@ -0,0 +1,120 @@ +use nom::{ + branch::alt, + bytes::complete::{tag, take_while_m_n}, + character::complete::alphanumeric1, + combinator::opt, + IResult, +}; + +use crate::{ + entities::ENTITIES, + syntax::combinator::{backslash_token, node}, + SyntaxKind, +}; + +use super::{combinator::GreenElement, input::Input}; + +pub fn entity_node(input: Input) -> IResult { + debug_assert!(input.s.starts_with('\\')); + let mut parser = alt((template1, template2)); + crate::lossless_parser!(parser, input) +} + +// \NAME 
POST or // \NAME{} +fn template1(input: Input) -> IResult { + let (input, backslash) = backslash_token(input)?; + let (input, name) = alphanumeric1(input)?; + + if ENTITIES.iter().all(|i| i.0 != name.s) { + return Err(nom::Err::Error(())); + } + let (input, brackets) = opt(tag("{}"))(input)?; + + if let Some(brackets) = brackets { + return Ok(( + input, + node( + SyntaxKind::ENTITY, + [backslash, name.text_token(), brackets.text_token()], + ), + )); + } + + if let Some(post) = input.bytes().next() { + if post.is_ascii_alphabetic() { + return Err(nom::Err::Error(())); + } + } + + Ok(( + input, + node(SyntaxKind::ENTITY, [backslash, name.text_token()]), + )) +} + +// \_SPACES +fn template2(input: Input) -> IResult { + let (input, backslash) = backslash_token(input)?; + let (input, underscore) = tag("_")(input)?; + let (input, spaces) = take_while_m_n(1, 20, |c| c == ' ')(input)?; + Ok(( + input, + node( + SyntaxKind::ENTITY, + [ + backslash, + underscore.token(SyntaxKind::UNDERSCORE), + spaces.text_token(), + ], + ), + )) +} + +#[test] +fn parse() { + use crate::{ast::Entity, tests::to_ast, ParseConfig}; + + let to_entity = to_ast::(entity_node); + + insta::assert_debug_snapshot!( + to_entity("\\cent").syntax, + @r###" + ENTITY@0..5 + BACKSLASH@0..1 "\\" + TEXT@1..5 "cent" + "### + ); + + insta::assert_debug_snapshot!( + to_entity("\\S").syntax, + @r###" + ENTITY@0..2 + BACKSLASH@0..1 "\\" + TEXT@1..2 "S" + "### + ); + + insta::assert_debug_snapshot!( + to_entity("\\frac12{}test").syntax, + @r###" + ENTITY@0..9 + BACKSLASH@0..1 "\\" + TEXT@1..7 "frac12" + TEXT@7..9 "{}" + "### + ); + + insta::assert_debug_snapshot!( + to_entity("\\_ ").syntax, + @r###" + ENTITY@0..21 + BACKSLASH@0..1 "\\" + UNDERSCORE@1..2 "_" + TEXT@2..21 " " + "### + ); + + let c = ParseConfig::default(); + + assert!(entity_node(("\\poi", &c).into()).is_err()); +} diff --git a/src/syntax/fixed_width.rs b/src/syntax/fixed_width.rs new file mode 100644 index 0000000..9a89e93 --- /dev/null +++ 
b/src/syntax/fixed_width.rs
@@ -0,0 +1,119 @@
+use nom::{
+    bytes::complete::{tag, take_while},
+    character::complete::{space0, space1},
+    combinator::{iterator, opt},
+    sequence::tuple,
+    IResult,
+};
+
+use super::{
+    combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder},
+    input::Input,
+    keyword::affiliated_keyword_nodes,
+    SyntaxKind,
+};
+
+/// Parses a fixed-width area into a `FIXED_WIDTH` node.
+///
+/// The node is made of any leading affiliated keywords, one or more lines of
+/// the form `[whitespace]:[whitespace text]`, and any trailing blank lines.
+///
+/// # Errors
+///
+/// Returns `nom::Err::Error(())` when no fixed-width line follows the
+/// affiliated keywords; errors from the sub-parsers are propagated.
+fn fixed_width_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
+    let mut b = NodeBuilder::new();
+
+    let (input, keywords) = affiliated_keyword_nodes(input)?;
+    b.children.extend(keywords);
+
+    // One fixed-width line: optional indentation, the `:` marker, an optional
+    // "whitespace + rest of line" payload, then `eol_or_eof`. Wrapping it in
+    // `opt` makes a non-matching line show up as `None` instead of an error.
+    let mut iter = iterator(
+        input,
+        opt(tuple((
+            space0,
+            tag(":"),
+            opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))),
+            eol_or_eof,
+        ))),
+    );
+
+    for (idx, option) in iter.enumerate() {
+        match option {
+            Some((ws, colon, content, eol)) => {
+                b.ws(ws);
+                // The marker is a `:`, so it is tagged `COLON`, matching how
+                // the keyword and headline parsers tag their colons.
+                b.token(SyntaxKind::COLON, colon);
+                if let Some((ws, text)) = content {
+                    b.ws(ws);
+                    b.text(text);
+                }
+                b.text(eol);
+            }
+            // not a single fixed-width line: this is not a fixed-width area
+            _ if idx == 0 => return Err(nom::Err::Error(())),
+            // first non-matching line ends the area
+            _ => break,
+        }
+    }
+
+    let (input, _) = iter.finish()?;
+
+    let (input, post_blank) = blank_lines(input)?;
+
+    b.children.extend(post_blank);
+
+    Ok((input, b.finish(SyntaxKind::FIXED_WIDTH)))
+}
+
+/// Public entry point for fixed-width areas; see `fixed_width_node_base`.
+///
+/// Runs the base parser through the crate's `lossless_parser!` macro.
+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
+)]
+pub fn fixed_width_node(input: Input) -> IResult<Input, GreenElement, ()> {
+    crate::lossless_parser!(fixed_width_node_base, input)
+}
+
+#[test]
+fn parse() {
+    use crate::{ast::FixedWidth, tests::to_ast};
+
+    let to_fixed_width = to_ast::<FixedWidth>(fixed_width_node);
+
+    insta::assert_debug_snapshot!(
+        to_fixed_width(
+            r#": A
+:
+: B
+: C
+
+    "#
+        ).syntax,
+        @r###"
+    FIXED_WIDTH@0..19
+      COLON@0..1 ":"
+      WHITESPACE@1..2 " "
+      TEXT@2..3 "A"
+      TEXT@3..4 "\n"
+      COLON@4..5 ":"
+      TEXT@5..6 "\n"
+      COLON@6..7 ":"
+      WHITESPACE@7..8 " "
+      TEXT@8..9 "B"
+      TEXT@9..10 "\n"
+      COLON@10..11 ":"
+      WHITESPACE@11..12 " "
+      TEXT@12..13 "C"
+      TEXT@13..14 "\n"
+      BLANK_LINE@14..15 "\n"
+      BLANK_LINE@15..19 "    "
+    "###
+    );
+}
diff
--git a/src/syntax/fn_def.rs b/src/syntax/fn_def.rs new file mode 100644 index 0000000..10346dc --- /dev/null +++ b/src/syntax/fn_def.rs @@ -0,0 +1,157 @@ +use nom::{ + bytes::complete::{tag, take_while1}, + combinator::map, + sequence::tuple, + IResult, +}; + +use super::{ + combinator::{ + blank_lines, colon_token, l_bracket_token, r_bracket_token, trim_line_end, GreenElement, + NodeBuilder, + }, + input::Input, + keyword::affiliated_keyword_nodes, + SyntaxKind, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn fn_def_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + affiliated_keyword_nodes, + l_bracket_token, + tag("fn"), + colon_token, + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), + r_bracket_token, + trim_line_end, + blank_lines, + )), + |( + affiliated_keywords, + l_bracket, + fn_, + colon, + label, + r_bracket, + (content, ws_, nl), + post_blank, + )| { + let mut b = NodeBuilder::new(); + b.children.extend(affiliated_keywords); + b.push(l_bracket); + b.text(fn_); + b.push(colon); + b.text(label); + b.push(r_bracket); + b.text(content); + b.ws(ws_); + b.nl(nl); + b.children.extend(post_blank); + b.finish(SyntaxKind::FN_DEF) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::ParseConfig; + use crate::{ast::FnDef, tests::to_ast}; + + let to_fn_def = to_ast::(fn_def_node); + + insta::assert_debug_snapshot!( + to_fn_def("[fn:1] https://orgmode.org").syntax, + @r###" + FN_DEF@0..26 + L_BRACKET@0..1 "[" + TEXT@1..3 "fn" + COLON@3..4 ":" + TEXT@4..5 "1" + R_BRACKET@5..6 "]" + TEXT@6..26 " https://orgmode.org" + "### + ); + + insta::assert_debug_snapshot!( + to_fn_def("[fn:word_1] https://orgmode.org").syntax, + @r###" + FN_DEF@0..31 + L_BRACKET@0..1 "[" + TEXT@1..3 "fn" + COLON@3..4 ":" + TEXT@4..10 "word_1" + R_BRACKET@10..11 "]" + TEXT@11..31 " https://orgmode.org" + "### + ); + + 
insta::assert_debug_snapshot!( + to_fn_def("[fn:WORD-1] https://orgmode.org").syntax, + @r###" + FN_DEF@0..31 + L_BRACKET@0..1 "[" + TEXT@1..3 "fn" + COLON@3..4 ":" + TEXT@4..10 "WORD-1" + R_BRACKET@10..11 "]" + TEXT@11..31 " https://orgmode.org" + "### + ); + + insta::assert_debug_snapshot!( + to_fn_def("[fn:WORD]").syntax, + @r###" + FN_DEF@0..9 + L_BRACKET@0..1 "[" + TEXT@1..3 "fn" + COLON@3..4 ":" + TEXT@4..8 "WORD" + R_BRACKET@8..9 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_fn_def("[fn:1] In particular, the parser requires stars at column 0 to be\n").syntax, + @r###" + FN_DEF@0..66 + L_BRACKET@0..1 "[" + TEXT@1..3 "fn" + COLON@3..4 ":" + TEXT@4..5 "1" + R_BRACKET@5..6 "]" + TEXT@6..65 " In particular, the p ..." + NEW_LINE@65..66 "\n" + "### + ); + + let config = &ParseConfig::default(); + + assert!(fn_def_node(("[fn:] https://orgmode.org", config).into()).is_err()); + assert!(fn_def_node(("[fn:wor d] https://orgmode.org", config).into()).is_err()); + assert!(fn_def_node(("[fn:WORD https://orgmode.org", config).into()).is_err()); + + insta::assert_debug_snapshot!( + to_fn_def("#+ATTR_poi: 1\n[fn:WORD-1] https://orgmode.org").syntax, + @r###" + FN_DEF@0..45 + AFFILIATED_KEYWORD@0..14 + HASH_PLUS@0..2 "#+" + TEXT@2..10 "ATTR_poi" + COLON@10..11 ":" + TEXT@11..13 " 1" + NEW_LINE@13..14 "\n" + L_BRACKET@14..15 "[" + TEXT@15..17 "fn" + COLON@17..18 ":" + TEXT@18..24 "WORD-1" + R_BRACKET@24..25 "]" + TEXT@25..45 " https://orgmode.org" + "### + ); +} diff --git a/src/syntax/fn_ref.rs b/src/syntax/fn_ref.rs new file mode 100644 index 0000000..c190825 --- /dev/null +++ b/src/syntax/fn_ref.rs @@ -0,0 +1,122 @@ +use memchr::memchr2_iter; +use nom::{ + bytes::complete::{tag, take_while}, + combinator::opt, + sequence::tuple, + Err, IResult, InputTake, +}; + +use super::{ + combinator::{colon_token, l_bracket_token, node, r_bracket_token, GreenElement}, + input::Input, + object::standard_object_nodes, + SyntaxKind::*, +}; + +#[cfg_attr( + feature = "tracing", 
+ tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn fn_ref_node(input: Input) -> IResult { + crate::lossless_parser!(fn_ref_node_base, input) +} + +fn fn_ref_node_base(input: Input) -> IResult { + let (input, (l_bracket, fn_, colon, label, definition, r_bracket)) = tuple(( + l_bracket_token, + tag("fn"), + colon_token, + take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), + opt(tuple((colon_token, balanced_brackets))), + r_bracket_token, + ))(input)?; + + let mut children = vec![l_bracket, fn_.text_token(), colon, label.text_token()]; + if let Some((colon, definition)) = definition { + children.push(colon); + children.extend(standard_object_nodes(definition)); + } + children.push(r_bracket); + + Ok((input, node(FN_REF, children))) +} + +fn balanced_brackets(input: Input) -> IResult { + let mut pairs = 1; + let bytes = input.as_bytes(); + for i in memchr2_iter(b'[', b']', bytes) { + if bytes[i] == b'[' { + pairs += 1; + } else if pairs != 1 { + pairs -= 1; + } else { + return Ok(input.take_split(i)); + } + } + Err(Err::Error(())) +} + +#[test] +fn parse() { + use crate::{ast::FnRef, tests::to_ast, ParseConfig}; + + let to_fn_ref = to_ast::(fn_ref_node); + + insta::assert_debug_snapshot!( + to_fn_ref("[fn:1]").syntax, + @r###" + FN_REF@0..6 + L_BRACKET@0..1 "[" + TEXT@1..3 "fn" + COLON@3..4 ":" + TEXT@4..5 "1" + R_BRACKET@5..6 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_fn_ref("[fn:1:2]").syntax, + @r###" + FN_REF@0..8 + L_BRACKET@0..1 "[" + TEXT@1..3 "fn" + COLON@3..4 ":" + TEXT@4..5 "1" + COLON@5..6 ":" + TEXT@6..7 "2" + R_BRACKET@7..8 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_fn_ref("[fn::2]").syntax, + @r###" + FN_REF@0..7 + L_BRACKET@0..1 "[" + TEXT@1..3 "fn" + COLON@3..4 ":" + TEXT@4..4 "" + COLON@4..5 ":" + TEXT@5..6 "2" + R_BRACKET@6..7 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_fn_ref("[fn::[]]").syntax, + @r###" + FN_REF@0..8 + L_BRACKET@0..1 "[" + TEXT@1..3 
"fn" + COLON@3..4 ":" + TEXT@4..4 "" + COLON@4..5 ":" + TEXT@5..7 "[]" + R_BRACKET@7..8 "]" + "### + ); + + let config = &ParseConfig::default(); + + assert!(fn_ref_node(("[fn::[]", config).into()).is_err()); +} diff --git a/src/syntax/headline.rs b/src/syntax/headline.rs new file mode 100644 index 0000000..d094d30 --- /dev/null +++ b/src/syntax/headline.rs @@ -0,0 +1,369 @@ +use memchr::memrchr_iter; +use nom::{ + bytes::complete::take_while1, + character::complete::{anychar, space0}, + combinator::{map, opt}, + sequence::tuple, + IResult, InputTake, Slice, +}; + +use super::{ + combinator::{ + hash_token, l_bracket_token, line_starts_iter, node, r_bracket_token, token, trim_line_end, + GreenElement, NodeBuilder, + }, + drawer::property_drawer_node, + element::element_nodes, + input::Input, + object::standard_object_nodes, + planning::planning_node, + SyntaxKind::*, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn headline_node(input: Input) -> IResult { + debug_assert!(!input.is_empty()); + crate::lossless_parser!(headline_node_base, input) +} + +fn headline_node_base(input: Input) -> IResult { + let (input, stars) = headline_stars(input)?; + + let mut b = NodeBuilder::new(); + + b.token(HEADLINE_STARS, stars); + + let (input, ws) = space0(input)?; + b.ws(ws); + + let (input, headline_keyword) = opt(headline_keyword_token)(input)?; + + if let Some((headline_keyword, ws)) = headline_keyword { + b.push(headline_keyword); + b.ws(ws); + } + + let (input, headline_priority) = opt(headline_priority_node)(input)?; + + if let Some((headline_priority, ws)) = headline_priority { + b.push(headline_priority); + b.ws(ws); + } + + let (input, (title_and_tags, ws_, nl)) = trim_line_end(input)?; + let (title, tags) = opt(headline_tags_node)(title_and_tags)?; + + if !title.is_empty() { + b.push(node(HEADLINE_TITLE, standard_object_nodes(title))); + } + b.push_opt(tags); + b.ws(ws_); + b.nl(nl); + + 
if input.is_empty() { + return Ok((input, b.finish(HEADLINE))); + } + + let (input, planning) = opt(planning_node)(input)?; + b.push_opt(planning); + + if input.is_empty() { + return Ok((input, b.finish(HEADLINE))); + } + + let (input, property_drawer) = opt(property_drawer_node)(input)?; + b.push_opt(property_drawer); + + if input.is_empty() { + return Ok((input, b.finish(HEADLINE))); + } + + let (input, section) = opt(section_node)(input)?; + b.push_opt(section); + + let mut i = input; + let current_level = stars.len(); + while !i.is_empty() { + let next_level = i.bytes().take_while(|&c| c == b'*').count(); + + if next_level <= current_level { + break; + } + + let (input, headline) = headline_node(i)?; + b.push(headline); + debug_assert!(i.len() > input.len(), "{} > {}", i.len(), input.len()); + i = input; + } + + Ok((i, b.finish(HEADLINE))) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn section_node(input: Input) -> IResult { + debug_assert!(!input.is_empty()); + let (input, section) = section_text(input)?; + Ok((input, node(SECTION, element_nodes(section)?))) +} + +fn section_text(input: Input) -> IResult { + for (input, section) in line_starts_iter(input.as_str()).map(|i| input.take_split(i)) { + if headline_stars(input).is_ok() { + if section.is_empty() { + return Err(nom::Err::Error(())); + } + + return Ok((input, section)); + } + } + + Ok(input.take_split(input.len())) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +fn headline_stars(input: Input) -> IResult { + let bytes = input.as_bytes(); + let level = bytes.iter().take_while(|&&c| c == b'*').count(); + + if level == 0 { + Err(nom::Err::Error(())) + } + // headline stars must be followed by space + else if matches!(bytes.get(level), Some(b' ')) { + Ok(input.take_split(level)) + } else { + Err(nom::Err::Error(())) + } +} + +#[cfg_attr( + feature = 
"tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +fn headline_tags_node(input: Input) -> IResult { + if !input.s.ends_with(':') { + return Err(nom::Err::Error(())); + }; + + let bytes = input.as_bytes(); + + // we're going to skip to first colon, so we start from the + // second last character + let mut i = input.len() - 1; + let mut can_not_be_ws = true; + let mut children = vec![token(COLON, ":")]; + + for ii in memrchr_iter(b':', bytes).skip(1) { + let item = &bytes[ii + 1..i]; + + if item.is_empty() { + children.push(token(COLON, ":")); + can_not_be_ws = false; + debug_assert!(i > ii, "{} > {}", i, ii); + i = ii; + } else if String::from_utf8_lossy(item) + .chars() + // https://github.com/yyr/org-mode/blob/d8494b5668ad4d4e68e83228ae8451eaa01d2220/lisp/org-element.el#L922C25-L922C32 + .all(|c| c.is_alphanumeric() || c == '_' || c == '@' || c == '#' || c == '%') + { + children.push(input.slice(ii + 1..i).text_token()); + children.push(token(COLON, ":")); + can_not_be_ws = false; + debug_assert!(i > ii, "{} > {}", i, ii); + i = ii; + } else if item.iter().all(|&c| c == b' ' || c == b'\t') && !can_not_be_ws { + children.push(input.slice(ii + 1..i).ws_token()); + children.push(token(COLON, ":")); + can_not_be_ws = true; + debug_assert!(i > ii, "{} > {}", i, ii); + i = ii; + } else { + break; + } + } + + if children.len() <= 2 { + return Err(nom::Err::Error(())); + } + + if i != 0 && bytes[i - 1] != b' ' && bytes[i - 1] != b'\t' { + return Err(nom::Err::Error(())); + } + + // we parse headline tag from right to left, + // so we need to reverse the result after it finishes + children.reverse(); + + Ok((input.slice(0..i), node(HEADLINE_TAGS, children))) +} + +fn headline_keyword_token(input: Input) -> IResult { + let (input, word) = take_while1(|c: char| !c.is_ascii_whitespace())(input)?; + let (input, ws) = space0(input)?; + if input.c.todo_keywords.0.iter().any(|k| k == word.s) { + Ok((input, 
(word.token(HEADLINE_KEYWORD_TODO), ws))) + } else if input.c.todo_keywords.1.iter().any(|k| k == word.s) { + Ok((input, (word.token(HEADLINE_KEYWORD_DONE), ws))) + } else { + Err(nom::Err::Error(())) + } +} + +fn headline_priority_node(input: Input) -> IResult { + let (input, node) = map( + tuple((l_bracket_token, hash_token, anychar, r_bracket_token)), + |(l_bracket, hash, char, r_bracket)| { + node( + HEADLINE_PRIORITY, + [l_bracket, hash, token(TEXT, &char.to_string()), r_bracket], + ) + }, + )(input)?; + + let (input, ws) = space0(input)?; + + Ok((input, (node, ws))) +} + +#[test] +fn parse() { + use crate::{ast::Headline, tests::to_ast, ParseConfig}; + + let to_headline = to_ast::(headline_node); + + insta::assert_debug_snapshot!( + to_headline("* foo").syntax, + @r###" + HEADLINE@0..5 + HEADLINE_STARS@0..1 "*" + WHITESPACE@1..2 " " + HEADLINE_TITLE@2..5 + TEXT@2..5 "foo" + "### + ); + + insta::assert_debug_snapshot!( + to_headline("* foo\n\n** bar").syntax, + @r###" + HEADLINE@0..13 + HEADLINE_STARS@0..1 "*" + WHITESPACE@1..2 " " + HEADLINE_TITLE@2..5 + TEXT@2..5 "foo" + NEW_LINE@5..6 "\n" + SECTION@6..7 + PARAGRAPH@6..7 + BLANK_LINE@6..7 "\n" + HEADLINE@7..13 + HEADLINE_STARS@7..9 "**" + WHITESPACE@9..10 " " + HEADLINE_TITLE@10..13 + TEXT@10..13 "bar" + "### + ); + + insta::assert_debug_snapshot!( + to_headline("* TODO foo\nbar\n** baz\n").syntax, + @r###" + HEADLINE@0..22 + HEADLINE_STARS@0..1 "*" + WHITESPACE@1..2 " " + HEADLINE_KEYWORD_TODO@2..6 "TODO" + WHITESPACE@6..7 " " + HEADLINE_TITLE@7..10 + TEXT@7..10 "foo" + NEW_LINE@10..11 "\n" + SECTION@11..15 + PARAGRAPH@11..15 + TEXT@11..15 "bar\n" + HEADLINE@15..22 + HEADLINE_STARS@15..17 "**" + WHITESPACE@17..18 " " + HEADLINE_TITLE@18..21 + TEXT@18..21 "baz" + NEW_LINE@21..22 "\n" + "### + ); + + insta::assert_debug_snapshot!( + to_headline("** [#A] foo\n* baz").syntax, + @r###" + HEADLINE@0..12 + HEADLINE_STARS@0..2 "**" + WHITESPACE@2..3 " " + HEADLINE_PRIORITY@3..7 + L_BRACKET@3..4 "[" + HASH@4..5 "#" 
+ TEXT@5..6 "A" + R_BRACKET@6..7 "]" + WHITESPACE@7..8 " " + HEADLINE_TITLE@8..11 + TEXT@8..11 "foo" + NEW_LINE@11..12 "\n" + "### + ); + + let config = &ParseConfig::default(); + + assert!(headline_node(("_ ", config).into()).is_err()); + assert!(headline_node(("*", config).into()).is_err()); + assert!(headline_node((" * ", config).into()).is_err()); + assert!(headline_node(("**", config).into()).is_err()); + assert!(headline_node(("**\n", config).into()).is_err()); + assert!(headline_node(("**\r", config).into()).is_err()); + assert!(headline_node(("**\t", config).into()).is_err()); +} + +#[test] +fn issue_15_16() { + use crate::{ast::Headline, tests::to_ast}; + + let to_headline = to_ast::(headline_node); + + assert!(to_headline("* a ::").tags().count() == 0); + assert!(to_headline("* a : :").tags().count() == 0); + assert!(to_headline("* a :(:").tags().count() == 0); + assert!(to_headline("* a :a: :").tags().count() == 0); + assert!(to_headline("* a :a :").tags().count() == 0); + assert!(to_headline("* a a:").tags().count() == 0); + assert!(to_headline("* a :a").tags().count() == 0); + + let tags = to_headline("* a \t:_:").tags(); + assert_eq!( + vec!["_".to_string()], + tags.map(|x| x.to_string()).collect::>(), + ); + + let tags = to_headline("* a \t :@:").tags(); + assert_eq!( + vec!["@".to_string()], + tags.map(|x| x.to_string()).collect::>(), + ); + + let tags = to_headline("* a :#:").tags(); + assert_eq!( + vec!["#".to_string()], + tags.map(|x| x.to_string()).collect::>(), + ); + + let tags = to_headline("* a\t :%:").tags(); + assert_eq!( + vec!["%".to_string()], + tags.map(|x| x.to_string()).collect::>(), + ); + + let tags = to_headline("* a :余: :破:").tags(); + assert_eq!( + vec!["余".to_string(), "破".to_string()], + tags.map(|x| x.to_string()).collect::>(), + ); +} diff --git a/src/syntax/inline_call.rs b/src/syntax/inline_call.rs new file mode 100644 index 0000000..0e8d058 --- /dev/null +++ b/src/syntax/inline_call.rs @@ -0,0 +1,130 @@ +use nom::{ + 
bytes::complete::{tag, take_till}, + combinator::{map, opt}, + sequence::tuple, + IResult, +}; + +use super::{ + combinator::{ + l_bracket_token, l_parens_token, node, r_bracket_token, r_parens_token, GreenElement, + }, + input::Input, + SyntaxKind, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn inline_call_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + tag("call_"), + take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')'), + opt(tuple(( + l_bracket_token, + take_till(|c| c == ']' || c == '\n'), + r_bracket_token, + ))), + l_parens_token, + take_till(|c| c == ')' || c == '\n'), + r_parens_token, + opt(tuple(( + l_bracket_token, + take_till(|c| c == ']' || c == '\n'), + r_bracket_token, + ))), + )), + |(call, name, inside_header, l_paren, arguments, r_paren, end_header)| { + let mut children = vec![call.text_token()]; + children.push(name.text_token()); + if let Some((l_bracket, header, r_bracket)) = inside_header { + children.push(l_bracket); + children.push(header.text_token()); + children.push(r_bracket); + } + children.push(l_paren); + children.push(arguments.text_token()); + children.push(r_paren); + if let Some((l_bracket, header, r_bracket)) = end_header { + children.push(l_bracket); + children.push(header.text_token()); + children.push(r_bracket); + } + node(SyntaxKind::INLINE_CALL, children) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::{ast::InlineCall, tests::to_ast}; + + let to_inline_call = to_ast::(inline_call_node); + + let call = to_inline_call("call_square(4)"); + insta::assert_debug_snapshot!( + call.syntax, + @r###" + INLINE_CALL@0..14 + TEXT@0..5 "call_" + TEXT@5..11 "square" + L_PARENS@11..12 "(" + TEXT@12..13 "4" + R_PARENS@13..14 ")" + "### + ); + + let call = to_inline_call("call_square[:results output](4)"); + insta::assert_debug_snapshot!( + call.syntax, + @r###" + INLINE_CALL@0..31 + 
TEXT@0..5 "call_" + TEXT@5..11 "square" + L_BRACKET@11..12 "[" + TEXT@12..27 ":results output" + R_BRACKET@27..28 "]" + L_PARENS@28..29 "(" + TEXT@29..30 "4" + R_PARENS@30..31 ")" + "### + ); + + let call = to_inline_call("call_square(4)[:results html]"); + insta::assert_debug_snapshot!( + call.syntax, + @r###" + INLINE_CALL@0..29 + TEXT@0..5 "call_" + TEXT@5..11 "square" + L_PARENS@11..12 "(" + TEXT@12..13 "4" + R_PARENS@13..14 ")" + L_BRACKET@14..15 "[" + TEXT@15..28 ":results html" + R_BRACKET@28..29 "]" + "### + ); + + let call = to_inline_call("call_square[:results output](4)[:results html]"); + insta::assert_debug_snapshot!( + call.syntax, + @r###" + INLINE_CALL@0..46 + TEXT@0..5 "call_" + TEXT@5..11 "square" + L_BRACKET@11..12 "[" + TEXT@12..27 ":results output" + R_BRACKET@27..28 "]" + L_PARENS@28..29 "(" + TEXT@29..30 "4" + R_PARENS@30..31 ")" + L_BRACKET@31..32 "[" + TEXT@32..45 ":results html" + R_BRACKET@45..46 "]" + "### + ); +} diff --git a/src/syntax/inline_src.rs b/src/syntax/inline_src.rs new file mode 100644 index 0000000..112c01f --- /dev/null +++ b/src/syntax/inline_src.rs @@ -0,0 +1,88 @@ +use nom::{ + bytes::complete::{tag, take_till, take_while1}, + combinator::{map, opt}, + sequence::tuple, + IResult, +}; + +use super::{ + combinator::{ + l_bracket_token, l_curly_token, node, r_bracket_token, r_curly_token, GreenElement, + }, + input::Input, + SyntaxKind, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn inline_src_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + tag("src_"), + take_while1(|c: char| !c.is_ascii_whitespace() && c != '[' && c != '{'), + opt(tuple(( + l_bracket_token, + take_till(|c| c == '\n' || c == ']'), + r_bracket_token, + ))), + l_curly_token, + take_till(|c| c == '\n' || c == '}'), + r_curly_token, + )), + |(src, lang, options, l_curly, body, r_curly)| { + let mut children = vec![src.text_token(), lang.text_token()]; + if 
let Some((l_bracket, options, r_bracket)) = options { + children.push(l_bracket); + children.push(options.text_token()); + children.push(r_bracket); + } + children.push(l_curly); + children.push(body.text_token()); + children.push(r_curly); + node(SyntaxKind::INLINE_SRC, children) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::{ast::InlineSrc, tests::to_ast, ParseConfig}; + + let to_inline_src = to_ast::(inline_src_node); + + insta::assert_debug_snapshot!( + to_inline_src("src_C{int a = 0;}").syntax, + @r###" + INLINE_SRC@0..17 + TEXT@0..4 "src_" + TEXT@4..5 "C" + L_CURLY@5..6 "{" + TEXT@6..16 "int a = 0;" + R_CURLY@16..17 "}" + "### + ); + + insta::assert_debug_snapshot!( + to_inline_src("src_xml[:exports code]{text}").syntax, + @r###" + INLINE_SRC@0..39 + TEXT@0..4 "src_" + TEXT@4..7 "xml" + L_BRACKET@7..8 "[" + TEXT@8..21 ":exports code" + R_BRACKET@21..22 "]" + L_CURLY@22..23 "{" + TEXT@23..38 "text" + R_CURLY@38..39 "}" + "### + ); + + let config = &ParseConfig::default(); + + assert!(inline_src_node(("src_xml[:exports code]{text", config).into()).is_err()); + assert!(inline_src_node(("src_[:exports code]{text}", config).into()).is_err()); + assert!(inline_src_node(("src_xml[:exports code]", config).into()).is_err()); +} diff --git a/src/syntax/input.rs b/src/syntax/input.rs new file mode 100644 index 0000000..307948c --- /dev/null +++ b/src/syntax/input.rs @@ -0,0 +1,242 @@ +use nom::{ + error::{ErrorKind, ParseError}, + Compare, CompareResult, Err, FindSubstring, IResult, InputIter, InputLength, InputTake, + InputTakeAtPosition, Needed, Offset, Slice, +}; +use std::{ + ops::{Deref, Range, RangeFrom, RangeFull, RangeTo}, + str::{CharIndices, Chars}, +}; + +use super::{ + combinator::{token, GreenElement}, + SyntaxKind, +}; +use crate::config::ParseConfig; + +/// A custom Input struct +/// +/// It helps us to pass the `ParseConfig` all the way down to each parsers +#[derive(Clone, Copy, Debug)] +pub struct Input<'a> 
{ + pub(crate) s: &'a str, + pub(crate) c: &'a ParseConfig, +} + +impl<'a> Input<'a> { + #[inline] + pub(crate) fn of(&self, i: &'a str) -> Input<'a> { + Input { s: i, c: self.c } + } + + #[inline] + pub fn as_str(&self) -> &'a str { + self.s + } + + #[inline] + pub fn token(&self, kind: SyntaxKind) -> GreenElement { + token(kind, self.s) + } + + #[inline] + pub fn text_token(&self) -> GreenElement { + token(SyntaxKind::TEXT, self.s) + } + + #[inline] + pub fn ws_token(&self) -> GreenElement { + token(SyntaxKind::WHITESPACE, self.s) + } + + #[inline] + pub fn nl_token(&self) -> GreenElement { + token(SyntaxKind::NEW_LINE, self.s) + } +} + +impl<'a> Deref for Input<'a> { + type Target = str; + + #[inline] + fn deref(&self) -> &'a str { + self.s + } +} + +impl<'a> From<(&'a str, &'a ParseConfig)> for Input<'a> { + fn from(value: (&'a str, &'a ParseConfig)) -> Self { + Input { + s: value.0, + c: value.1, + } + } +} + +impl<'a> Slice> for Input<'a> { + fn slice(&self, range: Range) -> Self { + self.of(self.s.slice(range)) + } +} + +impl<'a> Slice> for Input<'a> { + fn slice(&self, range: RangeTo) -> Self { + self.of(self.s.slice(range)) + } +} + +impl<'a> Slice> for Input<'a> { + fn slice(&self, range: RangeFrom) -> Self { + self.of(self.s.slice(range)) + } +} + +impl<'a> Slice for Input<'a> { + fn slice(&self, range: RangeFull) -> Self { + self.of(self.s.slice(range)) + } +} + +impl<'a, 'b> FindSubstring<&'b str> for Input<'a> { + fn find_substring(&self, substr: &str) -> Option { + self.s.find(substr) + } +} + +impl<'a, 'b> Compare<&'b str> for Input<'a> { + #[inline] + fn compare(&self, t: &'b str) -> CompareResult { + self.s.compare(t) + } + + #[inline] + fn compare_no_case(&self, t: &'b str) -> CompareResult { + self.s.compare_no_case(t) + } +} + +impl<'a> InputLength for Input<'a> { + #[inline] + fn input_len(&self) -> usize { + self.len() + } +} + +impl<'a> InputIter for Input<'a> { + type Item = char; + type Iter = CharIndices<'a>; + type IterElem = Chars<'a>; 
+ #[inline] + fn iter_indices(&self) -> Self::Iter { + self.s.char_indices() + } + #[inline] + fn iter_elements(&self) -> Self::IterElem { + self.s.chars() + } + fn position

    (&self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool, + { + self.s.position(predicate) + } + #[inline] + fn slice_index(&self, count: usize) -> Result { + self.s.slice_index(count) + } +} + +impl<'a> InputTake for Input<'a> { + #[inline] + fn take(&self, count: usize) -> Self { + let s = self.s.take(count); + self.of(s) + } + #[inline] + fn take_split(&self, count: usize) -> (Self, Self) { + let (l, r) = self.s.take_split(count); + (self.of(l), self.of(r)) + } +} + +impl<'a> InputTakeAtPosition for Input<'a> { + type Item = char; + + #[inline] + fn split_at_position>(&self, predicate: P) -> IResult + where + P: Fn(Self::Item) -> bool, + { + match self.s.split_at_position::<_, (&str, ErrorKind)>(predicate) { + Ok((l, r)) => Ok((self.of(l), self.of(r))), + Err(Err::Error((i, kind))) => Err(Err::Error(E::from_error_kind(self.of(i), kind))), + Err(Err::Failure((i, kind))) => Err(Err::Failure(E::from_error_kind(self.of(i), kind))), + Err(Err::Incomplete(x)) => Err(Err::Incomplete(x)), + } + } + + #[inline] + fn split_at_position1>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult + where + P: Fn(Self::Item) -> bool, + { + match self + .s + .split_at_position1::<_, (&str, ErrorKind)>(predicate, e) + { + Ok((l, r)) => Ok((self.of(l), self.of(r))), + Err(Err::Error((i, kind))) => Err(Err::Error(E::from_error_kind(self.of(i), kind))), + Err(Err::Failure((i, kind))) => Err(Err::Failure(E::from_error_kind(self.of(i), kind))), + Err(Err::Incomplete(x)) => Err(Err::Incomplete(x)), + } + } + + #[inline] + fn split_at_position_complete>( + &self, + predicate: P, + ) -> IResult + where + P: Fn(Self::Item) -> bool, + { + match self + .s + .split_at_position_complete::<_, (&str, ErrorKind)>(predicate) + { + Ok((l, r)) => Ok((self.of(l), self.of(r))), + Err(Err::Error((i, kind))) => Err(Err::Error(E::from_error_kind(self.of(i), kind))), + Err(Err::Failure((i, kind))) => Err(Err::Failure(E::from_error_kind(self.of(i), kind))), + Err(Err::Incomplete(x)) => 
Err(Err::Incomplete(x)), + } + } + + #[inline] + fn split_at_position1_complete>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult + where + P: Fn(Self::Item) -> bool, + { + match self + .s + .split_at_position1_complete::<_, (&str, ErrorKind)>(predicate, e) + { + Ok((l, r)) => Ok((self.of(l), self.of(r))), + Err(Err::Error((i, kind))) => Err(Err::Error(E::from_error_kind(self.of(i), kind))), + Err(Err::Failure((i, kind))) => Err(Err::Failure(E::from_error_kind(self.of(i), kind))), + Err(Err::Incomplete(x)) => Err(Err::Incomplete(x)), + } + } +} + +impl<'a> Offset for Input<'a> { + fn offset(&self, second: &Self) -> usize { + self.s.offset(second.s) + } +} diff --git a/src/syntax/keyword.rs b/src/syntax/keyword.rs new file mode 100644 index 0000000..327f46f --- /dev/null +++ b/src/syntax/keyword.rs @@ -0,0 +1,290 @@ +#![allow(clippy::type_complexity)] + +use nom::{ + branch::alt, + bytes::complete::{tag, take_till, take_while1}, + character::complete::space0, + combinator::{recognize, verify}, + sequence::tuple, + IResult, InputTake, +}; + +use super::{ + combinator::{blank_lines, hash_plus_token, node, trim_line_end, GreenElement}, + input::Input, + SyntaxKind, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn keyword_node(input: Input) -> IResult { + fn f(input: Input) -> IResult { + let (input, (key, mut nodes)) = keyword_node_base(input)?; + let (input, post_blank) = blank_lines(input)?; + nodes.extend(post_blank); + Ok(( + input, + node( + if key == "CALL" { + SyntaxKind::BABEL_CALL + } else { + SyntaxKind::KEYWORD + }, + nodes, + ), + )) + } + crate::lossless_parser!(f, input) +} + +/// Return empty vector if input doesn't contain affiliated keyword, or affiliated keyword is +/// followed by blank lines. 
+#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn affiliated_keyword_nodes(input: Input) -> IResult, ()> { + let mut children = vec![]; + let mut i = input; + + while !i.is_empty() { + let Ok((input_, (key, nodes))) = keyword_node_base(i) else { + break; + }; + + let (input_, post_blank) = blank_lines(input_)?; + + // affiliated keyword can not followed by blank lines or eof + if !post_blank.is_empty() || input_.is_empty() { + return Ok((input, vec![])); + } + + if input_.c.affiliated_keywords.iter().all(|w| w != key) && !key.starts_with("ATTR_") { + break; + } + + debug_assert!(i.len() > input_.len(), "{} > {}", i.len(), input_.len()); + i = input_; + children.push(node(SyntaxKind::AFFILIATED_KEYWORD, nodes)); + } + + Ok((i, children)) +} + +pub fn tblfm_keyword_nodes(input: Input) -> IResult, ()> { + let mut children = vec![]; + let mut i = input; + + while !i.is_empty() { + let Ok((input, (key, nodes))) = keyword_node_base(i) else { + break; + }; + + if !key.eq_ignore_ascii_case("TBLFM") { + break; + } + + debug_assert!(i.len() > input.len(), "{} > {}", i.len(), input.len()); + i = input; + children.push(node(SyntaxKind::KEYWORD, nodes)); + } + + Ok((i, children)) +} + +fn keyword_node_base(input: Input) -> IResult), ()> { + let (input, (ws, hash_plus)) = tuple((space0, hash_plus_token))(input)?; + + let (input, (key, optional, colon)) = alt((key_with_optional, key))(input)?; + + let (input, (value, ws_, nl)) = trim_line_end(input)?; + + let mut children = vec![]; + if !ws.is_empty() { + children.push(ws.ws_token()); + } + children.push(hash_plus); + children.push(key.text_token()); + if let Some((l_bracket, optional, r_bracket)) = optional { + children.push(l_bracket.token(SyntaxKind::L_BRACKET)); + children.push(optional.text_token()); + children.push(r_bracket.token(SyntaxKind::R_BRACKET)); + } + children.push(colon.token(SyntaxKind::COLON)); + children.push(value.text_token()); + if 
!ws_.is_empty() { + children.push(ws_.ws_token()); + } + if !nl.is_empty() { + children.push(nl.nl_token()); + } + + Ok((input, (key.s, children))) +} + +fn key(input: Input) -> IResult, Input), ()> { + let (input, output) = verify( + recognize(tuple(( + take_till(|c: char| c.is_ascii_whitespace() || c == ':'), + take_while1(|c: char| c == ':'), + ))), + |i: &Input| i.len() >= 2, + )(input)?; + let (colon, key) = output.take_split(output.len() - 1); + Ok((input, (key, None, colon))) +} + +fn key_with_optional( + input: Input, +) -> IResult, Input), ()> { + let (input, (key, r_backer, optional, l_backer, colon)) = tuple(( + alt((tag("CAPTION"), tag("RESULTS"))), + tag("["), + take_till(|c| c == '\r' || c == '\n' || c == ']'), + tag("]"), + tag(":"), + ))(input)?; + Ok((input, (key, Some((r_backer, optional, l_backer)), colon))) +} + +#[test] +fn parse() { + use crate::{ + ast::{BabelCall, Keyword}, + tests::to_ast, + ParseConfig, + }; + + let to_keyword = to_ast::(keyword_node); + + let to_babel_call = to_ast::(keyword_node); + + to_keyword("#+KEY:"); + to_keyword("#+::"); + to_keyword("#+::"); + to_keyword("#+:: "); + to_keyword("#+:: \n"); + to_keyword("#+::\n"); + + insta::assert_debug_snapshot!( + to_keyword("#+KEY:").syntax, + @r###" + KEYWORD@0..6 + HASH_PLUS@0..2 "#+" + TEXT@2..5 "KEY" + COLON@5..6 ":" + TEXT@6..6 "" + "### + ); + + insta::assert_debug_snapshot!( + to_keyword("#+KEY: VALUE").syntax, + @r###" + KEYWORD@0..12 + HASH_PLUS@0..2 "#+" + TEXT@2..5 "KEY" + COLON@5..6 ":" + TEXT@6..12 " VALUE" + "### + ); + + insta::assert_debug_snapshot!( + to_keyword("#+K_E_Y: VALUE").syntax, + @r###" + KEYWORD@0..14 + HASH_PLUS@0..2 "#+" + TEXT@2..7 "K_E_Y" + COLON@7..8 ":" + TEXT@8..14 " VALUE" + "### + ); + + insta::assert_debug_snapshot!( + to_keyword("#+KEY:VALUE\n").syntax, + @r###" + KEYWORD@0..12 + HASH_PLUS@0..2 "#+" + TEXT@2..5 "KEY" + COLON@5..6 ":" + TEXT@6..11 "VALUE" + NEW_LINE@11..12 "\n" + "### + ); + + insta::assert_debug_snapshot!( + 
to_keyword("#+RESULTS:").syntax, + @r###" + KEYWORD@0..10 + HASH_PLUS@0..2 "#+" + TEXT@2..9 "RESULTS" + COLON@9..10 ":" + TEXT@10..10 "" + "### + ); + + insta::assert_debug_snapshot!( + to_keyword("#+ATTR_LATEX: :width 5cm\n").syntax, + @r###" + KEYWORD@0..25 + HASH_PLUS@0..2 "#+" + TEXT@2..12 "ATTR_LATEX" + COLON@12..13 ":" + TEXT@13..24 " :width 5cm" + NEW_LINE@24..25 "\n" + "### + ); + + insta::assert_debug_snapshot!( + to_babel_call("#+CALL: double(n=4)").syntax, + @r###" + BABEL_CALL@0..19 + HASH_PLUS@0..2 "#+" + TEXT@2..6 "CALL" + COLON@6..7 ":" + TEXT@7..19 " double(n=4)" + "### + ); + + insta::assert_debug_snapshot!( + to_keyword("#+ABC[OPTIONAL]: Longer value.").syntax, + @r###" + KEYWORD@0..30 + HASH_PLUS@0..2 "#+" + TEXT@2..15 "ABC[OPTIONAL]" + COLON@15..16 ":" + TEXT@16..30 " Longer value." + "### + ); + + insta::assert_debug_snapshot!( + to_keyword("#+CAPTION: value").syntax, + @r###" + KEYWORD@0..16 + HASH_PLUS@0..2 "#+" + TEXT@2..9 "CAPTION" + COLON@9..10 ":" + TEXT@10..16 " value" + "### + ); + + insta::assert_debug_snapshot!( + to_keyword("#+CAPTION[caption optional]: value").syntax, + @r###" + KEYWORD@0..34 + HASH_PLUS@0..2 "#+" + TEXT@2..9 "CAPTION" + L_BRACKET@9..10 "[" + TEXT@10..26 "caption optional" + R_BRACKET@26..27 "]" + COLON@27..28 ":" + TEXT@28..34 " value" + "### + ); + + let config = &ParseConfig::default(); + + assert!(keyword_node(("#+KE Y: VALUE", config).into()).is_err()); + assert!(keyword_node(("#+ KEY: VALUE", config).into()).is_err()); +} diff --git a/src/syntax/latex_environment.rs b/src/syntax/latex_environment.rs new file mode 100644 index 0000000..8e20e21 --- /dev/null +++ b/src/syntax/latex_environment.rs @@ -0,0 +1,127 @@ +use nom::{ + bytes::complete::{tag, take_while1}, + character::complete::space0, + sequence::tuple, + IResult, InputTake, +}; + +use crate::SyntaxKind; + +use super::{ + combinator::{eol_or_eof, l_curly_token, line_starts_iter, node, r_curly_token, GreenElement}, + input::Input, +}; + +#[cfg_attr( + 
feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn latex_environment_node(input: Input) -> IResult { + crate::lossless_parser!(latex_environment_node_base, input) +} + +fn latex_environment_node_base(input: Input) -> IResult { + let (input, (ws1, begin, l1, name1, r1)) = tuple(( + space0, + tag("\\begin"), + l_curly_token, + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '*'), + r_curly_token, + ))(input)?; + + for (input, contents) in line_starts_iter(input.s).map(|i| input.take_split(i)) { + if let Ok((input, (ws2, end, l2, name2, r2, ws3, nl))) = tuple(( + space0, + tag("\\end"), + l_curly_token, + tag(name1.s), + r_curly_token, + space0, + eol_or_eof, + ))(input) + { + return Ok(( + input, + node( + SyntaxKind::LATEX_ENVIRONMENT, + [ + ws1.ws_token(), + begin.text_token(), + l1, + name1.text_token(), + r1, + contents.text_token(), + ws2.ws_token(), + end.text_token(), + l2, + name2.text_token(), + r2, + ws3.ws_token(), + nl.nl_token(), + ], + ), + )); + } + } + + Err(nom::Err::Error(())) +} + +#[test] +fn parse() { + use crate::ast::LatexEnvironment; + use crate::config::ParseConfig; + use crate::tests::to_ast; + + let to_latex = to_ast::(latex_environment_node); + + insta::assert_debug_snapshot!( + to_latex(r"\begin{NAME}\end{NAME}").syntax, + @r###" + LATEX_ENVIRONMENT@0..22 + WHITESPACE@0..0 "" + TEXT@0..6 "\\begin" + L_CURLY@6..7 "{" + TEXT@7..11 "NAME" + R_CURLY@11..12 "}" + TEXT@12..12 "" + WHITESPACE@12..12 "" + TEXT@12..16 "\\end" + L_CURLY@16..17 "{" + TEXT@17..21 "NAME" + R_CURLY@21..22 "}" + WHITESPACE@22..22 "" + NEW_LINE@22..22 "" + "### + ); + + insta::assert_debug_snapshot!( + to_latex( + r"\begin{align*} + 2x - 5y &= 8 \\ + 3x + 9y &= -12 + \end{align*}" + ).syntax, + @r###" + LATEX_ENVIRONMENT@0..70 + WHITESPACE@0..0 "" + TEXT@0..6 "\\begin" + L_CURLY@6..7 "{" + TEXT@7..13 "align*" + R_CURLY@13..14 "}" + TEXT@14..54 "\n 2x - 5y &= 8 \\\\\n ..." 
+ WHITESPACE@54..58 " " + TEXT@58..62 "\\end" + L_CURLY@62..63 "{" + TEXT@63..69 "align*" + R_CURLY@69..70 "}" + WHITESPACE@70..70 "" + NEW_LINE@70..70 "" + "### + ); + + let c = ParseConfig::default(); + + assert!(latex_environment_node((r"\begin{equation}\end{align}", &c).into()).is_err()); + assert!(latex_environment_node((r"\begin{_}\end{_}", &c).into()).is_err()); +} diff --git a/src/syntax/latex_fragment.rs b/src/syntax/latex_fragment.rs new file mode 100644 index 0000000..7ea5ec3 --- /dev/null +++ b/src/syntax/latex_fragment.rs @@ -0,0 +1,199 @@ +use nom::{ + branch::alt, + bytes::complete::{take_until1, take_while1}, + character::complete::alpha1, + sequence::tuple, + IResult, InputTake, +}; + +use crate::SyntaxKind; + +use super::{ + combinator::{ + backslash_token, dollar2_token, dollar_token, l_bracket_token, l_curly_token, + l_parens_token, node, r_bracket_token, r_curly_token, r_parens_token, GreenElement, + }, + input::Input, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn latex_fragment_node(input: Input) -> IResult { + debug_assert!(input.s.starts_with(['\\', '$'])); + let mut parser = alt((template1, template2, template3, template4, template5)); + crate::lossless_parser!(parser, input) +} + +// \NAME[CONTENTS1] \NAME{CONTENTS1} +fn template1(input: Input) -> IResult { + let (input, (backslash, name)) = tuple((backslash_token, alpha1))(input)?; + let (input, (l, content, r)) = alt(( + tuple(( + l_bracket_token, + take_while1(|c| c != '{' && c != '}' && c != '[' && c != ']' && c != '\r' && c != '\n'), + r_bracket_token, + )), + tuple(( + l_curly_token, + take_while1(|c| c != '{' && c != '}' && c != '\r' && c != '\n'), + r_curly_token, + )), + ))(input)?; + Ok(( + input, + node( + SyntaxKind::LATEX_FRAGMENT, + [backslash, name.text_token(), l, content.text_token(), r], + ), + )) +} + +// \(CONTENTS\) +fn template2(input: Input) -> IResult { + let (input, (backslash1, 
l)) = tuple((backslash_token, l_parens_token))(input)?; + if let Some(i) = jetscii::Substring::new("\\)").find(input.s) { + let (input, content) = input.take_split(i); + let (input, (backslash2, r)) = tuple((backslash_token, r_parens_token))(input)?; + Ok(( + input, + node( + SyntaxKind::LATEX_FRAGMENT, + [backslash1, l, content.text_token(), backslash2, r], + ), + )) + } else { + Err(nom::Err::Error(())) + } +} + +// \[CONTENTS\] +fn template3(input: Input) -> IResult { + let (input, (backslash1, l)) = tuple((backslash_token, l_bracket_token))(input)?; + if let Some(i) = jetscii::Substring::new("\\]").find(input.s) { + let (input, content) = input.take_split(i); + let (input, (backslash2, r)) = tuple((backslash_token, r_bracket_token))(input)?; + Ok(( + input, + node( + SyntaxKind::LATEX_FRAGMENT, + [backslash1, l, content.text_token(), backslash2, r], + ), + )) + } else { + Err(nom::Err::Error(())) + } +} + +// $$CONTENTS$$ +fn template4(input: Input) -> IResult { + let (input, l) = dollar2_token(input)?; + let (input, content) = take_until1("$$")(input)?; + let (input, r) = dollar2_token(input)?; + Ok(( + input, + node(SyntaxKind::LATEX_FRAGMENT, [l, content.text_token(), r]), + )) +} + +// $CONTENTS$ +fn template5(input: Input) -> IResult { + let (input, l) = dollar_token(input)?; + let (input, content) = take_until1("$")(input)?; + let (input, r) = dollar_token(input)?; + + let b = content.as_bytes()[0]; + if matches!(b, b'\r' | b'\n' | b' ' | b'\t' | b'.' | b',' | b';' | b'$') { + return Err(nom::Err::Error(())); + } + + let b = content.as_bytes()[content.s.len() - 1]; + if matches!(b, b'\r' | b'\n' | b' ' | b'\t' | b'.' 
| b',' | b'$') { + return Err(nom::Err::Error(())); + } + + let p = input.bytes().next(); + if let Some(p) = p { + if !matches!(p, b')' | b'}' | b']' | b'\'' | b'"' | b' ' | b'\r' | b'\n') { + return Err(nom::Err::Error(())); + } + } + + Ok(( + input, + node(SyntaxKind::LATEX_FRAGMENT, [l, content.text_token(), r]), + )) +} + +#[test] +fn parse() { + use crate::{ast::LatexFragment, tests::to_ast, ParseConfig}; + + let to_fragment = to_ast::(latex_fragment_node); + + insta::assert_debug_snapshot!( + to_fragment("\\enlargethispage{2\\baselineskip}").syntax, + @r###" + LATEX_FRAGMENT@0..32 + BACKSLASH@0..1 "\\" + TEXT@1..16 "enlargethispage" + L_CURLY@16..17 "{" + TEXT@17..31 "2\\baselineskip" + R_CURLY@31..32 "}" + "### + ); + + insta::assert_debug_snapshot!( + to_fragment("\\[a\\]").syntax, + @r###" + LATEX_FRAGMENT@0..5 + BACKSLASH@0..1 "\\" + L_BRACKET@1..2 "[" + TEXT@2..3 "a" + BACKSLASH@3..4 "\\" + R_BRACKET@4..5 "]" + "### + ); + + insta::assert_debug_snapshot!( + to_fragment("\\(e^{i \\pi}\\)").syntax, + @r###" + LATEX_FRAGMENT@0..13 + BACKSLASH@0..1 "\\" + L_PARENS@1..2 "(" + TEXT@2..11 "e^{i \\pi}" + BACKSLASH@11..12 "\\" + R_PARENS@12..13 ")" + "### + ); + + insta::assert_debug_snapshot!( + to_fragment("$\\frac{1}{3}$").syntax, + @r###" + LATEX_FRAGMENT@0..13 + DOLLAR@0..1 "$" + TEXT@1..12 "\\frac{1}{3}" + DOLLAR@12..13 "$" + "### + ); + + insta::assert_debug_snapshot!( + to_fragment("$a\nb$").syntax, + @r###" + LATEX_FRAGMENT@0..5 + DOLLAR@0..1 "$" + TEXT@1..4 "a\nb" + DOLLAR@4..5 "$" + "### + ); + + let c = ParseConfig::default(); + + assert!(latex_fragment_node(("$ LaTeXxxx$", &c).into()).is_err()); + assert!(latex_fragment_node(("$LaTeXxxx $", &c).into()).is_err()); + assert!(latex_fragment_node(("$a.$", &c).into()).is_err()); + assert!(latex_fragment_node(("$a$a", &c).into()).is_err()); + assert!(latex_fragment_node(("$$b\nol\nd*", &c).into()).is_err()); + assert!(latex_fragment_node(("$b\nol\nd*", &c).into()).is_err()); +} diff --git 
a/src/syntax/line_break.rs b/src/syntax/line_break.rs new file mode 100644 index 0000000..95b5789 --- /dev/null +++ b/src/syntax/line_break.rs @@ -0,0 +1,71 @@ +use nom::{character::complete::space0, combinator::map, sequence::tuple, IResult}; + +use crate::{ + syntax::combinator::{backslash_token, eol_or_eof, node}, + SyntaxKind, +}; + +use super::{combinator::GreenElement, input::Input}; + +pub fn line_break_node(input: Input) -> IResult { + debug_assert!(input.s.starts_with('\\')); + let mut parser = map( + tuple((backslash_token, backslash_token, space0, eol_or_eof)), + |(b1, b2, ws, nl)| { + node( + SyntaxKind::LINE_BREAK, + [b1, b2, ws.ws_token(), nl.nl_token()], + ) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::ast::LineBreak; + use crate::tests::to_ast; + + let to_line_break = to_ast::(line_break_node); + + insta::assert_debug_snapshot!( + to_line_break("\\\\\n").syntax, + @r###" + LINE_BREAK@0..3 + BACKSLASH@0..1 "\\" + BACKSLASH@1..2 "\\" + WHITESPACE@2..2 "" + NEW_LINE@2..3 "\n" + "### + ); + insta::assert_debug_snapshot!( + to_line_break("\\\\ \n").syntax, + @r###" + LINE_BREAK@0..6 + BACKSLASH@0..1 "\\" + BACKSLASH@1..2 "\\" + WHITESPACE@2..5 " " + NEW_LINE@5..6 "\n" + "### + ); + insta::assert_debug_snapshot!( + to_line_break("\\\\\r\n").syntax, + @r###" + LINE_BREAK@0..4 + BACKSLASH@0..1 "\\" + BACKSLASH@1..2 "\\" + WHITESPACE@2..2 "" + NEW_LINE@2..4 "\r\n" + "### + ); + insta::assert_debug_snapshot!( + to_line_break("\\\\ ").syntax, + @r###" + LINE_BREAK@0..6 + BACKSLASH@0..1 "\\" + BACKSLASH@1..2 "\\" + WHITESPACE@2..6 " " + NEW_LINE@6..6 "" + "### + ); +} diff --git a/src/syntax/link.rs b/src/syntax/link.rs new file mode 100644 index 0000000..a2aa4d9 --- /dev/null +++ b/src/syntax/link.rs @@ -0,0 +1,112 @@ +use nom::{ + bytes::complete::take_while, + combinator::{map, opt}, + sequence::tuple, + IResult, +}; + +use super::{ + combinator::{ + l_bracket2_token, l_bracket_token, node, r_bracket2_token, 
r_bracket_token, GreenElement, + }, + input::Input, + object::link_description_object_nodes, + SyntaxKind::*, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn link_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + l_bracket2_token, + take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']'), + opt(tuple(( + r_bracket_token, + l_bracket_token, + take_while(|c: char| c != '[' && c != ']'), + ))), + r_bracket2_token, + )), + |(l_bracket2, path, desc, r_bracket2)| { + let mut children = vec![l_bracket2, path.token(LINK_PATH)]; + + if let Some((r_bracket, l_bracket, desc)) = desc { + children.extend([r_bracket, l_bracket]); + children.extend(link_description_object_nodes(desc)); + } + + children.push(r_bracket2); + + node(LINK, children) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::{ast::Link, tests::to_ast, ParseConfig}; + + let to_link = to_ast::(link_node); + + let link = to_link("[[#id]]"); + insta::assert_debug_snapshot!( + link.syntax, + @r###" + LINK@0..7 + L_BRACKET2@0..2 "[[" + LINK_PATH@2..5 "#id" + R_BRACKET2@5..7 "]]" + "### + ); + + let link = to_link("[[#id][desc]]"); + insta::assert_debug_snapshot!( + link.syntax, + @r###" + LINK@0..13 + L_BRACKET2@0..2 "[[" + LINK_PATH@2..5 "#id" + R_BRACKET@5..6 "]" + L_BRACKET@6..7 "[" + TEXT@7..11 "desc" + R_BRACKET2@11..13 "]]" + "### + ); + + let link = to_link("[[file:/home/dominik/images/jupiter.jpg]]"); + insta::assert_debug_snapshot!( + link.syntax, + @r###" + LINK@0..41 + L_BRACKET2@0..2 "[[" + LINK_PATH@2..39 "file:/home/dominik/im ..." 
+ R_BRACKET2@39..41 "]]" + "### + ); + + let link = to_link("[[https://orgmode.org][*bold* description]]"); + insta::assert_debug_snapshot!( + link.syntax, + @r###" + LINK@0..43 + L_BRACKET2@0..2 "[[" + LINK_PATH@2..21 "https://orgmode.org" + R_BRACKET@21..22 "]" + L_BRACKET@22..23 "[" + BOLD@23..29 + STAR@23..24 "*" + TEXT@24..28 "bold" + STAR@28..29 "*" + TEXT@29..41 " description" + R_BRACKET2@41..43 "]]" + "### + ); + + let config = &ParseConfig::default(); + + assert!(link_node(("[[#id][desc]", config).into()).is_err()); +} diff --git a/src/syntax/list.rs b/src/syntax/list.rs new file mode 100644 index 0000000..dbbd666 --- /dev/null +++ b/src/syntax/list.rs @@ -0,0 +1,617 @@ +use memchr::{memchr, memchr2}; +use nom::{ + branch::alt, + bytes::complete::{tag, take}, + character::complete::{alphanumeric1, digit1, space0, space1}, + combinator::{cond, map, opt, recognize, verify}, + sequence::{preceded, tuple}, + IResult, InputTake, +}; + +use super::{ + combinator::{ + at_token, blank_lines, colon2_token, eol_or_eof, l_bracket_token, line_starts_iter, node, + r_bracket_token, GreenElement, + }, + element::element_node, + input::Input, + keyword::affiliated_keyword_nodes, + object::standard_object_nodes, + paragraph::paragraph_nodes, + SyntaxKind::*, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn list_node(input: Input) -> IResult { + crate::lossless_parser!(list_node_base, input) +} + +fn list_node_base(input: Input) -> IResult { + let (input, affiliated_keywords) = affiliated_keyword_nodes(input)?; + let (input, first_indent) = space0(input)?; + let (input, (ends_with_empty_blank_lines, first_item)) = list_item_node(first_indent, input)?; + + let mut children = vec![]; + children.extend(affiliated_keywords); + children.push(first_item); + + let mut input = input; + while !ends_with_empty_blank_lines && !input.is_empty() { + let (input_, indent) = space0(input)?; + + if 
indent.len() != first_indent.len() { + break; + } + + let Ok((input_, (ends_with_empty_blank_lines, list_item))) = list_item_node(indent, input_) + else { + break; + }; + + children.push(list_item); + debug_assert!( + input.len() > input_.len(), + "{} > {}", + input.len(), + input_.len(), + ); + input = input_; + + if ends_with_empty_blank_lines { + break; + } + } + + let (input, post_blank) = blank_lines(input)?; + + children.extend(post_blank); + + Ok((input, node(LIST, children))) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input, indent), fields(input = input.s)) +)] +fn list_item_node<'a>( + indent: Input<'a>, + input: Input<'a>, +) -> IResult, (bool, GreenElement), ()> { + let (input, bullet) = recognize(tuple(( + alt(( + tag("+"), + tag("*"), + tag("-"), + preceded(digit1, tag(".")), + preceded(digit1, tag(")")), + )), + alt((space1, eol_or_eof)), + )))(input)?; + + // list item cannot have an asterisk at the beginning of line + if indent.is_empty() && bullet.s.starts_with('*') { + return Err(nom::Err::Error(())); + } + + if input.is_empty() { + return Ok(( + input, + ( + false, + node( + LIST_ITEM, + [ + indent.token(LIST_ITEM_INDENT), + bullet.token(LIST_ITEM_BULLET), + ], + ), + ), + )); + } + + let is_ordered = bullet.s.starts_with(|c: char| c.is_ascii_digit()); + let (input, counter) = opt(list_item_counter)(input)?; + let (input, checkbox) = opt(list_item_checkbox)(input)?; + let (input, tag) = cond(!is_ordered, opt(list_item_tag))(input)?; + let (input, (ends_with_empty_blank_lines, content)) = + list_item_content_node(input, indent.len())?; + let (input, post_blank) = cond(!ends_with_empty_blank_lines, blank_lines)(input)?; + + let mut children = vec![ + indent.token(LIST_ITEM_INDENT), + bullet.token(LIST_ITEM_BULLET), + ]; + + if let Some((counter, ws)) = counter { + children.extend([counter, ws.ws_token()]); + } + if let Some((checkbox, ws)) = checkbox { + children.extend([checkbox, ws.ws_token()]); + } + if 
let Some(Some((tag, ws))) = tag { + children.extend([tag, ws.ws_token()]); + } + + children.push(content); + if let Some(post_blank) = post_blank { + children.extend(post_blank); + } + + Ok(( + input, + (ends_with_empty_blank_lines, node(LIST_ITEM, children)), + )) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +fn list_item_counter(input: Input) -> IResult { + let (input, node) = map( + tuple((l_bracket_token, at_token, alphanumeric1, r_bracket_token)), + |(l_bracket, at, char, r_bracket)| { + node( + LIST_ITEM_COUNTER, + [l_bracket, at, char.text_token(), r_bracket], + ) + }, + )(input)?; + + let (input, ws) = space0(input)?; + + Ok((input, (node, ws))) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +fn list_item_checkbox(input: Input) -> IResult { + let (input, node) = map( + tuple(( + l_bracket_token, + verify(take(1usize), |input: &Input| { + input.s == " " || input.s == "X" || input.s == "-" + }), + r_bracket_token, + )), + |(l_bracket, char, r_bracket)| { + node( + LIST_ITEM_CHECK_BOX, + [l_bracket, char.text_token(), r_bracket], + ) + }, + )(input)?; + + let (input, ws) = space0(input)?; + + Ok((input, (node, ws))) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +fn list_item_tag(input: Input) -> IResult { + let bytes = input.as_bytes(); + + let (input, tag) = match memchr2(b'\n', b':', bytes) { + Some(idx) if idx > 0 && bytes[idx] == b':' => input.take_split(idx), + _ => return Err(nom::Err::Error(())), + }; + let (input, ws) = space0(input)?; + let (input, colon2) = colon2_token(input)?; + + let mut children = standard_object_nodes(tag); + children.push(colon2); + + Ok((input, (node(LIST_ITEM_TAG, children), ws))) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] 
+fn list_item_content_node(input: Input, indent: usize) -> IResult { + if memchr(b'\n', input.as_bytes()).is_none() { + return Ok(( + input.of(""), + ( + false, + node( + LIST_ITEM_CONTENT, + [node(PARAGRAPH, standard_object_nodes(input))], + ), + ), + )); + }; + + let mut skip_one = true; + let mut i = input; + let mut children = vec![]; + let mut previous_blank_line: Option<(Input, Input)> = None; + 'l: while !i.is_empty() { + for (input, head) in line_starts_iter(i.as_str()) + // the first line in list item content will always be a paragraph + // so we need to skip it in the first iteration + .skip(if skip_one { 1 } else { 0 }) + .map(|idx| i.take_split(idx)) + { + match get_line_indent(input.as_str()) { + Some(next_indent) => { + if next_indent <= indent { + let (input, head) = previous_blank_line.unwrap_or((input, head)); + if !head.is_empty() { + children.extend(paragraph_nodes(head)?); + } + return Ok((input, (false, node(LIST_ITEM_CONTENT, children)))); + } + + previous_blank_line = None; + + if let Ok((input, element)) = element_node(input) { + if !head.is_empty() { + children.extend(paragraph_nodes(head)?); + } + children.push(element); + debug_assert!(input.len() < i.len(), "{} < {}", input.len(), i.len()); + i = input; + skip_one = false; + continue 'l; + } + } + _ => { + // list item ends at two consecutive empty lines + if let Some((input, head)) = previous_blank_line { + if !head.is_empty() { + children.extend(paragraph_nodes(head)?); + } + + return Ok((input, (true, node(LIST_ITEM_CONTENT, children)))); + } else { + previous_blank_line = Some((input, head)) + } + } + } + } + children.extend(paragraph_nodes(i)?); + break; + } + + Ok((input.of(""), (false, node(LIST_ITEM_CONTENT, children)))) +} + +fn get_line_indent(input: &str) -> Option { + input + .bytes() + .take_while(|b| *b != b'\n') + .position(|b| !b.is_ascii_whitespace()) +} + +#[test] +fn parse() { + use crate::{ast::List, tests::to_ast, ParseConfig}; + + let to_list = to_ast::(list_node); 
+ + insta::assert_debug_snapshot!( + to_list("1)").syntax, + @r###" + LIST@0..2 + LIST_ITEM@0..2 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..2 "1)" + "### + ); + + insta::assert_debug_snapshot!( + to_list("+ ").syntax, + @r###" + LIST@0..2 + LIST_ITEM@0..2 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..2 "+ " + "### + ); + + insta::assert_debug_snapshot!( + to_list("-\n").syntax, + @r###" + LIST@0..2 + LIST_ITEM@0..2 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..2 "-\n" + "### + ); + + insta::assert_debug_snapshot!( + to_list("+ 1").syntax, + @r###" + LIST@0..3 + LIST_ITEM@0..3 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..2 "+ " + LIST_ITEM_CONTENT@2..3 + PARAGRAPH@2..3 + TEXT@2..3 "1" + "### + ); + + insta::assert_debug_snapshot!( + to_list("+ 1\n").syntax, + @r###" + LIST@0..4 + LIST_ITEM@0..4 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..2 "+ " + LIST_ITEM_CONTENT@2..4 + PARAGRAPH@2..4 + TEXT@2..4 "1\n" + "### + ); + + // list ends with two consecutive blank lines, and these blank lines + // will be the post_blank of list node + insta::assert_debug_snapshot!( + to_list("+ [@A] 1\n\n\n+ 2").syntax, + @r###" + LIST@0..11 + LIST_ITEM@0..9 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..2 "+ " + LIST_ITEM_COUNTER@2..6 + L_BRACKET@2..3 "[" + AT@3..4 "@" + TEXT@4..5 "A" + R_BRACKET@5..6 "]" + WHITESPACE@6..7 " " + LIST_ITEM_CONTENT@7..9 + PARAGRAPH@7..9 + TEXT@7..9 "1\n" + BLANK_LINE@9..10 "\n" + BLANK_LINE@10..11 "\n" + "### + ); + + // empty line between list item, the empty line will be + // the post_blank of first item + insta::assert_debug_snapshot!( + to_list("+ *TAG* :: item1\n\n+ [X] item2").syntax, + @r###" + LIST@0..29 + LIST_ITEM@0..18 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..2 "+ " + LIST_ITEM_TAG@2..10 + BOLD@2..7 + STAR@2..3 "*" + TEXT@3..6 "TAG" + STAR@6..7 "*" + TEXT@7..8 " " + COLON2@8..10 "::" + WHITESPACE@10..10 "" + LIST_ITEM_CONTENT@10..17 + PARAGRAPH@10..17 + TEXT@10..17 " item1\n" + BLANK_LINE@17..18 "\n" + 
LIST_ITEM@18..29 + LIST_ITEM_INDENT@18..18 "" + LIST_ITEM_BULLET@18..20 "+ " + LIST_ITEM_CHECK_BOX@20..23 + L_BRACKET@20..21 "[" + TEXT@21..22 "X" + R_BRACKET@22..23 "]" + WHITESPACE@23..24 " " + LIST_ITEM_CONTENT@24..29 + PARAGRAPH@24..29 + TEXT@24..29 "item2" + "### + ); + + // nested list + let list = to_list( + r#"+ item1 + + item2"#, + ); + insta::assert_debug_snapshot!( + list.syntax, + @r###" + LIST@0..17 + LIST_ITEM@0..17 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..2 "+ " + LIST_ITEM_CONTENT@2..17 + PARAGRAPH@2..8 + TEXT@2..8 "item1\n" + LIST@8..17 + LIST_ITEM@8..17 + LIST_ITEM_INDENT@8..10 " " + LIST_ITEM_BULLET@10..12 "+ " + LIST_ITEM_CONTENT@12..17 + PARAGRAPH@12..17 + TEXT@12..17 "item2" + "### + ); + + insta::assert_debug_snapshot!( + to_list("+ item1\nitem2").syntax, + @r###" + LIST@0..8 + LIST_ITEM@0..8 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..2 "+ " + LIST_ITEM_CONTENT@2..8 + PARAGRAPH@2..8 + TEXT@2..8 "item1\n" + "### + ); + + insta::assert_debug_snapshot!( + to_list("+ item1\n\n still item 1").syntax, + @r###" + LIST@0..23 + LIST_ITEM@0..23 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..2 "+ " + LIST_ITEM_CONTENT@2..23 + PARAGRAPH@2..9 + TEXT@2..8 "item1\n" + BLANK_LINE@8..9 "\n" + PARAGRAPH@9..23 + TEXT@9..23 " still item 1" + "### + ); + + let list = to_list( + r#"+ item1 + + item2 + "#, + ); + insta::assert_debug_snapshot!( + list.syntax, + @r###" + LIST@0..26 + LIST_ITEM@0..26 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..2 "+ " + LIST_ITEM_CONTENT@2..26 + PARAGRAPH@2..8 + TEXT@2..8 "item1\n" + LIST@8..26 + LIST_ITEM@8..26 + LIST_ITEM_INDENT@8..14 " " + LIST_ITEM_BULLET@14..16 "+ " + LIST_ITEM_CONTENT@16..26 + PARAGRAPH@16..26 + TEXT@16..22 "item2\n" + BLANK_LINE@22..26 " " + "### + ); + + let list = to_list( + r#"1. item1 + + - item2 + +3. item 3"#, + ); + assert!(list.is_ordered()); + insta::assert_debug_snapshot!( + list.syntax, + @r###" + LIST@0..32 + LIST_ITEM@0..23 + LIST_ITEM_INDENT@0..0 "" + LIST_ITEM_BULLET@0..3 "1. 
" + LIST_ITEM_CONTENT@3..23 + PARAGRAPH@3..10 + TEXT@3..9 "item1\n" + BLANK_LINE@9..10 "\n" + LIST@10..23 + LIST_ITEM@10..23 + LIST_ITEM_INDENT@10..14 " " + LIST_ITEM_BULLET@14..16 "- " + LIST_ITEM_CONTENT@16..22 + PARAGRAPH@16..22 + TEXT@16..22 "item2\n" + BLANK_LINE@22..23 "\n" + LIST_ITEM@23..32 + LIST_ITEM_INDENT@23..23 "" + LIST_ITEM_BULLET@23..26 "3. " + LIST_ITEM_CONTENT@26..32 + PARAGRAPH@26..32 + TEXT@26..32 "item 3" + "### + ); + + // nested list + insta::assert_debug_snapshot!( + to_list(" + item1\n\n + item2").syntax, + @r###" + LIST@0..20 + LIST_ITEM@0..11 + LIST_ITEM_INDENT@0..2 " " + LIST_ITEM_BULLET@2..4 "+ " + LIST_ITEM_CONTENT@4..10 + PARAGRAPH@4..10 + TEXT@4..10 "item1\n" + BLANK_LINE@10..11 "\n" + LIST_ITEM@11..20 + LIST_ITEM_INDENT@11..13 " " + LIST_ITEM_BULLET@13..15 "+ " + LIST_ITEM_CONTENT@15..20 + PARAGRAPH@15..20 + TEXT@15..20 "item2" + "### + ); + + insta::assert_debug_snapshot!( + to_list(" 1. item1\n 2. item2\n 3. item3").syntax, + @r###" + LIST@0..42 + LIST_ITEM@0..42 + LIST_ITEM_INDENT@0..2 " " + LIST_ITEM_BULLET@2..5 "1. " + LIST_ITEM_CONTENT@5..42 + PARAGRAPH@5..11 + TEXT@5..11 "item1\n" + LIST@11..28 + LIST_ITEM@11..28 + LIST_ITEM_INDENT@11..19 " " + LIST_ITEM_BULLET@19..22 "2. " + LIST_ITEM_CONTENT@22..28 + PARAGRAPH@22..28 + TEXT@22..28 "item2\n" + LIST@28..42 + LIST_ITEM@28..42 + LIST_ITEM_INDENT@28..34 " " + LIST_ITEM_BULLET@34..37 "3. " + LIST_ITEM_CONTENT@37..42 + PARAGRAPH@37..42 + TEXT@37..42 "item3" + "### + ); + + // Indentation of lines within other greater elements do not count + insta::assert_debug_snapshot!( + to_list(" 1. item1\n #+begin_example\nhello\n#+end_example\n").syntax, + @r###" + LIST@0..51 + LIST_ITEM@0..51 + LIST_ITEM_INDENT@0..2 " " + LIST_ITEM_BULLET@2..5 "1. 
" + LIST_ITEM_CONTENT@5..51 + PARAGRAPH@5..11 + TEXT@5..11 "item1\n" + EXAMPLE_BLOCK@11..51 + BLOCK_BEGIN@11..31 + WHITESPACE@11..15 " " + TEXT@15..23 "#+begin_" + TEXT@23..30 "example" + NEW_LINE@30..31 "\n" + BLOCK_CONTENT@31..37 + TEXT@31..37 "hello\n" + BLOCK_END@37..51 + TEXT@37..43 "#+end_" + TEXT@43..50 "example" + NEW_LINE@50..51 "\n" + "### + ); + + to_list("- "); + to_list("-\t"); + to_list("-\r"); + to_list("-\t\n"); + to_list("-\r\n"); + to_list("-"); + + let config = &ParseConfig::default(); + + assert!(list_node(("-a", config).into()).is_err()); + assert!(list_node(("*\r\n", config).into()).is_err()); + assert!(list_node(("* ", config).into()).is_err()); +} diff --git a/src/syntax/macros.rs b/src/syntax/macros.rs new file mode 100644 index 0000000..edbbe21 --- /dev/null +++ b/src/syntax/macros.rs @@ -0,0 +1,106 @@ +use nom::{ + bytes::complete::{take_until, take_while1}, + combinator::{map, opt, verify}, + sequence::tuple, + IResult, +}; + +use super::{ + combinator::{ + l_curly3_token, l_parens_token, node, r_curly3_token, r_parens_token, GreenElement, + }, + input::Input, + SyntaxKind::*, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn macros_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + l_curly3_token, + verify( + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), + |s: &Input| s.as_bytes()[0].is_ascii_alphabetic(), + ), + opt(tuple((l_parens_token, take_until(")}}}"), r_parens_token))), + r_curly3_token, + )), + |(l_curly3, name, argument, r_curly3)| { + let mut children = vec![]; + children.push(l_curly3); + children.push(name.text_token()); + if let Some((l_parens, argument, r_parens)) = argument { + children.extend([l_parens, argument.text_token(), r_parens]); + } + children.push(r_curly3); + node(MACROS, children) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn test() { + use crate::{ast::Macros, 
tests::to_ast, ParseConfig}; + + let to_macros = to_ast::(macros_node); + + insta::assert_debug_snapshot!( + to_macros("{{{title}}}").syntax, + @r###" + MACROS@0..11 + L_CURLY3@0..3 "{{{" + TEXT@3..8 "title" + R_CURLY3@8..11 "}}}" + "### + ); + + insta::assert_debug_snapshot!( + to_macros("{{{one_arg_macro(1)}}}").syntax, + @r###" + MACROS@0..22 + L_CURLY3@0..3 "{{{" + TEXT@3..16 "one_arg_macro" + L_PARENS@16..17 "(" + TEXT@17..18 "1" + R_PARENS@18..19 ")" + R_CURLY3@19..22 "}}}" + "### + ); + + insta::assert_debug_snapshot!( + to_macros("{{{two_arg_macro(1, 2)}}}").syntax, + @r###" + MACROS@0..25 + L_CURLY3@0..3 "{{{" + TEXT@3..16 "two_arg_macro" + L_PARENS@16..17 "(" + TEXT@17..21 "1, 2" + R_PARENS@21..22 ")" + R_CURLY3@22..25 "}}}" + "### + ); + + insta::assert_debug_snapshot!( + to_macros("{{{two_arg_macro(1\\,a, 2)}}}").syntax, + @r###" + MACROS@0..28 + L_CURLY3@0..3 "{{{" + TEXT@3..16 "two_arg_macro" + L_PARENS@16..17 "(" + TEXT@17..24 "1\\,a, 2" + R_PARENS@24..25 ")" + R_CURLY3@25..28 "}}}" + "### + ); + + let config = &ParseConfig::default(); + + assert!(macros_node(("{{{0uthor}}}", config).into()).is_err()); + assert!(macros_node(("{{{author}}", config).into()).is_err()); + assert!(macros_node(("{{{poem(}}}", config).into()).is_err()); + assert!(macros_node(("{{{poem)}}}", config).into()).is_err()); +} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs new file mode 100644 index 0000000..4b0a620 --- /dev/null +++ b/src/syntax/mod.rs @@ -0,0 +1,322 @@ +//! 
Org-mode elements + +pub mod block; +pub mod clock; +#[cfg(feature = "syntax-org-fc")] +pub mod cloze; +pub mod combinator; +pub mod comment; +pub mod cookie; +pub mod document; +pub mod drawer; +pub mod dyn_block; +pub mod element; +pub mod emphasis; +pub mod entity; +pub mod fixed_width; +pub mod fn_def; +pub mod fn_ref; +pub mod headline; +pub mod inline_call; +pub mod inline_src; +pub mod input; +pub mod keyword; +pub mod latex_environment; +pub mod latex_fragment; +pub mod line_break; +pub mod link; +pub mod list; +pub mod macros; +pub mod object; +pub mod paragraph; +pub mod planning; +pub mod radio_target; +pub mod rule; +pub mod snippet; +pub mod subscript_superscript; +pub mod table; +pub mod target; +pub mod timestamp; + +use rowan::Language; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct OrgLanguage; + +impl Language for OrgLanguage { + type Kind = SyntaxKind; + + fn kind_from_raw(raw: rowan::SyntaxKind) -> SyntaxKind { + // SAFETY: SyntaxKind is `repr(u16)` + unsafe { std::mem::transmute::(raw.0) } + } + + fn kind_to_raw(kind: SyntaxKind) -> rowan::SyntaxKind { + rowan::SyntaxKind(kind as u16) + } +} + +pub type SyntaxNode = rowan::SyntaxNode; +pub type SyntaxToken = rowan::SyntaxToken; +pub type SyntaxElement = rowan::SyntaxElement; +pub type SyntaxNodeChildren = rowan::SyntaxNodeChildren; +pub type SyntaxElementChildren = rowan::SyntaxElementChildren; + +#[allow(bad_style)] +#[allow(clippy::all)] +#[non_exhaustive] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +#[repr(u16)] +pub enum SyntaxKind { + // + // token + // + L_BRACKET, // '[' + R_BRACKET, // ']' + L_BRACKET2, // '[[' + R_BRACKET2, // ']]' + L_PARENS, // '(' + R_PARENS, // ')' + L_ANGLE, // '<' + R_ANGLE, // '>' + L_CURLY, // '{' + R_CURLY, // '}' + L_CURLY2, // '{{' + L_CURLY3, // '{{{' + R_CURLY3, // '}}}' + L_ANGLE2, // '<<' + R_ANGLE2, // '>>' + L_ANGLE3, // '<<<' + R_ANGLE3, // '>>>' + AT, // '@' + AT2, // '@@' + PERCENT, // '%' 
+ PERCENT2, // '%%' + SLASH, // '/' + BACKSLASH, // '\' + DOLLAR, // '$' + DOLLAR2, // '$$' + UNDERSCORE, // '_' + STAR, // '*' + PLUS, // '+' + MINUS, // '-' + MINUS2, // '--' + COLON, // ':' + COLON2, // '::' + EQUAL, // '=' + TILDE, // '~' + HASH, // '#' + HASH_PLUS, // '#+' + DOUBLE_ARROW, // '=>' + PIPE, // '|' + COMMA, // ',' + CARET, // '^' + NEW_LINE, // '\n' or '\r\n' or '\r' + WHITESPACE, // ' ' or '\t' + BLANK_LINE, + TEXT, + + DOCUMENT, + SECTION, + PARAGRAPH, + + HEADLINE, + HEADLINE_STARS, + HEADLINE_TITLE, + HEADLINE_KEYWORD_TODO, + HEADLINE_KEYWORD_DONE, + HEADLINE_PRIORITY, + HEADLINE_TAGS, + PROPERTY_DRAWER, + NODE_PROPERTY, + PLANNING, + PLANNING_DEADLINE, + PLANNING_SCHEDULED, + PLANNING_CLOSED, + + // + // elements + // + /* table */ + ORG_TABLE, + ORG_TABLE_RULE_ROW, + ORG_TABLE_STANDARD_ROW, + ORG_TABLE_CELL, + /* list */ + LIST, + LIST_ITEM, + LIST_ITEM_INDENT, + LIST_ITEM_BULLET, + LIST_ITEM_COUNTER, + LIST_ITEM_CHECK_BOX, + LIST_ITEM_TAG, + LIST_ITEM_CONTENT, + /* drawer */ + DRAWER, + DRAWER_BEGIN, + DRAWER_END, + DRAWER_CONTENT, + KEYWORD, + BABEL_CALL, + AFFILIATED_KEYWORD, + TABLE_EL, + CLOCK, + FN_DEF, + COMMENT, + RULE, + FIXED_WIDTH, + /* dyn block */ + DYN_BLOCK, + DYN_BLOCK_BEGIN, + DYN_BLOCK_END, + /* block */ + SPECIAL_BLOCK, + QUOTE_BLOCK, + CENTER_BLOCK, + VERSE_BLOCK, + COMMENT_BLOCK, + EXAMPLE_BLOCK, + EXPORT_BLOCK, + SOURCE_BLOCK, + SOURCE_BLOCK_LANG, + BLOCK_BEGIN, + BLOCK_END, + BLOCK_CONTENT, + SRC_BLOCK_SWITCHES, + SRC_BLOCK_LANGUAGE, + SRC_BLOCK_PARAMETERS, + EXPORT_BLOCK_TYPE, + LATEX_ENVIRONMENT, + + // + // objects + // + INLINE_CALL, + INLINE_SRC, + LINK, + LINK_PATH, + LINE_BREAK, + COOKIE, + RADIO_TARGET, + FN_REF, + LATEX_FRAGMENT, + MACROS, + SNIPPET, + TARGET, + BOLD, + STRIKE, + ITALIC, + UNDERLINE, + VERBATIM, + CODE, + ENTITY, + SUPERSCRIPT, + SUBSCRIPT, + + /* timestamp */ + TIMESTAMP_ACTIVE, + TIMESTAMP_INACTIVE, + TIMESTAMP_DIARY, + // timestamp tokens + TIMESTAMP_YEAR, + TIMESTAMP_MONTH, + 
TIMESTAMP_DAY, + TIMESTAMP_HOUR, + TIMESTAMP_MINUTE, + TIMESTAMP_DAYNAME, + // for repeater or delay + TIMESTAMP_REPEATER_MARK, + TIMESTAMP_DELAY_MARK, + TIMESTAMP_VALUE, + TIMESTAMP_UNIT, + + #[cfg(feature = "syntax-org-fc")] + CLOZE, +} + +impl From for rowan::SyntaxKind { + fn from(value: SyntaxKind) -> Self { + OrgLanguage::kind_to_raw(value) + } +} + +impl SyntaxKind { + /// whether this node is [object](https://orgmode.org/worg/org-syntax.html#Objects) + pub fn is_object(&self) -> bool { + matches!( + self, + SyntaxKind::ENTITY + | SyntaxKind::LATEX_FRAGMENT + | SyntaxKind::SNIPPET + | SyntaxKind::FN_REF + | SyntaxKind::INLINE_CALL + | SyntaxKind::INLINE_SRC + | SyntaxKind::LINE_BREAK + | SyntaxKind::LINK + | SyntaxKind::MACROS + | SyntaxKind::RADIO_TARGET + | SyntaxKind::COOKIE + | SyntaxKind::SUPERSCRIPT + | SyntaxKind::SUBSCRIPT + | SyntaxKind::ORG_TABLE_CELL + | SyntaxKind::TIMESTAMP_ACTIVE + | SyntaxKind::TIMESTAMP_INACTIVE + | SyntaxKind::TIMESTAMP_DIARY + | SyntaxKind::BOLD + | SyntaxKind::ITALIC + | SyntaxKind::UNDERLINE + | SyntaxKind::VERBATIM + | SyntaxKind::CODE + | SyntaxKind::STRIKE + ) + } + + /// whether this node is [element](https://orgmode.org/worg/org-syntax.html#Elements) + pub fn is_element(&self) -> bool { + matches!(self, SyntaxKind::HEADLINE | SyntaxKind::SECTION) + || self.is_lesser_element() + || self.is_greater_element() + } + + /// whether this node is [lesser element](https://orgmode.org/worg/org-syntax.html#Lesser_Elements) + pub fn is_lesser_element(&self) -> bool { + matches!( + self, + SyntaxKind::COMMENT_BLOCK + | SyntaxKind::EXAMPLE_BLOCK + | SyntaxKind::EXPORT_BLOCK + | SyntaxKind::SOURCE_BLOCK + | SyntaxKind::VERSE_BLOCK + | SyntaxKind::CLOCK + | SyntaxKind::PLANNING + | SyntaxKind::COMMENT + | SyntaxKind::FIXED_WIDTH + | SyntaxKind::RULE + | SyntaxKind::KEYWORD + | SyntaxKind::AFFILIATED_KEYWORD + | SyntaxKind::BABEL_CALL + | SyntaxKind::LATEX_ENVIRONMENT + | SyntaxKind::NODE_PROPERTY + | SyntaxKind::PARAGRAPH + | 
SyntaxKind::ORG_TABLE_RULE_ROW + | SyntaxKind::ORG_TABLE_STANDARD_ROW + ) + } + + /// whether this node is [greater element](https://orgmode.org/worg/org-syntax.html#Greater_Elements) + pub fn is_greater_element(&self) -> bool { + matches!( + self, + SyntaxKind::CENTER_BLOCK + | SyntaxKind::QUOTE_BLOCK + | SyntaxKind::SPECIAL_BLOCK + | SyntaxKind::DRAWER + | SyntaxKind::DYN_BLOCK + | SyntaxKind::FN_DEF + | SyntaxKind::LIST_ITEM + | SyntaxKind::LIST + | SyntaxKind::PROPERTY_DRAWER + | SyntaxKind::ORG_TABLE + ) + } +} diff --git a/src/syntax/object.rs b/src/syntax/object.rs new file mode 100644 index 0000000..4bacba1 --- /dev/null +++ b/src/syntax/object.rs @@ -0,0 +1,375 @@ +use nom::{IResult, InputTake}; + +use super::{ + combinator::GreenElement, + cookie::cookie_node, + emphasis::{ + self, bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node, + }, + entity::entity_node, + fn_ref::fn_ref_node, + inline_call::inline_call_node, + inline_src::inline_src_node, + input::Input, + latex_fragment::latex_fragment_node, + line_break::line_break_node, + link::link_node, + macros::macros_node, + radio_target::radio_target_node, + snippet::snippet_node, + subscript_superscript::{self, subscript_node, superscript_node}, + target::target_node, + timestamp::{timestamp_active_node, timestamp_diary_node, timestamp_inactive_node}, +}; + +struct ObjectPositions<'a> { + input: Input<'a>, + pos: usize, + finder: jetscii::BytesConst, +} + +impl ObjectPositions<'_> { + fn standard(input: Input) -> ObjectPositions { + ObjectPositions { + input, + pos: 0, + finder: jetscii::bytes!( + b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */ + b'@', /* snippet */ + b'<', /* timestamp, target, radio target */ + b'[', /* link, cookie, fn_ref, timestamp */ + b'c', /* inline call */ + b's', /* inline source */ + b'\\', b'$', /* latex & entity */ + b'{', /* macros */ + b'^', /* superscript */ + b'_' /* subscript */ + ), + } + } + + fn minimal(input: Input) -> ObjectPositions 
{ + ObjectPositions { + input, + pos: 0, + finder: jetscii::bytes!( + b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */ + b'\\', b'$', /* latex & entity */ + b'^', /* superscript */ + b'_' /* subscript */ + ), + } + } + + fn link_description(input: Input) -> ObjectPositions { + ObjectPositions { + input, + pos: 0, + finder: jetscii::bytes!( + b'*', b'+', b'/', b'_', b'=', b'~', /* text markup */ + b'\\', b'$', /* latex & entity */ + b'@', /* snippet */ + b'c', /* inline call */ + b's', /* inline source */ + b'{', /* macros */ + b'[', /* cookie */ + b'^', /* superscript */ + b'_' /* subscript */ + ), + } + } +} + +impl<'a> Iterator for ObjectPositions<'a> { + type Item = (Input<'a>, Input<'a>); + + fn next(&mut self) -> Option { + if self.input.len() < 2 || self.pos >= self.input.len() { + return None; + } + + let previous = self.pos; + let i = self.finder.find(&self.input.as_bytes()[self.pos..])?; + let p = self.pos + i; + + self.pos = p + 1; + + debug_assert!( + previous < self.pos && self.pos <= self.input.s.len(), + "{} < {} < {}", + previous, + self.pos, + self.input.s.len() + ); + + // a valid object requires at least two characters + if self.input.s.len() - p < 2 { + return None; + } + + Some(self.input.take_split(p)) + } +} + +/// parse minimal sets of objects, including +/// - LaTeX fragments ('\\') +/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/') +/// - Entities ('\\') +/// - Superscripts and Subscripts +pub fn minimal_object_nodes(input: Input) -> Vec { + object_nodes( + ObjectPositions::minimal, + |i: Input, pre: Input| match &i.as_bytes()[0] { + b'*' if emphasis::verify_pre(pre.s) => bold_node(i), + b'+' if emphasis::verify_pre(pre.s) => strike_node(i), + b'/' if emphasis::verify_pre(pre.s) => italic_node(i), + b'_' if emphasis::verify_pre(pre.s) => underline_node(i), + b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i), + b'~' if emphasis::verify_pre(pre.s) => code_node(i), + b'$' => 
latex_fragment_node(i), + b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), + b'^' if subscript_superscript::verify_pre(&pre) => superscript_node(i), + b'_' if subscript_superscript::verify_pre(&pre) => subscript_node(i), + _ => Err(nom::Err::Error(())), + }, + input, + ) +} + +/// parses standard sets of objects, including +/// +/// - Entities +/// - LaTeX Fragments +/// - Export Snippets +/// - Footnote References +/// - Inline Babel Calls +/// - Inline Source Blocks +/// - Links +/// - Macros +/// - Targets and Radio Targets +/// - Statistics Cookies +/// - Timestamps +/// - Text Markup (bold code strike verbatim underline italic) +/// - Line Breaks +/// - Subscript and Superscript +/// - Cloze (if `syntax-org-fc` is enabled) +/// +/// // todo: +/// - Citations +pub fn standard_object_nodes(input: Input) -> Vec { + object_nodes( + ObjectPositions::standard, + |i: Input, pre: Input| match &i.as_bytes()[0] { + b'*' if emphasis::verify_pre(pre.s) => bold_node(i), + b'+' if emphasis::verify_pre(pre.s) => strike_node(i), + b'/' if emphasis::verify_pre(pre.s) => italic_node(i), + b'_' if emphasis::verify_pre(pre.s) => underline_node(i), + b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i), + b'~' if emphasis::verify_pre(pre.s) => code_node(i), + b'@' => snippet_node(i), + b'{' => { + cfg_if::cfg_if! 
{ + if #[cfg(feature = "syntax-org-fc")] { + macros_node(i).or_else(|_| super::cloze::cloze_node(i)) + } else { + macros_node(i) + } + } + } + b'<' => radio_target_node(i) + .or_else(|_| target_node(i)) + .or_else(|_| timestamp_diary_node(i)) + .or_else(|_| timestamp_active_node(i)), + b'[' => cookie_node(i) + .or_else(|_| link_node(i)) + .or_else(|_| fn_ref_node(i)) + .or_else(|_| timestamp_inactive_node(i)), + // NOTE: although not specified in document, inline call and inline src follows the + // same pre tokens rule as text markup + b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i), + b's' if emphasis::verify_pre(pre.s) => inline_src_node(i), + b'$' => latex_fragment_node(i), + b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i), + b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), + b'^' if subscript_superscript::verify_pre(&pre) => superscript_node(i), + b'_' if subscript_superscript::verify_pre(&pre) => subscript_node(i), + _ => Err(nom::Err::Error(())), + }, + input, + ) +} + +pub fn link_description_object_nodes(input: Input) -> Vec { + object_nodes( + ObjectPositions::link_description, + |i: Input<'_>, pre: Input<'_>| match &i.as_bytes()[0] { + b'@' => snippet_node(i), + b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i), + b's' if emphasis::verify_pre(pre.s) => inline_src_node(i), + b'{' => macros_node(i), + b'[' => cookie_node(i), + b'*' if emphasis::verify_pre(pre.s) => bold_node(i), + b'+' if emphasis::verify_pre(pre.s) => strike_node(i), + b'/' if emphasis::verify_pre(pre.s) => italic_node(i), + b'_' if emphasis::verify_pre(pre.s) => underline_node(i), + b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i), + b'~' if emphasis::verify_pre(pre.s) => code_node(i), + b'$' => latex_fragment_node(i), + b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), + b'^' if subscript_superscript::verify_pre(&pre) => superscript_node(i), + b'_' if subscript_superscript::verify_pre(&pre) => 
subscript_node(i), + _ => Err(nom::Err::Error(())), + }, + input, + ) +} + +fn object_nodes<'a, F, P>(position: F, parse: P, input: Input<'a>) -> Vec +where + F: Fn(Input) -> ObjectPositions, + P: Fn(Input<'a>, Input<'a>) -> IResult, GreenElement, ()>, +{ + let mut i = input; + let mut nodes = vec![]; + + 'l: while !i.is_empty() { + for (input, head) in position(i) { + debug_assert!( + input.s.len() >= 2, + "object must have at least two characters: {:?}", + input.s + ); + + if let Ok((input, pre)) = parse(input, head) { + if !head.is_empty() { + nodes.push(head.text_token()) + } + nodes.push(pre); + debug_assert!(input.len() < i.len(), "{} < {}", input.len(), i.len()); + i = input; + continue 'l; + } + } + nodes.push(i.text_token()); + break; + } + + debug_assert_eq!( + input.as_str(), + nodes.iter().fold(String::new(), |s, i| s + &i.to_string()), + "parser must be lossless" + ); + + nodes +} + +#[test] +fn positions() { + let config = crate::ParseConfig::default(); + + let vec = ObjectPositions::standard(("*", &config).into()).collect::>(); + assert!(vec.is_empty()); + + let vec = ObjectPositions::standard(("*{", &config).into()).collect::>(); + assert_eq!(vec.len(), 1); + assert_eq!(vec[0].0.s, "*{"); + + // https://github.com/PoiScript/orgize/issues/69 + let vec = ObjectPositions::standard(("{3}", &config).into()).collect::>(); + assert_eq!(vec.len(), 1); + assert_eq!(vec[0].0.s, "{3}"); + + let vec = ObjectPositions::standard(("*{()}//s\nc<<", &config).into()).collect::>(); + assert_eq!(vec.len(), 7); + assert_eq!(vec[0].0.s, "*{()}//s\nc<<"); + assert_eq!(vec[1].0.s, "{()}//s\nc<<"); + assert_eq!(vec[2].0.s, "//s\nc<<"); + assert_eq!(vec[3].0.s, "/s\nc<<"); + assert_eq!(vec[4].0.s, "s\nc<<"); + assert_eq!(vec[5].0.s, "c<<"); + assert_eq!(vec[6].0.s, "<<"); +} + +#[test] +fn parse() { + use crate::{ + syntax::{combinator::node, SyntaxKind, SyntaxNode}, + ParseConfig, + }; + + let t = |input: &str| { + let config = &ParseConfig::default(); + let children = 
standard_object_nodes((input, config).into()); + SyntaxNode::new_root(node(SyntaxKind::PARAGRAPH, children).into_node().unwrap()) + }; + + insta::assert_debug_snapshot!( + t("~org-inlinetask-min-level~[fn:oiml:The default value of \n~org-inlinetask-min-level~ is =15=.]"), + @r###" + PARAGRAPH@0..93 + CODE@0..26 + TILDE@0..1 "~" + TEXT@1..25 "org-inlinetask-min-level" + TILDE@25..26 "~" + FN_REF@26..93 + L_BRACKET@26..27 "[" + TEXT@27..29 "fn" + COLON@29..30 ":" + TEXT@30..34 "oiml" + COLON@34..35 ":" + TEXT@35..57 "The default value of \n" + CODE@57..83 + TILDE@57..58 "~" + TEXT@58..82 "org-inlinetask-min-level" + TILDE@82..83 "~" + TEXT@83..87 " is " + VERBATIM@87..91 + EQUAL@87..88 "=" + TEXT@88..90 "15" + EQUAL@90..91 "=" + TEXT@91..92 "." + R_BRACKET@92..93 "]" + "### + ); + + insta::assert_debug_snapshot!( + t(r#"Org is a /plaintext markup syntax/ developed with *Emacs* in 2003. +The canonical parser is =org-element.el=, which provides a number of +functions starting with ~org-element-~."#), + @r###" + PARAGRAPH@0..175 + TEXT@0..9 "Org is a " + ITALIC@9..34 + SLASH@9..10 "/" + TEXT@10..33 "plaintext markup syntax" + SLASH@33..34 "/" + TEXT@34..50 " developed with " + BOLD@50..57 + STAR@50..51 "*" + TEXT@51..56 "Emacs" + STAR@56..57 "*" + TEXT@57..91 " in 2003.\nThe canonic ..." + VERBATIM@91..107 + EQUAL@91..92 "=" + TEXT@92..106 "org-element.el" + EQUAL@106..107 "=" + TEXT@107..160 ", which provides a nu ..." + CODE@160..174 + TILDE@160..161 "~" + TEXT@161..173 "org-element-" + TILDE@173..174 "~" + TEXT@174..175 "." 
+ "### + ); + + insta::assert_debug_snapshot!( + t("a^abc"), + @r###" + PARAGRAPH@0..5 + TEXT@0..1 "a" + SUPERSCRIPT@1..5 + CARET@1..2 "^" + TEXT@2..5 "abc" + "### + ); +} diff --git a/src/syntax/paragraph.rs b/src/syntax/paragraph.rs new file mode 100644 index 0000000..f98c22e --- /dev/null +++ b/src/syntax/paragraph.rs @@ -0,0 +1,101 @@ +use nom::{IResult, InputTake}; + +use super::{ + combinator::{blank_lines, line_ends_iter, node, GreenElement}, + input::Input, + keyword::affiliated_keyword_nodes, + object::standard_object_nodes, + SyntaxKind, +}; + +/// Recognizes one paragraph +pub fn paragraph_node(input: Input) -> IResult { + crate::lossless_parser!(paragraph_node_base, input) +} + +/// Recognizes multiple paragraphs +pub fn paragraph_nodes(input: Input) -> Result, nom::Err<()>> { + let mut i = input; + let mut children = vec![]; + while !i.is_empty() { + let (input, node) = paragraph_node(i)?; + children.push(node); + debug_assert!(i.len() > input.len(), "{} > {}", i.len(), input.len()); + i = input; + } + Ok(children) +} + +fn paragraph_node_base(input: Input) -> IResult { + debug_assert!(!input.is_empty()); + + let (input, keywords) = affiliated_keyword_nodes(input)?; + + let mut start = 0; + for idx in line_ends_iter(input.as_str()) { + // stops at blank line + if input.s[start..idx].bytes().all(|c| c.is_ascii_whitespace()) { + break; + } + + start = idx; + } + + let (input, contents) = input.take_split(start); + let (input, post_blank) = blank_lines(input)?; + + let mut children = vec![]; + children.extend(keywords); + children.extend(standard_object_nodes(contents)); + children.extend(post_blank); + + Ok((input, node(SyntaxKind::PARAGRAPH, children))) +} + +#[test] +fn parse() { + use crate::{ast::Paragraph, tests::to_ast}; + + let to_paragraph = to_ast::(paragraph_node); + + insta::assert_debug_snapshot!( + to_paragraph(r#"a"#).syntax, + @r###" + PARAGRAPH@0..1 + TEXT@0..1 "a" + "### + ); + + insta::assert_debug_snapshot!( + to_paragraph(r#"a + 
"#).syntax, + @r###" + PARAGRAPH@0..6 + TEXT@0..2 "a\n" + BLANK_LINE@2..6 " " + "### + ); + + insta::assert_debug_snapshot!( + to_paragraph(r#"a +b +c +"#).syntax, + @r###" + PARAGRAPH@0..6 + TEXT@0..6 "a\nb\nc\n" + "### + ); + + insta::assert_debug_snapshot!( + to_paragraph(r#"a + +c +"#).syntax, + @r###" + PARAGRAPH@0..3 + TEXT@0..2 "a\n" + BLANK_LINE@2..3 "\n" + "### + ); +} diff --git a/src/syntax/planning.rs b/src/syntax/planning.rs new file mode 100644 index 0000000..d35c270 --- /dev/null +++ b/src/syntax/planning.rs @@ -0,0 +1,91 @@ +use nom::{ + branch::alt, bytes::complete::tag, character::complete::space0, combinator::iterator, + sequence::tuple, IResult, +}; + +use super::{ + combinator::{eol_or_eof, GreenElement, NodeBuilder}, + input::Input, + timestamp::{timestamp_active_node, timestamp_inactive_node}, + SyntaxKind::*, +}; + +pub fn planning_node(input: Input) -> IResult { + debug_assert!(!input.is_empty()); + crate::lossless_parser!(planning_node_base, input) +} + +fn planning_node_base(input: Input) -> IResult { + let mut b = NodeBuilder::new(); + + let mut it = iterator( + input, + tuple(( + space0, + alt((tag("DEADLINE:"), tag("SCHEDULED:"), tag("CLOSED:"))), + space0, + alt((timestamp_active_node, timestamp_inactive_node)), + )), + ); + + let start_len = b.len(); + + it.for_each(|(ws, text, ws_, timestamp)| { + let mut b_ = NodeBuilder::new(); + b_.ws(ws); + b_.text(text); + b_.ws(ws_); + b_.push(timestamp); + b.push(b_.finish(match text.as_str() { + "DEADLINE:" => PLANNING_DEADLINE, + "SCHEDULED:" => PLANNING_SCHEDULED, + "CLOSED:" => PLANNING_CLOSED, + _ => unreachable!(), + })); + }); + + if b.len() == start_len { + return Err(nom::Err::Error(())); + } + + let (input, _) = it.finish()?; + let (input, ws) = space0(input)?; + let (input, nl) = eol_or_eof(input)?; + + b.ws(ws); + b.nl(nl); + + Ok((input, b.finish(PLANNING))) +} + +#[test] +fn prase() { + use crate::{ast::Planning, tests::to_ast, ParseConfig}; + + let to_planning = 
to_ast::(planning_node); + + insta::assert_debug_snapshot!( + to_planning("SCHEDULED: <2019-04-08 Mon>").syntax, + @r###" + PLANNING@0..27 + PLANNING_SCHEDULED@0..27 + TEXT@0..10 "SCHEDULED:" + WHITESPACE@10..11 " " + TIMESTAMP_ACTIVE@11..27 + L_ANGLE@11..12 "<" + TIMESTAMP_YEAR@12..16 "2019" + MINUS@16..17 "-" + TIMESTAMP_MONTH@17..19 "04" + MINUS@19..20 "-" + TIMESTAMP_DAY@20..22 "08" + WHITESPACE@22..23 " " + TIMESTAMP_DAYNAME@23..26 "Mon" + R_ANGLE@26..27 ">" + "### + ); + + let config = &ParseConfig::default(); + + assert!(planning_node((" ", config).into()).is_err()); + assert!(planning_node((" SCHEDULED: ", config).into()).is_err()); +} diff --git a/src/syntax/radio_target.rs b/src/syntax/radio_target.rs new file mode 100644 index 0000000..74fbfac --- /dev/null +++ b/src/syntax/radio_target.rs @@ -0,0 +1,83 @@ +use nom::{ + bytes::complete::take_while, + combinator::{map, verify}, + sequence::tuple, + IResult, +}; + +use super::{ + combinator::{l_angle3_token, node, r_angle3_token, GreenElement}, + input::Input, + object::minimal_object_nodes, + SyntaxKind::*, +}; + +pub fn radio_target_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + l_angle3_token, + verify( + take_while(|c: char| c != '<' && c != '\n' && c != '>'), + |s: &Input| { + s.as_str().starts_with(|c| c != ' ') && s.as_str().ends_with(|c| c != ' ') + }, + ), + r_angle3_token, + )), + |(l_angle3, contents, r_angle3)| { + let mut children = vec![l_angle3]; + children.extend(minimal_object_nodes(contents)); + children.push(r_angle3); + node(RADIO_TARGET, children) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::{ast::RadioTarget, tests::to_ast, ParseConfig}; + + let to_radio_target = to_ast::(radio_target_node); + + insta::assert_debug_snapshot!( + to_radio_target("<<>>").syntax, + @r###" + RADIO_TARGET@0..12 + L_ANGLE3@0..3 "<<<" + TEXT@3..9 "target" + R_ANGLE3@9..12 ">>>" + "### + ); + + insta::assert_debug_snapshot!( + 
to_radio_target("<<>>").syntax, + @r###" + RADIO_TARGET@0..13 + L_ANGLE3@0..3 "<<<" + TEXT@3..10 "tar get" + R_ANGLE3@10..13 ">>>" + "### + ); + + insta::assert_debug_snapshot!( + to_radio_target("<<<\\alpha>>>").syntax, + @r###" + RADIO_TARGET@0..12 + L_ANGLE3@0..3 "<<<" + ENTITY@3..9 + BACKSLASH@3..4 "\\" + TEXT@4..9 "alpha" + R_ANGLE3@9..12 ">>>" + "### + ); + + let config = &ParseConfig::default(); + + assert!(radio_target_node(("<<>>", config).into()).is_err()); + assert!(radio_target_node(("<<< target>>>", config).into()).is_err()); + assert!(radio_target_node(("<<>>", config).into()).is_err()); + assert!(radio_target_node(("<<get>>>", config).into()).is_err()); + assert!(radio_target_node(("<<>>", config).into()).is_err()); + assert!(radio_target_node(("<<>", config).into()).is_err()); +} diff --git a/src/syntax/rule.rs b/src/syntax/rule.rs new file mode 100644 index 0000000..cf34171 --- /dev/null +++ b/src/syntax/rule.rs @@ -0,0 +1,88 @@ +use nom::{ + bytes::complete::take_while_m_n, character::complete::space0, combinator::map, sequence::tuple, + IResult, +}; + +use super::{ + combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder}, + input::Input, + SyntaxKind::*, +}; + +pub fn rule_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + space0, + take_while_m_n(5, usize::MAX, |c| c == '-'), + space0, + eol_or_eof, + blank_lines, + )), + |(ws, dashes, ws_, nl, post_blank)| { + let mut b = NodeBuilder::new(); + b.ws(ws); + b.text(dashes); + b.ws(ws_); + b.nl(nl); + b.children.extend(post_blank); + b.finish(RULE) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::{ast::Rule, tests::to_ast, ParseConfig}; + + let to_rule = to_ast::(rule_node); + + insta::assert_debug_snapshot!( + to_rule("-----").syntax, + @r###" + RULE@0..5 + TEXT@0..5 "-----" + "### + ); + + insta::assert_debug_snapshot!( + to_rule("--------").syntax, + @r###" + RULE@0..8 + TEXT@0..8 "--------" + "### + ); + + 
insta::assert_debug_snapshot!( + to_rule("-----\n\n\n").syntax, + @r###" + RULE@0..8 + TEXT@0..5 "-----" + NEW_LINE@5..6 "\n" + BLANK_LINE@6..7 "\n" + BLANK_LINE@7..8 "\n" + "### + ); + + insta::assert_debug_snapshot!( + to_rule("----- \n").syntax, + @r###" + RULE@0..8 + TEXT@0..5 "-----" + WHITESPACE@5..7 " " + NEW_LINE@7..8 "\n" + "### + ); + + let config = &ParseConfig::default(); + + assert!(rule_node(("", config).into()).is_err()); + assert!(rule_node(("----", config).into()).is_err()); + assert!(rule_node(("None----", config).into()).is_err()); + assert!(rule_node(("None ----", config).into()).is_err()); + assert!(rule_node(("None------", config).into()).is_err()); + assert!(rule_node(("----None----", config).into()).is_err()); + assert!(rule_node(("\t\t----", config).into()).is_err()); + assert!(rule_node(("------None", config).into()).is_err()); + assert!(rule_node(("----- None", config).into()).is_err()); +} diff --git a/src/syntax/snippet.rs b/src/syntax/snippet.rs new file mode 100644 index 0000000..5f55f44 --- /dev/null +++ b/src/syntax/snippet.rs @@ -0,0 +1,92 @@ +use nom::{ + bytes::complete::{take_until, take_while1}, + combinator::map, + sequence::tuple, + IResult, +}; + +use super::{ + combinator::{at2_token, colon_token, node, GreenElement}, + input::Input, + SyntaxKind::*, +}; + +pub fn snippet_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + at2_token, + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-'), + colon_token, + take_until("@@"), + at2_token, + )), + |(at2, name, colon, value, at2_)| { + node( + SNIPPET, + [at2, name.text_token(), colon, value.text_token(), at2_], + ) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::{ast::Snippet, tests::to_ast, ParseConfig}; + + let to_snippet = to_ast::(snippet_node); + + insta::assert_debug_snapshot!( + to_snippet("@@html:@@").syntax, + @r###" + SNIPPET@0..12 + AT2@0..2 "@@" + TEXT@2..6 "html" + COLON@6..7 ":" + TEXT@7..10 "" + 
AT2@10..12 "@@" + "### + ); + + insta::assert_debug_snapshot!( + to_snippet("@@latex:any arbitrary LaTeX code@@").syntax, + @r###" + SNIPPET@0..34 + AT2@0..2 "@@" + TEXT@2..7 "latex" + COLON@7..8 ":" + TEXT@8..32 "any arbitrary LaTeX code" + AT2@32..34 "@@" + "### + ); + + insta::assert_debug_snapshot!( + to_snippet("@@html:@@").syntax, + @r###" + SNIPPET@0..9 + AT2@0..2 "@@" + TEXT@2..6 "html" + COLON@6..7 ":" + TEXT@7..7 "" + AT2@7..9 "@@" + "### + ); + + insta::assert_debug_snapshot!( + to_snippet("@@html:

    @

    @@").syntax, + @r###" + SNIPPET@0..17 + AT2@0..2 "@@" + TEXT@2..6 "html" + COLON@6..7 ":" + TEXT@7..15 "

    @

    " + AT2@15..17 "@@" + "### + ); + + let config = &ParseConfig::default(); + + assert!(snippet_node(("@@html:@", config).into()).is_err()); + assert!(snippet_node(("@@html@@", config).into()).is_err()); + assert!(snippet_node(("@@:@@", config).into()).is_err()); +} diff --git a/src/syntax/subscript_superscript.rs b/src/syntax/subscript_superscript.rs new file mode 100644 index 0000000..831ff22 --- /dev/null +++ b/src/syntax/subscript_superscript.rs @@ -0,0 +1,179 @@ +use memchr::memchr2_iter; +use nom::{ + branch::alt, + bytes::complete::{tag, take_while1}, + combinator::opt, + IResult, InputTake, +}; + +use crate::{ + syntax::{ + combinator::{caret_token, underscore_token}, + object::standard_object_nodes, + }, + SyntaxKind, +}; + +use super::{ + combinator::{l_curly_token, node, r_curly_token, GreenElement}, + input::Input, +}; + +pub fn superscript_node(input: Input) -> IResult { + let (input, caret) = caret_token(input)?; + + let mut children = vec![caret]; + + if input.c.use_sub_superscript.is_brace() { + let (input, rest) = template1(input)?; + children.extend(rest); + return Ok((input, node(SyntaxKind::SUPERSCRIPT, children))); + } + + let (input, rest) = alt((template0, template1, template2))(input)?; + children.extend(rest); + + Ok((input, node(SyntaxKind::SUPERSCRIPT, children))) +} + +pub fn subscript_node(input: Input) -> IResult { + let (input, underscore) = underscore_token(input)?; + + let mut children = vec![underscore]; + + if input.c.use_sub_superscript.is_brace() { + let (input, rest) = template1(input)?; + children.extend(rest); + return Ok((input, node(SyntaxKind::SUBSCRIPT, children))); + } + + let (input, rest) = alt((template0, template1, template2))(input)?; + children.extend(rest); + + Ok((input, node(SyntaxKind::SUBSCRIPT, children))) +} + +fn template0(input: Input) -> IResult, ()> { + let (input, star) = tag("*")(input)?; + Ok((input, vec![star.text_token()])) +} + +fn template1(input: Input) -> IResult, ()> { + let (input, l) = 
l_curly_token(input)?; + let (input, contents) = balanced_brackets(input)?; + let (input, r) = r_curly_token(input)?; + let mut children = vec![]; + children.push(l); + children.extend(standard_object_nodes(contents)); + children.push(r); + Ok((input, children)) +} + +fn template2(input: Input) -> IResult, ()> { + let (input, sign) = opt(alt((tag("+"), tag("-"))))(input)?; + + let (input, contents) = + take_while1(|c: char| c.is_alphanumeric() || c == ',' || c == '\\' || c == '.')(input)?; + + if contents.s.ends_with(|c: char| !c.is_alphanumeric()) { + return Err(nom::Err::Error(())); + } + + let mut children = vec![]; + + if let Some(s) = sign { + children.push(s.text_token()) + } + + children.push(contents.text_token()); + + Ok((input, children)) +} + +fn balanced_brackets(input: Input) -> IResult { + let mut pairs = 1; + let bytes = input.as_bytes(); + for i in memchr2_iter(b'{', b'}', bytes) { + if bytes[i] == b'{' { + pairs += 1; + } else if pairs != 1 { + pairs -= 1; + } else { + return Ok(input.take_split(i)); + } + } + Err(nom::Err::Error(())) +} + +pub fn verify_pre(i: &Input) -> bool { + if i.c.use_sub_superscript.is_nil() { + return false; + } + let s = i.s; + if s.is_empty() { + return false; + } + let last = s.as_bytes()[s.len() - 1]; + last != b' ' && last != b'\t' +} + +#[test] +fn parse() { + use crate::ast::Subscript; + use crate::config::{ParseConfig, UseSubSuperscript}; + use crate::tests::to_ast; + + let to_subscript = to_ast::(subscript_node); + + insta::assert_debug_snapshot!( + to_subscript("_*").syntax, + @r###" + SUBSCRIPT@0..2 + UNDERSCORE@0..1 "_" + TEXT@1..2 "*" + "### + ); + + insta::assert_debug_snapshot!( + to_subscript("_{*bo\nld*}").syntax, + @r###" + SUBSCRIPT@0..10 + UNDERSCORE@0..1 "_" + L_CURLY@1..2 "{" + BOLD@2..9 + STAR@2..3 "*" + TEXT@3..8 "bo\nld" + STAR@8..9 "*" + R_CURLY@9..10 "}" + "### + ); + + insta::assert_debug_snapshot!( + to_subscript("_+123").syntax, + @r###" + SUBSCRIPT@0..5 + UNDERSCORE@0..1 "_" + TEXT@1..2 "+" + 
TEXT@2..5 "123" + "### + ); + + insta::assert_debug_snapshot!( + to_subscript("_abc").syntax, + @r###" + SUBSCRIPT@0..4 + UNDERSCORE@0..1 "_" + TEXT@1..4 "abc" + "### + ); + + let with_brace = ParseConfig { + use_sub_superscript: UseSubSuperscript::Brace, + ..Default::default() + }; + + debug_assert!(subscript_node(("_*", &with_brace).into()).is_err()); + debug_assert!(subscript_node(("_abc", &with_brace).into()).is_err()); + debug_assert!(subscript_node(("_+123", &with_brace).into()).is_err()); + debug_assert!(subscript_node(("_{*bo\nld*}", &with_brace).into()).is_ok()); +} diff --git a/src/syntax/table.rs b/src/syntax/table.rs new file mode 100644 index 0000000..eb77a40 --- /dev/null +++ b/src/syntax/table.rs @@ -0,0 +1,265 @@ +use nom::{ + bytes::complete::take_while, + character::complete::{multispace0, space0}, + combinator::iterator, + sequence::tuple, + Err, IResult, InputTake, Slice, +}; + +use super::{ + combinator::{blank_lines, line_ends_iter, node, pipe_token, GreenElement, NodeBuilder}, + input::Input, + keyword::tblfm_keyword_nodes, + object::standard_object_nodes, + SyntaxKind::*, +}; + +fn org_table_node_base(input: Input) -> IResult { + let mut children = vec![]; + + let mut start = 0; + for i in line_ends_iter(input.as_str()) { + let line = input.slice(start..i); + let trimmed = line.as_str().trim_start_matches([' ', '\t']); + + // Org tables end at the first line not starting with a vertical bar. 
+ if !trimmed.starts_with('|') { + break; + } + + if trimmed.starts_with("|-") { + children.push(node(ORG_TABLE_RULE_ROW, [line.text_token()])); + } else { + children.push(table_standard_row_node(line)?); + } + + start = i; + } + + if start == 0 { + return Err(nom::Err::Error(())); + } + + let input = input.slice(start..); + + let (input, tblfm) = tblfm_keyword_nodes(input)?; + + let (input, post_blank) = blank_lines(input)?; + + children.extend(tblfm); + children.extend(post_blank); + + Ok((input, node(ORG_TABLE, children))) +} + +fn table_standard_row_node(input: Input) -> Result> { + let mut b = NodeBuilder::new(); + + let (input, ws) = space0(input)?; + + b.ws(ws); + + let mut it = iterator( + input, + tuple((pipe_token, multispace0, take_while(|c: char| c != '|'))), + ); + + it.for_each(|(pipe, ws, input)| { + b.push(pipe); + b.ws(ws); + + if input.is_empty() { + return; + } + + match input + .as_bytes() + .iter() + .rposition(|b| !b.is_ascii_whitespace()) + { + Some(idx) => { + let (ws, cell) = input.take_split(idx + 1); + b.push(node(ORG_TABLE_CELL, standard_object_nodes(cell))); + b.ws(ws); + } + _ => { + b.push(node(ORG_TABLE_CELL, standard_object_nodes(input))); + } + } + }); + let (input, _) = it.finish()?; + debug_assert!(input.is_empty()); + + Ok(b.finish(ORG_TABLE_STANDARD_ROW)) +} + +fn table_el_node_base(input: Input) -> IResult { + let mut start = 0; + for i in line_ends_iter(input.as_str()) { + let line = &input.s[start..i]; + let trimmed = line.trim(); + + if start == 0 { + // Table.el tables start at lines beginning with "+-" string and followed by plus or minus signs + if !trimmed.starts_with("+-") || trimmed.bytes().any(|c| c != b'+' && c != b'-') { + return Err(Err::Error(())); + } + } + + // Table.el tables end at the first line not starting with either a vertical line or a plus sign. 
+ if !trimmed.starts_with('|') && !trimmed.starts_with('+') { + break; + } + + start = i; + } + + let (input, contents) = input.take_split(start); + let (input, post_blank) = blank_lines(input)?; + + let mut children = vec![]; + children.push(contents.text_token()); + children.extend(post_blank); + + Ok((input, node(TABLE_EL, children))) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn org_table_node(input: Input) -> IResult { + crate::lossless_parser!(org_table_node_base, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn table_el_node(input: Input) -> IResult { + crate::lossless_parser!(table_el_node_base, input) +} + +#[test] +fn parse_org_table() { + use crate::{ast::OrgTable, tests::to_ast}; + + let to_org_table = to_ast::(org_table_node); + + insta::assert_debug_snapshot!( + to_org_table("|").syntax, + @r###" + ORG_TABLE@0..1 + ORG_TABLE_STANDARD_ROW@0..1 + PIPE@0..1 "|" + "### + ); + + insta::assert_debug_snapshot!( + to_org_table( +r#"| +|- +|a +|- +| a | +"# + ).syntax, + @r###" + ORG_TABLE@0..20 + ORG_TABLE_STANDARD_ROW@0..2 + PIPE@0..1 "|" + WHITESPACE@1..2 "\n" + ORG_TABLE_RULE_ROW@2..5 + TEXT@2..5 "|-\n" + ORG_TABLE_STANDARD_ROW@5..8 + PIPE@5..6 "|" + ORG_TABLE_CELL@6..7 + TEXT@6..7 "a" + WHITESPACE@7..8 "\n" + ORG_TABLE_RULE_ROW@8..11 + TEXT@8..11 "|-\n" + ORG_TABLE_STANDARD_ROW@11..20 + PIPE@11..12 "|" + WHITESPACE@12..15 " " + ORG_TABLE_CELL@15..16 + TEXT@15..16 "a" + WHITESPACE@16..18 " " + PIPE@18..19 "|" + WHITESPACE@19..20 "\n" + "### + ); + + insta::assert_debug_snapshot!( + to_org_table("| a |\n#+tblfm: test").syntax, + @r###" + ORG_TABLE@0..19 + ORG_TABLE_STANDARD_ROW@0..6 + PIPE@0..1 "|" + WHITESPACE@1..2 " " + ORG_TABLE_CELL@2..3 + TEXT@2..3 "a" + WHITESPACE@3..4 " " + PIPE@4..5 "|" + WHITESPACE@5..6 "\n" + KEYWORD@6..19 + HASH_PLUS@6..8 "#+" + TEXT@8..13 "tblfm" + 
COLON@13..14 ":" + TEXT@14..19 " test" + "### + ); + + insta::assert_debug_snapshot!( + to_org_table("| a |\n#+TBLFM: test1\n#+TBLFM: test2").syntax, + @r###" + ORG_TABLE@0..35 + ORG_TABLE_STANDARD_ROW@0..6 + PIPE@0..1 "|" + WHITESPACE@1..2 " " + ORG_TABLE_CELL@2..3 + TEXT@2..3 "a" + WHITESPACE@3..4 " " + PIPE@4..5 "|" + WHITESPACE@5..6 "\n" + KEYWORD@6..21 + HASH_PLUS@6..8 "#+" + TEXT@8..13 "TBLFM" + COLON@13..14 ":" + TEXT@14..20 " test1" + NEW_LINE@20..21 "\n" + KEYWORD@21..35 + HASH_PLUS@21..23 "#+" + TEXT@23..28 "TBLFM" + COLON@28..29 ":" + TEXT@29..35 " test2" + "### + ); +} + +#[test] +fn parse_table_el() { + use crate::{ast::TableEl, tests::to_ast, ParseConfig}; + + let to_table_el = to_ast::(table_el_node); + + insta::assert_debug_snapshot!( + to_table_el( + r#" +---+ + | | + +---+ + + "# + ).syntax, + @r###" + TABLE_EL@0..37 + TEXT@0..32 " +---+\n | |\n ..." + BLANK_LINE@32..33 "\n" + BLANK_LINE@33..37 " " + "### + ); + + let config = &ParseConfig::default(); + + assert!(table_el_node(("", config).into()).is_err()); + assert!(table_el_node(("+----|---", config).into()).is_err()); +} diff --git a/src/syntax/target.rs b/src/syntax/target.rs new file mode 100644 index 0000000..ceba339 --- /dev/null +++ b/src/syntax/target.rs @@ -0,0 +1,69 @@ +use nom::{ + bytes::complete::take_while, + combinator::{map, verify}, + sequence::tuple, + IResult, +}; + +use super::{ + combinator::{l_angle2_token, node, r_angle2_token, GreenElement}, + input::Input, + SyntaxKind::*, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn target_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + l_angle2_token, + verify( + take_while(|c: char| c != '<' && c != '\n' && c != '>'), + |s: &Input| { + s.as_str().starts_with(|c| c != ' ') && s.as_str().ends_with(|c| c != ' ') + }, + ), + r_angle2_token, + )), + |(l_angle2, target, r_angle2)| node(TARGET, [l_angle2, target.text_token(), r_angle2]), 
+ ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::{ast::Target, tests::to_ast, ParseConfig}; + + let to_target = to_ast::(target_node); + + insta::assert_debug_snapshot!( + to_target("<>").syntax, + @r###" + TARGET@0..10 + L_ANGLE2@0..2 "<<" + TEXT@2..8 "target" + R_ANGLE2@8..10 ">>" + "### + ); + + insta::assert_debug_snapshot!( + to_target("<>").syntax, + @r###" + TARGET@0..11 + L_ANGLE2@0..2 "<<" + TEXT@2..9 "tar get" + R_ANGLE2@9..11 ">>" + "### + ); + + let config = &ParseConfig::default(); + + assert!(target_node(("<>", config).into()).is_err()); + assert!(target_node(("<< target>>", config).into()).is_err()); + assert!(target_node(("<>", config).into()).is_err()); + assert!(target_node(("<get>>", config).into()).is_err()); + assert!(target_node(("<>", config).into()).is_err()); + assert!(target_node(("<", config).into()).is_err()); +} diff --git a/src/syntax/timestamp.rs b/src/syntax/timestamp.rs new file mode 100644 index 0000000..31686d3 --- /dev/null +++ b/src/syntax/timestamp.rs @@ -0,0 +1,357 @@ +use nom::{ + branch::alt, + bytes::complete::{tag, take_till, take_while1, take_while_m_n}, + character::complete::{digit1, space0, space1}, + combinator::{iterator, map, opt}, + sequence::tuple, + IResult, +}; + +use super::{ + combinator::{ + colon_token, l_angle_token, l_bracket_token, l_parens_token, minus2_token, minus_token, + node, percent2_token, r_angle_token, r_bracket_token, r_parens_token, GreenElement, + NodeBuilder, + }, + input::Input, + SyntaxKind::*, +}; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn timestamp_diary_node(input: Input) -> IResult { + let mut parser = map( + tuple(( + l_angle_token, + percent2_token, + l_parens_token, + take_till(|c| c == ')' || c == '>' || c == '\n'), + r_parens_token, + r_angle_token, + )), + |(l_angle, percent2, l_paren, value, r_paren, r_angle)| { + node( + TIMESTAMP_DIARY, + [ + l_angle, + 
percent2, + l_paren, + value.text_token(), + r_paren, + r_angle, + ], + ) + }, + ); + crate::lossless_parser!(parser, input) +} + +fn date(i: Input) -> IResult { + map( + tuple(( + take_while_m_n(4, 4, |c: char| c.is_ascii_digit()), + minus_token, + take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), + minus_token, + take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), + )), + |(year, minus, month, minus_, day)| { + [ + year.token(TIMESTAMP_YEAR), + minus, + month.token(TIMESTAMP_MONTH), + minus_, + day.token(TIMESTAMP_DAY), + ] + }, + )(i) +} + +fn dayname(i: Input) -> IResult { + map( + take_while1(|c: char| { + !c.is_ascii_whitespace() + && !c.is_ascii_digit() + && c != '+' + && c != '-' + && c != ']' + && c != '>' + && c != '.' + }), + |i: Input| i.token(TIMESTAMP_DAYNAME), + )(i) +} + +fn time(i: Input) -> IResult { + map( + tuple(( + take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), + colon_token, + take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), + )), + |(hour, colon, minute)| { + [ + hour.token(TIMESTAMP_HOUR), + colon, + minute.token(TIMESTAMP_MINUTE), + ] + }, + )(i) +} + +fn repeater_or_delay( + input: Input, +) -> IResult { + let (input, mark) = alt(( + map(alt((tag("++"), tag("+"), tag(".+"))), |i: Input| { + i.token(TIMESTAMP_REPEATER_MARK) + }), + map(alt((tag("--"), tag("-"))), |i: Input| { + i.token(TIMESTAMP_DELAY_MARK) + }), + ))(input)?; + let (input, value) = digit1(input)?; + let (input, unit) = alt((tag("h"), tag("d"), tag("w"), tag("m"), tag("y")))(input)?; + + Ok(( + input, + ( + mark, + value.token(TIMESTAMP_VALUE), + unit.token(TIMESTAMP_UNIT), + ), + )) +} + +fn timestamp_node_base( + input: Input, + l_parser: impl Fn(Input) -> IResult, + r_parser: impl Fn(Input) -> IResult, +) -> IResult, ()> { + let (input, l_angle) = l_parser(input)?; + let (input, start_date) = date(input)?; + let (input, start_dayname) = opt(tuple((space1, dayname)))(input)?; + let (input, start_time) = opt(tuple((space1, time)))(input)?; + + let mut b = 
NodeBuilder::new(); + b.push(l_angle); + b.children.extend(start_date); + + if let Some((ws, dayname)) = start_dayname { + b.push(ws.ws_token()); + b.push(dayname); + } + + if input.as_str().starts_with('-') { + let (ws, start_time) = match start_time { + Some(start_time) => start_time, + None => return Err(nom::Err::Error(())), + }; + + let (input, minus) = minus_token(input)?; + let (input, end_time) = time(input)?; + + b.ws(ws); + b.children.extend(start_time); + b.push(minus); + b.children.extend(end_time); + + let mut iter = iterator(input, tuple((space1, repeater_or_delay))); + for (ws, (mark, value, unit)) in &mut iter { + b.children.extend([ws.ws_token(), mark, value, unit]); + } + let (input, _) = iter.finish()?; + + let (input, space) = space0(input)?; + let (input, r_angle) = r_parser(input)?; + + b.ws(space); + b.push(r_angle); + + return Ok((input, b.children)); + } + + if let Some((ws, start_time)) = start_time { + b.ws(ws); + b.children.extend(start_time); + } + + let mut iter = iterator(input, tuple((space1, repeater_or_delay))); + for (ws, (mark, value, unit)) in &mut iter { + b.children.extend([ws.ws_token(), mark, value, unit]); + } + let (input, _) = iter.finish()?; + + let (input, space) = space0(input)?; + let (input, r_angle) = r_parser(input)?; + + b.ws(space); + b.push(r_angle); + + if input.as_str().starts_with("--") { + let (input, minus2) = minus2_token(input)?; + let (input, l_angle) = l_parser(input)?; + let (input, end_date) = date(input)?; + let (input, end_dayname) = opt(tuple((space1, dayname)))(input)?; + let (input, end_time) = opt(tuple((space1, time)))(input)?; + + b.children.extend([minus2, l_angle]); + b.children.extend(end_date); + if let Some((ws, dayname)) = end_dayname { + b.push(ws.ws_token()); + b.push(dayname); + } + if let Some((ws, end_time)) = end_time { + b.ws(ws); + b.children.extend(end_time); + } + let mut iter = iterator(input, tuple((space1, repeater_or_delay))); + for (ws, (mark, value, unit)) in &mut iter { 
+ b.children.extend([ws.ws_token(), mark, value, unit]); + } + let (input, _) = iter.finish()?; + + let (input, space_) = space0(input)?; + let (input, r_angle) = r_parser(input)?; + + b.ws(space_); + b.push(r_angle); + + Ok((input, b.children)) + } else { + Ok((input, b.children)) + } +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn timestamp_active_node(input: Input) -> IResult { + fn parser(input: Input) -> IResult { + let (input, children) = timestamp_node_base(input, l_angle_token, r_angle_token)?; + Ok((input, node(TIMESTAMP_ACTIVE, children))) + } + crate::lossless_parser!(parser, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(level = "debug", skip(input), fields(input = input.s)) +)] +pub fn timestamp_inactive_node(input: Input) -> IResult { + fn parser(input: Input) -> IResult { + let (input, children) = timestamp_node_base(input, l_bracket_token, r_bracket_token)?; + Ok((input, node(TIMESTAMP_INACTIVE, children))) + } + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::{ast::Timestamp, tests::to_ast}; + + let to_timestamp = to_ast::(timestamp_inactive_node); + + to_timestamp("[2003-09-16]"); + to_timestamp("[2003-09-16 09:09]"); + to_timestamp("[2003-09-16 Tue]"); + to_timestamp("[2003-09-16 Tue 09:09]"); + to_timestamp("[2003-09-16]--[2003-09-16]"); + to_timestamp("[2003-09-16 09:09]--[2003-09-16 09:09]"); + to_timestamp("[2003-09-16]--[2003-09-16 09:09]"); + to_timestamp("[2003-09-16 Tue]--[2003-09-16 Tue]"); + to_timestamp("[2003-09-16 Tue 09:09]--[2003-09-16 Tue 09:09]"); + to_timestamp("[2003-09-16 Tue 09:09-09:09]"); + to_timestamp("[2003-09-16 09:09-09:09 ]"); + to_timestamp("[2003-09-16 09:09 +1w .+1d]"); + to_timestamp("[2003-09-16 09:09]--[2003-09-16 +1w .+1d --1d ]"); + to_timestamp("[2003-09-16 Tue 09:09 +1w]--[2003-09-16 .+1d --1d ]"); + to_timestamp("[2003-09-16 09:09-10:19 +1w --1d]"); + + let ts = 
to_timestamp("[2003-09-16 Tue +1w]"); + assert!(!ts.is_range()); + insta::assert_debug_snapshot!( + ts.syntax, + @r###" + TIMESTAMP_INACTIVE@0..20 + L_BRACKET@0..1 "[" + TIMESTAMP_YEAR@1..5 "2003" + MINUS@5..6 "-" + TIMESTAMP_MONTH@6..8 "09" + MINUS@8..9 "-" + TIMESTAMP_DAY@9..11 "16" + WHITESPACE@11..12 " " + TIMESTAMP_DAYNAME@12..15 "Tue" + WHITESPACE@15..16 " " + TIMESTAMP_REPEATER_MARK@16..17 "+" + TIMESTAMP_VALUE@17..18 "1" + TIMESTAMP_UNIT@18..19 "w" + R_BRACKET@19..20 "]" + "### + ); + + let ts = to_timestamp("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]"); + assert!(ts.is_range()); + insta::assert_debug_snapshot!( + ts.syntax, + @r###" + TIMESTAMP_INACTIVE@0..46 + L_BRACKET@0..1 "[" + TIMESTAMP_YEAR@1..5 "2003" + MINUS@5..6 "-" + TIMESTAMP_MONTH@6..8 "09" + MINUS@8..9 "-" + TIMESTAMP_DAY@9..11 "16" + WHITESPACE@11..12 " " + TIMESTAMP_DAYNAME@12..15 "Tue" + WHITESPACE@15..16 " " + TIMESTAMP_HOUR@16..18 "09" + COLON@18..19 ":" + TIMESTAMP_MINUTE@19..21 "39" + R_BRACKET@21..22 "]" + MINUS2@22..24 "--" + L_BRACKET@24..25 "[" + TIMESTAMP_YEAR@25..29 "2003" + MINUS@29..30 "-" + TIMESTAMP_MONTH@30..32 "09" + MINUS@32..33 "-" + TIMESTAMP_DAY@33..35 "16" + WHITESPACE@35..36 " " + TIMESTAMP_DAYNAME@36..39 "Tue" + WHITESPACE@39..40 " " + TIMESTAMP_HOUR@40..42 "10" + COLON@42..43 ":" + TIMESTAMP_MINUTE@43..45 "39" + R_BRACKET@45..46 "]" + "### + ); + + let ts = to_timestamp("[2003-09-16 Tue 09:39-10:39]"); + assert!(ts.is_range()); + insta::assert_debug_snapshot!( + ts.syntax, + @r###" + TIMESTAMP_INACTIVE@0..28 + L_BRACKET@0..1 "[" + TIMESTAMP_YEAR@1..5 "2003" + MINUS@5..6 "-" + TIMESTAMP_MONTH@6..8 "09" + MINUS@8..9 "-" + TIMESTAMP_DAY@9..11 "16" + WHITESPACE@11..12 " " + TIMESTAMP_DAYNAME@12..15 "Tue" + WHITESPACE@15..16 " " + TIMESTAMP_HOUR@16..18 "09" + COLON@18..19 ":" + TIMESTAMP_MINUTE@19..21 "39" + MINUS@21..22 "-" + TIMESTAMP_HOUR@22..24 "10" + COLON@24..25 ":" + TIMESTAMP_MINUTE@25..27 "39" + R_BRACKET@27..28 "]" + "### + ); +} diff --git a/src/tests.rs 
b/src/tests.rs new file mode 100644 index 0000000..5ed57c3 --- /dev/null +++ b/src/tests.rs @@ -0,0 +1,24 @@ +//! test utils + +use nom::IResult; +use rowan::{ast::AstNode, SyntaxNode}; + +use crate::{ + syntax::{combinator::GreenElement, input::Input}, + ParseConfig, +}; + +pub fn to_ast( + parser: impl Fn(Input) -> IResult, +) -> impl Fn(&str) -> N { + move |s: &str| { + let input = Input { + s, + c: &ParseConfig::default(), + }; + let element = parser(input).unwrap().1; + let node = element.into_node().unwrap(); + let node = SyntaxNode::::new_root(node); + AstNode::cast(node).unwrap() + } +} diff --git a/src/validate.rs b/src/validate.rs deleted file mode 100644 index 535b268..0000000 --- a/src/validate.rs +++ /dev/null @@ -1,217 +0,0 @@ -use indextree::NodeId; -use std::ops::RangeInclusive; - -use crate::elements::{Element, Table, TableCell, TableRow}; -use crate::Org; - -/// Validation Error -#[derive(Debug)] -pub enum ValidationError { - /// Expected at least one child - ExpectedChildren { - at: NodeId, - }, - /// Expected no children - UnexpectedChildren { - at: NodeId, - }, - UnexpectedElement { - expected: &'static str, - at: NodeId, - }, - /// Expected a detached element - ExpectedDetached { - at: NodeId, - }, - /// Expected headline level in specify range - HeadlineLevelMismatch { - range: RangeInclusive, - at: NodeId, - }, -} - -impl ValidationError { - pub fn element<'a, 'b>(&self, org: &'a Org<'b>) -> &'a Element<'b> { - match self { - ValidationError::ExpectedChildren { at } - | ValidationError::UnexpectedChildren { at } - | ValidationError::UnexpectedElement { at, .. } - | ValidationError::ExpectedDetached { at } - | ValidationError::HeadlineLevelMismatch { at, .. } => &org[*at], - } - } -} - -pub type ValidationResult = Result; - -impl Org<'_> { - /// Validates an `Org` struct. - pub fn validate(&self) -> Vec { - let mut errors = Vec::new(); - - macro_rules! 
expect_element { - ($node:ident, $expect:expr, $($pattern:pat)|+) => { - match self[$node] { - $($pattern)|+ => (), - _ => errors.push(ValidationError::UnexpectedElement { - expected: $expect, - at: $node - }), - } - }; - } - - macro_rules! expect_children { - ($node:ident) => { - if self.arena[$node].first_child().is_none() { - errors.push(ValidationError::ExpectedChildren { at: $node }); - } - }; - } - - for node_id in self.root.descendants(&self.arena) { - let node = &self.arena[node_id]; - match node.get() { - Element::Document { .. } => { - let mut children = node_id.children(&self.arena); - if let Some(child) = children.next() { - expect_element!( - child, - "Headline|Section", - Element::Headline { .. } | Element::Section - ); - } - - for child in children { - expect_element!( - child, - "Headline", - Element::Headline { .. } - ); - } - } - Element::Headline { .. } => { - expect_children!(node_id); - - let mut children = node_id.children(&self.arena); - if let Some(child) = children.next() { - expect_element!(child, "Title", Element::Title(_)); - } - - if let Some(child) = children.next() { - expect_element!( - child, - "Headline|Section", - Element::Headline { .. } | Element::Section - ); - } - - for child in children { - expect_element!( - child, - "Headline", - Element::Headline { .. } - ); - } - } - Element::Title(title) => { - if !title.raw.is_empty() && node.first_child().is_none() { - errors.push(ValidationError::ExpectedChildren { at: node_id }); - } - } - Element::List(_) => { - expect_children!(node_id); - for child in node_id.children(&self.arena) { - expect_element!(child, "ListItem", Element::ListItem(_)); - } - } - Element::Table(Table::Org { .. 
}) => { - for child in node_id.children(&self.arena) { - expect_element!(child, "TableRow", Element::TableRow(_)); - } - } - Element::TableRow(TableRow::Header) => { - for child in node_id.children(&self.arena) { - expect_element!( - child, - "TableCell::Header", - Element::TableCell(TableCell::Header) - ); - } - } - Element::TableRow(TableRow::Body) => { - for child in node_id.children(&self.arena) { - expect_element!( - child, - "TableCell::Body", - Element::TableCell(TableCell::Body) - ); - } - } - Element::CommentBlock(_) - | Element::ExampleBlock(_) - | Element::ExportBlock(_) - | Element::SourceBlock(_) - | Element::BabelCall(_) - | Element::InlineSrc(_) - | Element::Code { .. } - | Element::FnRef(_) - | Element::InlineCall(_) - | Element::Link(_) - | Element::Macros(_) - | Element::RadioTarget - | Element::Snippet(_) - | Element::Target(_) - | Element::Text { .. } - | Element::Timestamp(_) - | Element::Verbatim { .. } - | Element::FnDef(_) - | Element::Clock(_) - | Element::Comment { .. } - | Element::FixedWidth { .. } - | Element::Keyword(_) - | Element::Rule(_) - | Element::Cookie(_) - | Element::TableRow(TableRow::BodyRule) - | Element::TableRow(TableRow::HeaderRule) => { - if node.first_child().is_some() { - errors.push(ValidationError::UnexpectedChildren { at: node_id }); - } - } - Element::SpecialBlock(_) - | Element::QuoteBlock(_) - | Element::CenterBlock(_) - | Element::VerseBlock(_) - | Element::Paragraph { .. } - | Element::Section - | Element::Bold - | Element::Italic - | Element::Underline - | Element::Strike - | Element::DynBlock(_) => { - expect_children!(node_id); - } - Element::ListItem(_) - | Element::Drawer(_) - | Element::TableCell(_) - | Element::Table(_) => (), - } - } - errors - } - - pub(crate) fn debug_validate(&self) { - if cfg!(debug_assertions) { - let errors = self.validate(); - if !errors.is_empty() { - eprintln!("Org validation failed. 
{} error(s) found:", errors.len()); - for err in errors { - eprintln!("{:?} at {:?}", err, err.element(self)); - } - panic!( - "Looks like there's a bug in orgize! Please report it with your org-mode content at https://github.com/PoiScript/orgize/issues." - ); - } - } - } -} diff --git a/tests/blank.rs b/tests/blank.rs deleted file mode 100644 index 288e26a..0000000 --- a/tests/blank.rs +++ /dev/null @@ -1,84 +0,0 @@ -use orgize::Org; - -const ORG_STR: &str = r#" - -#+TITLE: org - -#+BEGIN_QUOTE - -CONTENTS - -#+END_QUOTE - -* Headline 1 -SCHEDULED: <2019-10-28 Mon> -:PROPERTIES: -:ID: headline-1 -:END: - -:LOGBOOK: - -CLOCK: [2019-10-28 Mon 08:53] - -CLOCK: [2019-10-28 Mon 08:53]--[2019-10-28 Mon 08:53] => 0:00 - -:END: - ------ - -#+CALL: VALUE - -# -# Comment -# - -#+BEGIN: NAME PARAMETERS - -CONTENTS - -#+END: - -: -: Fixed width -: - -#+BEGIN_COMMENT - -COMMENT - -#+END_COMMENT - -#+BEGIN_EXAMPLE -#+END_EXAMPLE - - 1. 1 - -2. 2 - - 3. 3 - - + 1 - - + 2 - - - 3 - - - 4 - - + 5 - - - -"#; - -#[test] -fn blank() { - let org = Org::parse(ORG_STR); - - let mut writer = Vec::new(); - org.write_org(&mut writer).unwrap(); - - // eprintln!("{}", serde_json::to_string_pretty(&org).unwrap()); - - assert_eq!(String::from_utf8(writer).unwrap(), ORG_STR); -} diff --git a/tests/html.rs b/tests/html.rs new file mode 100644 index 0000000..ab1a60e --- /dev/null +++ b/tests/html.rs @@ -0,0 +1,176 @@ +use orgize::Org; + +#[test] +fn emphasis() { + insta::assert_snapshot!( + Org::parse("*bold*, /italic/,\n_underlined_, =verbatim= and ~code~").to_html(), + @r###" +

    bold, italic, + underlined, verbatim and code

    + "### + ); +} + +#[test] +fn link() { + insta::assert_snapshot!( + Org::parse("Visit[[http://example.com][link1]]or[[http://example.com][link1]].").to_html(), + @r###"

    Visitlink1orlink1.

    "### + ); +} + +#[test] +fn section_and_headline() { + insta::assert_snapshot!( + Org::parse(r#" +* title 1 +section 1 +** title 2 +section 2 +* title 3 +section 3 +* title 4 +section 4 +"#).to_html(), + @r###" +

    title 1

    section 1 +

    title 2

    section 2 +

    title 3

    section 3 +

    title 4

    section 4 +

    + "### + ); +} + +#[test] +fn list() { + insta::assert_snapshot!( + Org::parse(r#" ++ 1 + ++ 2 + + - 3 + + - 4 + ++ 5 +"#).to_html(), + @r###" +
    • 1 +

    • 2 +

      • 3 +

      • 4 +

    • 5 +

    + "### + ); +} + +#[test] +fn snippet() { + insta::assert_snapshot!( + Org::parse("@@html:@@delete this@@html:@@").to_html(), + @"

    delete this

    " + ); +} + +#[test] +fn paragraphs() { + insta::assert_snapshot!( + Org::parse(r#" +* title + +paragraph 1 + +paragraph 2 + +paragraph 3 + +paragraph 4 +"#).to_html(), + @r###" +

    title

    paragraph 1 +

    paragraph 2 +

    paragraph 3 +

    paragraph 4 +

    + "### + ); +} + +#[test] +fn table() { + // don't has table header + insta::assert_snapshot!( + Org::parse(r#" +|-----+-----+-----| +| 0 | 1 | 2 | +| 4 | 5 | 6 | +|-----+-----+-----| +"#).to_html(), + @"
    012
    456
    " + ); + + // has table header + insta::assert_snapshot!( + Org::parse(r#" +| 0 | 1 | 2 | +|-----+-----+-----| +| 4 | 5 | 6 | +|-----+-----+-----| +"#).to_html(), + @"
    012
    456
    " + ); + + // has two table body + insta::assert_snapshot!( + Org::parse(r#" +| 0 | 1 | 2 | +|-----+-----+-----| +| 4 | 5 | 6 | +|-----+-----+-----| +| 7 | 8 | 9 | +"#).to_html(), + @"
    012
    456
    789
    " + ); + + // multiple row rule + insta::assert_snapshot!( + Org::parse(r#" +| 0 | 1 | 2 | +|-----+-----+-----| +|-----+-----+-----| +| 4 | 5 | 6 | +"#).to_html(), + @"
    012
    456
    " + ); + + // empty + insta::assert_snapshot!( + Org::parse(r#" +|-----+-----+-----| +|-----+-----+-----| +"#).to_html(), + @"
    " + ); + + insta::assert_snapshot!( + Org::parse(r#" +| +|- +| +|- +| +"#).to_html(), + @"
    " + ); +} + +#[test] +fn line_break() { + insta::assert_debug_snapshot!( + Org::parse("aa\\\\\nbb").to_html(), + @r###""

    aa
    bb

    ""### + ); +} diff --git a/tests/issue_10.rs b/tests/issue_10.rs deleted file mode 100644 index f0570f8..0000000 --- a/tests/issue_10.rs +++ /dev/null @@ -1,18 +0,0 @@ -use orgize::Org; - -#[test] -fn can_handle_empty_emphasis() { - let cases = &[ - "* / // a", - "\"* / // a\"", - "* * ** a", - "* 2020\n** December\n*** Experiment\nType A is marked with * and type B is marked with **.\n", - "* 2020\n:DRAWER:\n* ** a\n:END:", - "* * ** :a:", - "* * ** " - ]; - - for case in cases { - let _ = Org::parse(case); - } -} diff --git a/tests/issue_11.rs b/tests/issue_11.rs deleted file mode 100644 index d22e72b..0000000 --- a/tests/issue_11.rs +++ /dev/null @@ -1,21 +0,0 @@ -use orgize::Org; - -#[test] -fn can_handle_empty_list_item() { - let cases = &[ - "0. ", - "* \n0. ", - " * ", - " 0. ", - "\t* ", - "- ", - "- hello\n- ", - "- \n- hello", - "- hello\n- \n- world", - "* world\n- ", - ]; - - for case in cases { - let _ = Org::parse(case); - } -} diff --git a/tests/issue_15_16.rs b/tests/issue_15_16.rs deleted file mode 100644 index 3d8d9fc..0000000 --- a/tests/issue_15_16.rs +++ /dev/null @@ -1,26 +0,0 @@ -use orgize::Org; - -#[test] -fn bad_headline_tags() { - contains_no_tag(Org::parse("* a ::")); - - contains_no_tag(Org::parse("* a :(:")); - - contains_one_tag(Org::parse("* a \t:_:"), "_"); - - contains_one_tag(Org::parse("* a \t :@:"), "@"); - - contains_one_tag(Org::parse("* a :#:"), "#"); - - contains_one_tag(Org::parse("* a\t :%:"), "%"); - - contains_one_tag(Org::parse("* a :余:"), "余"); -} - -fn contains_no_tag(org: Org) { - assert!(org.headlines().next().unwrap().title(&org).tags.is_empty()); -} - -fn contains_one_tag(org: Org, tag: &str) { - assert_eq!(vec![tag], org.headlines().next().unwrap().title(&org).tags); -} diff --git a/tests/issue_22.rs b/tests/issue_22.rs deleted file mode 100644 index c9a71ee..0000000 --- a/tests/issue_22.rs +++ /dev/null @@ -1,24 +0,0 @@ -use orgize::Org; - -#[test] -fn whitespaces() { - let org = Org::parse(" "); - - 
assert(&org); - - let org = Org::parse("\t \t \n \t \t \n \t"); - - assert(&org); - - let org = Org::parse("\u{000b}\u{0085}\u{00a0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200a}\u{2028}\u{2029}\u{202f}\u{205f}\u{3000}"); - - assert(&org); -} - -fn assert(org: &Org) { - assert_eq!( - org.iter().count(), - 2, - "should contains only one element - document" - ); -} diff --git a/tests/parse.rs b/tests/parse.rs index 69fb98c..813316b 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -1,172 +1,37 @@ -use orgize::Org; -use pretty_assertions::assert_eq; +const INPUT: &[&str] = &[ + // issue 10 + "* / // a", + "\"* / // a\"", + "* * ** a", + "* 2020\n** December\n*** Experiment\nType A is marked with * and type B is marked with **.\n", + "* 2020\n:DRAWER:\n* ** a\n:END:", + "* * ** :a:", + "* * ** ", + // issue 11 + "0. ", + "* \n0. ", + " * ", + " 0. ", + "\t* ", + "- ", + "- hello\n- ", + "- \n- hello", + "- hello\n- \n- world", + "* world\n- ", + // issue 22 + "\u{000b}\u{0085}\u{00a0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200a}\u{2028}\u{2029}\u{202f}\u{205f}\u{3000}", + // fuzz test + "___\n", + "\n\n\n", + "\n*", + "\r-", + "6\r\n", + "|\n\u{b}|" +]; -macro_rules! test_suite { - ($name:ident, $content:expr, $expected:expr) => { - #[test] - fn $name() { - let mut writer = Vec::new(); - let org = Org::parse($content); - org.write_html(&mut writer).unwrap(); - let string = String::from_utf8(writer).unwrap(); - assert_eq!(string, $expected); - } - }; +#[test] +fn parse() { + for input in INPUT { + let _ = orgize::Org::parse(input); + } } - -test_suite!( - emphasis, - "*bold*, /italic/,\n_underlined_, =verbatim= and ~code~", - "

    bold, italic,\nunderlined, \ - verbatim and code

    " -); - -test_suite!( - link, - "Visit[[http://example.com][link1]]or[[http://example.com][link1]].", - r#"

    Visitlink1orlink1.

    "# -); - -test_suite!( - section_and_headline, - r#" -* title 1 -section 1 -** title 2 -section 2 -* title 3 -section 3 -* title 4 -section 4 -"#, - "

    title 1

    section 1

    \ -

    title 2

    section 2

    \ -

    title 3

    section 3

    \ -

    title 4

    section 4

    " -); - -test_suite!( - list, - r#" -+ 1 - -+ 2 - - - 3 - - - 4 - -+ 5 -"#, - "
      \ -
    • 1

    • \ -
    • 2

      • 3

      • 4

    • \ -
    • 5

    • \ -
    " -); - -test_suite!( - snippet, - "@@html:@@delete this@@html:@@", - "

    delete this

    " -); - -test_suite!( - paragraphs, - r#" -* title - -paragraph 1 - -paragraph 2 - -paragraph 3 - -paragraph 4 -"#, - "

    title

    \ -

    paragraph 1

    paragraph 2

    \ -

    paragraph 3

    paragraph 4

    \ -
    " -); - -test_suite!( - table1, - r#" -|-----+-----+-----| -| 0 | 1 | 2 | -|-----+-----+-----| -| 4 | 5 | 6 | -"#, - "
    \ - \ - \ -
    012
    456
    " -); - -test_suite!( - table2, - r#" -|-----+-----+-----| -| 0 | 1 | 2 | -| 4 | 5 | 6 | -|-----+-----+-----| -"#, - "
    \ - \ - \ -
    012
    456
    " -); - -test_suite!( - table3, - r#" -|-----+-----+-----| -|-----+-----+-----| -| 0 | 1 | 2 | -| 4 | 5 | 6 | -"#, - "
    \ - \ - \ -
    012
    456
    " -); - -test_suite!( - table4, - r#" -| 0 | 1 | 2 | -| 4 | 5 | 6 | -|-----+-----+-----| -|-----+-----+-----| -"#, - "
    \ - \ - \ -
    012
    456
    " -); - -test_suite!( - table5, - r#" -|-----+-----+-----| -|-----+-----+-----| -"#, - "
    " -); - -test_suite!( - table6, - r#" -| -|- -| -|- -| -"#, - "
    \ - \ - \ - \ -
    " -); diff --git a/wasm/.gitignore b/wasm/.gitignore new file mode 100644 index 0000000..56aa179 --- /dev/null +++ b/wasm/.gitignore @@ -0,0 +1,6 @@ +/lib +/node_modules +/out-tsc +/package +/pkg +*.tgz \ No newline at end of file diff --git a/wasm/Cargo.toml b/wasm/Cargo.toml new file mode 100644 index 0000000..f2eefce --- /dev/null +++ b/wasm/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "orgize-wasm" +publish = false +version = "0.10.0-alpha.7" +authors = ["PoiScript "] +repository = "https://github.com/PoiScript/orgize" +edition = "2021" +license = "MIT" + +[lib] +crate-type = ["cdylib", "rlib"] + +[dependencies] +orgize = { path = ".." } +wasm-bindgen = "0.2" diff --git a/wasm/README.md b/wasm/README.md new file mode 100644 index 0000000..673208b --- /dev/null +++ b/wasm/README.md @@ -0,0 +1,62 @@ +# orgize + +![npm](https://img.shields.io/npm/v/orgize) + +## Install + +```sh +npm install orgize +yarn add orgize +``` + +## Browser + +```js +import init, { Org } from "orgize"; + +init().then(() => { + const org = new Org("* Hello, /world/!"); + const html = org.html(); + console.log(html); + org.free(); +}); +``` + +## Node.js + +```js +import { Org, initSync } from "orgize"; +import { readFile } from "node:fs/promises"; + +// you can also use import.meta.resolve, but it's currently behind +// an experimental flag --experimental-import-meta-resolve +import { createRequire } from "node:module"; +const require = createRequire(import.meta.url); + +readFile(require.resolve("orgize/wasm")).then((bytes) => { + initSync(bytes); + + const org = new Org("* Hello, /world/!"); + const html = org.html(); + console.log(html); + org.free(); +}); +``` + +## Notes + +1. You must **initialize** the WebAssembly module (using either `init` or + `initSync` function) before using the `Org` class; + +2. Don't forget to call `org.free()` to **release the memory** that + was allocated by Rust; + +3. 
This npm package primarily aims to demonstrate and power the online + demo, so it doesn't provide any customization or settings. + + If you need to, please build your own npm package with `wasm-pack`. + (or `napi` if you're only targeting node.js users) + +## License + +MIT diff --git a/wasm/build.rs b/wasm/build.rs new file mode 100644 index 0000000..db4d2ca --- /dev/null +++ b/wasm/build.rs @@ -0,0 +1,22 @@ +use std::process::Command; + +fn main() { + { + let output = Command::new("git") + .args(["rev-parse", "--short", "HEAD"]) + .output() + .unwrap(); + + let git_hash = String::from_utf8(output.stdout).unwrap(); + + println!("cargo:rustc-env=CARGO_GIT_HASH={}", git_hash); + } + + { + let output = Command::new("date").args(["-R"]).output().unwrap(); + + let git_hash = String::from_utf8(output.stdout).unwrap(); + + println!("cargo:rustc-env=CARGO_BUILD_TIME={}", git_hash); + } +} diff --git a/wasm/index.html b/wasm/index.html new file mode 100644 index 0000000..b755776 --- /dev/null +++ b/wasm/index.html @@ -0,0 +1,348 @@ + + + + + + + Orgize + + + + + + + + + + + + +
    +
    +
    +
    + +
    +
    + + HTML (rendered) + + HTML + Syntax + Traverse +
    +
    +
    + +
    +
    +
    +
    +
    + + + + diff --git a/wasm/package.json b/wasm/package.json new file mode 100644 index 0000000..7c22f42 --- /dev/null +++ b/wasm/package.json @@ -0,0 +1,29 @@ +{ + "name": "orgize", + "version": "0.10.0-alpha.0", + "license": "MIT", + "author": "PoiScript ", + "scripts": { + "build": "rm -rf dist && wasm-pack build -t web -d dist --out-name orgize" + }, + "repository": { + "type": "git", + "url": "https://github.com/PoiScript/orgize" + }, + "module": "./dist/orgize.js", + "typings": "./dist/orgize.d.ts", + "exports": { + ".": { + "types": "./dist/orgize.d.ts", + "import": "./dist/orgize.js" + }, + "./wasm": "./dist/orgize_bg.wasm" + }, + "files": [ + "dist/orgize_bg.wasm", + "dist/orgize.js", + "dist/orgize.d.ts", + "index.html", + "README.md" + ] +} diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs new file mode 100644 index 0000000..79792cd --- /dev/null +++ b/wasm/src/lib.rs @@ -0,0 +1,133 @@ +use orgize::{ + export::{from_fn, Container, Event}, + rowan::ast::AstNode, + Org as Inner, +}; +use std::fmt::Write; + +use wasm_bindgen::prelude::*; + +#[wasm_bindgen] +pub struct Org { + inner: Inner, +} + +#[wasm_bindgen] +impl Org { + #[wasm_bindgen(constructor)] + pub fn parse(input: &str) -> Self { + Org { + inner: Inner::parse(input), + } + } + + pub fn html(&self) -> String { + self.inner.to_html() + } + + pub fn org(&self) -> String { + self.inner.to_org() + } + + pub fn syntax(&self) -> String { + format!("{:#?}", self.inner.document().syntax()) + } + + pub fn update(&mut self, s: &str) { + self.inner = Inner::parse(s); + } + + pub fn traverse(&self) -> String { + let mut result = String::new(); + let mut ident = 0; + let mut handler = from_fn(|event| { + let (name, range) = match &event { + Event::Enter(container) => match container { + Container::Document(x) => ("Document", x.text_range()), + Container::Section(x) => ("Section", x.text_range()), + Container::Paragraph(x) => ("Paragraph", x.text_range()), + Container::Headline(x) => ("Headline", 
x.text_range()), + Container::OrgTable(x) => ("OrgTable", x.text_range()), + Container::OrgTableRow(x) => ("OrgTableRow", x.text_range()), + Container::OrgTableCell(x) => ("OrgTableCell", x.text_range()), + Container::TableEl(x) => ("TableEl", x.text_range()), + Container::List(x) => ("List", x.text_range()), + Container::ListItem(x) => ("ListItem", x.text_range()), + Container::Drawer(x) => ("Drawer", x.text_range()), + Container::DynBlock(x) => ("DynBlock", x.text_range()), + Container::FnDef(x) => ("FnDef", x.text_range()), + Container::Comment(x) => ("Comment", x.text_range()), + Container::FixedWidth(x) => ("FixedWidth", x.text_range()), + Container::SpecialBlock(x) => ("SpecialBlock", x.text_range()), + Container::QuoteBlock(x) => ("QuoteBlock", x.text_range()), + Container::CenterBlock(x) => ("CenterBlock", x.text_range()), + Container::VerseBlock(x) => ("VerseBlock", x.text_range()), + Container::CommentBlock(x) => ("CommentBlock", x.text_range()), + Container::ExampleBlock(x) => ("ExampleBlock", x.text_range()), + Container::ExportBlock(x) => ("ExportBlock", x.text_range()), + Container::SourceBlock(x) => ("SourceBlock", x.text_range()), + Container::Link(x) => ("Link", x.text_range()), + Container::RadioTarget(x) => ("RadioTarget", x.text_range()), + Container::FnRef(x) => ("FnRef", x.text_range()), + Container::Target(x) => ("Target", x.text_range()), + Container::Bold(x) => ("Bold", x.text_range()), + Container::Strike(x) => ("Strike", x.text_range()), + Container::Italic(x) => ("Italic", x.text_range()), + Container::Underline(x) => ("Underline", x.text_range()), + Container::Verbatim(x) => ("Verbatim", x.text_range()), + Container::Code(x) => ("Code", x.text_range()), + Container::Superscript(x) => ("Superscript", x.text_range()), + Container::Subscript(x) => ("Subscript", x.text_range()), + Container::BabelCall(x) => ("BabelCall", x.text_range()), + Container::PropertyDrawer(x) => ("PropertyDrawer", x.text_range()), + Container::AffiliatedKeyword(x) 
=> ("AffiliatedKeyword", x.text_range()), + Container::Keyword(x) => ("Keyword", x.text_range()), + _ => unreachable!(), + }, + Event::Leave(_) => { + ident -= 2; + return; + } + Event::Text(x) => ("Text", x.text_range()), + Event::Macros(x) => ("Macros", x.text_range()), + Event::Cookie(x) => ("Cookie", x.text_range()), + Event::InlineCall(x) => ("InlineCall", x.text_range()), + Event::InlineSrc(x) => ("InlineSrc", x.text_range()), + Event::Clock(x) => ("Clock", x.text_range()), + Event::LineBreak(x) => ("LineBreak", x.text_range()), + Event::Snippet(x) => ("Snippet", x.text_range()), + Event::Rule(x) => ("Rule", x.text_range()), + Event::Timestamp(x) => ("Timestamp", x.text_range()), + Event::LatexFragment(x) => ("LatexFragment", x.text_range()), + Event::LatexEnvironment(x) => ("LatexEnvironment", x.text_range()), + Event::Entity(x) => ("Entity", x.text_range()), + _ => unreachable!(), + }; + + let _ = writeln!( + &mut result, + "{:ident$}{}@{}..{}", + "", + name, + u32::from(range.start()), + u32::from(range.end()) + ); + + if let Event::Enter(_) = event { + ident += 2; + } + }); + self.inner.traverse(&mut handler); + result + } + + #[wasm_bindgen(getter, js_name = "buildTime")] + pub fn build_time() -> String { + env!("CARGO_BUILD_TIME").into() + } + + #[wasm_bindgen(getter, js_name = "gitHash")] + pub fn git_hash() -> String { + env!("CARGO_GIT_HASH").into() + } +}