Compare commits

...
Sign in to create a new pull request.

113 commits

Author SHA1 Message Date
Xuanwo
5f26c94dce
chore: Remove deprecated usize::max_value (#82)
Signed-off-by: Xuanwo <github@xuanwo.io>
2024-07-22 14:15:12 +08:00
PoiScript
8be87eb774
release: bump version to 0.10.0-alpha.10 2024-06-11 15:30:47 +08:00
PoiScript
f150af7341
fix: verify use_sub_superscript 2024-06-11 15:29:00 +08:00
PoiScript
5bc15d80ff
fix: newline in not allowed in property name 2024-06-11 15:16:11 +08:00
PoiScript
13ebef05d2
fix: emit cloze event in Traverse (#79) 2024-06-11 14:48:39 +08:00
PoiScript
eab9eb68ec
feat: support use_sub_superscript config 2024-06-11 14:42:06 +08:00
PoiScript
9b8aec02a4
feat: support top-level properties drawer (#78) 2024-05-09 16:43:05 +08:00
PoiScript
2f31fd4b10
feat: traverse tab in demo web site 2024-05-09 14:23:45 +08:00
PoiScript
0b70cf0415
feat: ExportBlock::value 2024-05-09 13:05:34 +08:00
PoiScript
6c4513d857
feat: initial support for org-fc cloze syntax 2024-05-09 13:04:49 +08:00
PoiScript
8a29a46095
chore: replace some debug_assert with explicit panic 2024-05-09 11:02:46 +08:00
PoiScript
b9a3c7a889
release: bump version to 0.10.0-alpha.9 2024-05-08 19:42:22 +08:00
PoiScript
da8b3385bf
style: cargo clippy 2024-05-08 19:41:12 +08:00
PoiScript
41f3622024
fix: node property parsing 2024-05-08 19:38:03 +08:00
PoiScript
f9b2d73dd4
feat: Link::description & Link::description_raw 2024-05-08 18:46:39 +08:00
PoiScript
df0d5baec9
fix: ignore keyword on html export 2024-05-07 14:53:02 +08:00
PoiScript
caa7c0aacd
feat: markdown export 2024-04-29 17:28:49 +08:00
PoiScript
545db900cd
release: bump version to 0.10.0-alpha.8 2024-04-24 15:00:00 +08:00
PoiScript
1807f19407
feat: add content_begin, content_end and content_raw 2024-04-24 14:56:02 +08:00
PoiScript
8c3ca13e8a
fix: remove assertion in traverser (#77) 2024-04-24 11:52:12 +08:00
PoiScript
cf0f5ce3e4
fix: non-last headline must ends with newline 2024-04-24 11:47:53 +08:00
PoiScript
25ec01a972
docs: update README.md 2024-04-13 18:17:58 +08:00
PoiScript
51a94623f8
feat: element.text_range() 2024-04-13 18:02:23 +08:00
PoiScript
f97fd4cac5
feat: Org::replace_range 2024-04-13 17:50:36 +08:00
PoiScript
f9d56cf899
feat: Timestamp::time_delta & Headline::is_todo & Headline::is_done 2024-04-09 15:58:55 +08:00
PoiScript
f918bf4ca6
feat: add Document::title & Document::keywords 2024-04-01 11:44:00 +08:00
PoiScript
0d8ef46a38
feat: update Event::Text 2024-04-01 11:43:30 +08:00
PoiScript
e82adf92b7
feat: Org::node_at_offset api 2024-04-01 11:41:40 +08:00
PoiScript
ddb71dc714
feat: add raw method 2024-03-31 18:55:49 +08:00
PoiScript
a11f7a5007
feat: rename begin() to start() & return TextSize 2024-03-27 14:23:31 +08:00
PoiScript
e12e89d401
feat: add from_fn and from_fn_with_ctx 2024-03-26 17:17:25 +08:00
PoiScript
42c47fa5b6
feat: Drawer::name and Headline::clocks 2024-03-15 19:22:08 +08:00
PoiScript
b03233ca34
style: cargo clippy 2024-03-06 15:44:31 +08:00
PoiScript
f65e240e92
feat: tracing is now optional 2024-03-06 15:42:32 +08:00
PoiScript
14d1555fc1
chore: reorganize directories 2024-03-06 15:20:40 +08:00
PoiScript
42cb1d21bd
chore: remove orgize-cli & orgize-lsp 2024-03-06 15:05:05 +08:00
PoiScript
edd73e3c6d
feat(lsp): semantic tokens for headline keyword, priority and timestamp 2024-01-10 05:33:05 +08:00
PoiScript
e0021b4a91
release: bump version to 0.10.0-alpha.7 2024-01-10 04:41:41 +08:00
PoiScript
ae7589fd61
feat: Token::syntax 2024-01-10 04:38:06 +08:00
PoiScript
714f878ec9
feat: Headline::title_raw 2024-01-10 04:38:05 +08:00
PoiScript
396535073c
docs: fix typo 2023-12-21 23:57:36 +08:00
PoiScript
a2248cb9be
feat: update README.md and vscode icons 2023-12-21 23:51:42 +08:00
PoiScript
0916b40cef
feat: update html exporting 2023-12-21 05:20:21 +08:00
PoiScript
9f1a4c84ee
feat(cli): fmt subcommand 2023-12-21 05:18:32 +08:00
PoiScript
de4ff9aa61
feat(common): list formatting 2023-12-21 04:48:41 +08:00
PoiScript
2e9de16e90
feat(lsp): completion with snippet 2023-12-21 04:45:36 +08:00
PoiScript
5237560b61
feat(lsp): completion support 2023-12-21 02:57:13 +08:00
PoiScript
0ebb6552d7
feat(editors/vscode): html preview 2023-12-21 02:55:36 +08:00
PoiScript
4cc1130a17
chore: add orgize-{cli,common,lsp} package 2023-12-20 21:56:10 +08:00
PoiScript
6930640866
feat: parse affiliated keywords in block 2023-12-14 20:00:24 +08:00
PoiScript
868cd9ab10
feat: FixedWidth::value & Comment::value 2023-12-14 18:15:09 +08:00
PoiScript
7d4b176975
chore: impl Deref for Input 2023-12-13 01:32:53 +08:00
PoiScript
27de7ee68c
feat: block parsing 2023-12-13 01:07:11 +08:00
PoiScript
b2123d1acd
release: bump version to 0.10.0-alpha.6 2023-12-05 17:36:48 +08:00
PoiScript
19c62979f0
feat: simpify Traverser trait 2023-12-05 17:32:55 +08:00
PoiScript
fd098f0cf1
feat: parse block argument 2023-11-27 15:05:28 +08:00
PoiScript
3234554590
fix: list item cannot have an asterisk at the beginning of line 2023-11-24 16:47:58 +08:00
PoiScript
03756b95f9
ci: use dtolnay/rust-toolchain 2023-11-24 16:21:44 +08:00
PoiScript
f8fd1306e2
feat: support formulas in org table 2023-11-24 16:15:11 +08:00
PoiScript
3c2c8b28fd
feat: various update 2023-11-24 14:57:45 +08:00
PoiScript
be32dc24e0
chore: update benchmark 2023-11-24 13:34:13 +08:00
PoiScript
471a23c958
fix: consider single '\r' as newline 2023-11-23 18:08:39 +08:00
PoiScript
8fcfd60712
feat: support objects in link description 2023-11-23 15:47:14 +08:00
PoiScript
590212fddb
chore: remove dbg! 2023-11-22 23:22:49 +08:00
PoiScript
6913dbc817
release: bump version to 0.10.0-alpha.5 2023-11-21 18:46:34 +08:00
PoiScript
9019d4db9d
chore: remove outdated SYNTAX.md 2023-11-21 18:44:55 +08:00
PoiScript
c895b0bf4f
docs: update STATUS.md 2023-11-21 18:44:06 +08:00
PoiScript
8b5c545d4b
feat: support subscript and superscript 2023-11-21 18:42:55 +08:00
PoiScript
58dfb022c2
feat: support line breaks 2023-11-21 16:35:25 +08:00
PoiScript
6598095a9f
feat: is_object, is_element, is_lesser_element, is_greater_element 2023-11-21 16:07:53 +08:00
PoiScript
0c870b7be8
feat: support minimal sets of object in radio target 2023-11-21 14:55:33 +08:00
PoiScript
ab543b403e
feat: nth_repeater nth_delay 2023-11-21 11:55:33 +08:00
PoiScript
6c27a9257f
feat: introduce Token struct 2023-11-20 15:30:33 +08:00
PoiScript
9004de9930
release: bump version to 0.10.0-alpha.4 2023-11-20 01:47:23 +08:00
PoiScript
2f64e1e6af
feat: add todo_type method in Headline 2023-11-20 01:46:13 +08:00
PoiScript
c432d335f1
feat: timestamp in html export 2023-11-20 00:12:29 +08:00
PoiScript
c49f7e5716
feat: support repeater/delay parsing 2023-11-19 23:09:54 +08:00
PoiScript
a0fdf1d5c8
feat: support optional dayname in timestap (#27) 2023-11-19 18:06:15 +08:00
PoiScript
d7128035f5
release: bump version to 0.10.0-alpha.3 2023-11-19 16:48:34 +08:00
PoiScript
310664b05b
feat: support entity 2023-11-19 16:42:12 +08:00
PoiScript
917fe2f5d2
feat: latex in Traverser 2023-11-19 13:26:14 +08:00
PoiScript
00cfde6e90
feat: support latex environment parsing 2023-11-19 13:08:20 +08:00
PoiScript
4a3dd6aacb
feat: support latex fragment parsing 2023-11-19 13:08:19 +08:00
PoiScript
2aec1768da
chore: update demo 2023-11-18 22:40:45 +08:00
PoiScript
46ed6b8d91
build: remove serde_json from dev deps 2023-11-18 22:40:44 +08:00
PoiScript
12bee6fb6b
refactor: remove HeadlineTitle, ListItemTag, ListItemContent 2023-11-18 22:40:43 +08:00
PoiScript
d81c1a2966
release: bump version to 0.10.0-alpha.2 2023-11-17 22:42:39 +08:00
dependabot[bot]
5c37ac5b51
build(deps): update criterion requirement from 0.4 to 0.5 (#71) 2023-11-17 22:41:43 +08:00
dependabot[bot]
c2e1d4c375
build(deps): update indexmap requirement from 1.9 to 2.1 (#72) 2023-11-17 22:41:26 +08:00
PoiScript
ba1776808d
fix(object): fix edge case (#69) 2023-11-17 22:39:14 +08:00
PoiScript
babae8dbe8
test: add test case (#33) 2023-11-17 18:08:47 +08:00
PoiScript
52b498d3c8
feat: add parse example 2023-11-17 17:07:04 +08:00
PoiScript
6b2d57f4ae
docs: add development.md 2023-11-17 16:47:15 +08:00
PoiScript
20a46b132e
test: update test in Timestamp::start_to_chorno 2023-11-17 16:42:04 +08:00
PoiScript
c4d9aa4c51
style: run cargo clippy 2023-11-17 14:18:25 +08:00
PoiScript
e924359df6
feat: simplify public api 2023-11-17 13:34:06 +08:00
PoiScript
394c013fd2
docs: update readme 2023-11-17 00:39:19 +08:00
PoiScript
0245b9b2c4
fix: lossless_parser! should be pub(crate) 2023-11-17 00:37:50 +08:00
PoiScript
8698e59df3
fix: headline tag support multibyte alphanumeric 2023-11-17 00:10:27 +08:00
PoiScript
b7ddc0f076
feat: update list node parsing 2023-11-17 00:07:18 +08:00
PoiScript
ed987d468a
style: run cargo clippy 2023-11-15 14:11:18 +08:00
PoiScript
db7fb70724
feat: handle <thead> in html export 2023-11-15 12:55:35 +08:00
PoiScript
1362624083
feat: support affiliated keyword 2023-11-15 00:36:29 +08:00
PoiScript
a269f2f258
fix: single \r will be consider as blank line 2023-11-14 14:12:05 +08:00
PoiScript
b6e86a128a
test: fix issue_22 2023-11-13 18:01:07 +08:00
PoiScript
db6a827de2
docs: add github actions badge 2023-11-13 18:00:20 +08:00
PoiScript
55019e2e01
chore: bump license year 2023-11-13 17:25:20 +08:00
PoiScript
f570a84765
ci: fix ci 2023-11-13 17:21:33 +08:00
PoiScript
1f19ceb153
ci: fix ci 2023-11-13 17:19:02 +08:00
PoiScript
c61f90851c
ci: fix ci 2023-11-13 17:02:59 +08:00
PoiScript
61ed64486d
ci: fix ci 2023-11-13 16:56:18 +08:00
PoiScript
977e8d134b
ci: gh-pages action 2023-11-13 16:42:37 +08:00
PoiScript
af7c305c9e
chore: prepare for v0.10.0-alpha.1 2023-11-13 16:33:04 +08:00
147 changed files with 15016 additions and 10407 deletions

2
.cargo/config.toml Normal file
View file

@ -0,0 +1,2 @@
[registries.crates-io]
protocol = "sparse"

57
.github/workflows/ci.yml vendored Normal file
View file

@ -0,0 +1,57 @@
name: CI
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@master
- name: Setup rust
uses: dtolnay/rust-toolchain@stable
with:
components: clippy, rustfmt
- run: |
cargo fmt -- --check
cargo test --all-features
cargo clippy
gh-pages:
if: github.ref == 'refs/heads/v0.10'
permissions:
contents: read
pages: write
id-token: write
runs-on: ubuntu-latest
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Setup Pages
uses: actions/configure-pages@v3
- name: Install
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- name: Build
run: wasm-pack build -t web -d ./dist --out-name orgize ./wasm/
- name: Upload artifact
uses: actions/upload-pages-artifact@v2
with:
path: "./wasm"
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v2

View file

@ -1,35 +0,0 @@
name: Rust
on: [push, pull_request]
jobs:
format:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Run rustfmt
run: cargo fmt -- --check
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Install Rust toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
override: true
- name: Cache target/
uses: actions/cache@v1
with:
path: target
key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }}
- name: Run Test
run: cargo test --all-features

View file

@ -1,34 +0,0 @@
name: Wasm
on: [push, pull_request]
defaults:
run:
working-directory: wasm
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-node@v1
with:
node-version: "14"
- name: Install
run: |
curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
yarn
- name: Build
run: |
wasm-pack build --target web --out-dir wasm/pkg -- --features wasm
yarn build
- name: Deploy to gh pages
uses: JamesIves/github-pages-deploy-action@4.1.5
with:
branch: gh-pages
folder: wasm/lib

2
.gitignore vendored
View file

@ -2,6 +2,6 @@
**/*.rs.bk
Cargo.lock
benches/*.org
.vscode
.gdb_history
perf.data*

View file

@ -1,51 +1,53 @@
[workspace]
resolver = "2"
members = [".", "./wasm"]
[package]
name = "orgize"
version = "0.9.0"
version = "0.10.0-alpha.10"
authors = ["PoiScript <poiscript@gmail.com>"]
description = "A Rust library for parsing orgmode files."
repository = "https://github.com/PoiScript/orgize"
readme = "README.md"
edition = "2018"
edition = "2021"
license = "MIT"
keywords = ["orgmode", "emacs", "parser"]
exclude = ["/wasm", "/.github"]
description = "A Rust library for parsing org-mode files."
readme = "README.md"
keywords = ["orgmode", "org-mode", "emacs", "parser"]
[package.metadata.docs.rs]
all-features = true
[badges]
travis-ci = { repository = "PoiScript/orgize" }
[lib]
crate-type = ["cdylib", "rlib"]
[profile.release]
# Tell `rustc` to optimize for small code size.
opt-level = "s"
[features]
default = ["ser"]
wasm = ["serde-wasm-bindgen", "wasm-bindgen", "wee_alloc"]
ser = ["serde", "serde_indextree", "indexmap/serde-1"]
default = []
indexmap = ["dep:indexmap"]
chrono = ["dep:chrono"]
tracing = ["dep:tracing"]
syntax-org-fc = []
[dependencies]
bytecount = "0.6"
cfg-if = "1.0.0"
chrono = { version = "0.4", optional = true }
indextree = "4.3"
indexmap = { version = "2.1", optional = true }
jetscii = "0.5"
lazy_static = "1.4"
memchr = "2.4"
nom = { version = "7.0", default-features = false, features = ["std"] }
serde = { version = "1.0", optional = true, features = ["derive"] }
serde_indextree = { version = "0.2", optional = true }
syntect = { version = "4.6", optional = true }
indexmap = { version = "1.7", features = ["serde-1"], optional = true }
# wasm stuff
serde-wasm-bindgen = { version = "0.3", optional = true }
wasm-bindgen = { version = "0.2", optional = true }
wee_alloc = { version = "0.4", optional = true }
memchr = "2.5"
nom = { version = "7.1", default-features = false, features = ["std"] }
rowan = "0.15"
tracing = { version = "0.1", optional = true }
[dev-dependencies]
pretty_assertions = "1.0"
serde_json = "1.0"
criterion = "0.5"
insta = "1.29"
slugify = "0.1"
tracing-subscriber = { version = "0.3", features = ["fmt"] }
[[bench]]
name = "parse"
harness = false
[[example]]
name = "parse"
required-features = ["tracing"]
[profile.dev.package]
insta.opt-level = 3
similar.opt-level = 3

View file

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019-2021 Alex Lin (poi)
Copyright (c) 2019-2023 Alex Lin (poi)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

281
README.md
View file

@ -1,210 +1,119 @@
# Orgize
[![Build Status](https://travis-ci.org/PoiScript/orgize.svg?branch=master)](https://travis-ci.org/PoiScript/orgize)
[![Crates.io](https://img.shields.io/crates/v/orgize.svg)](https://crates.io/crates/orgize)
[![Document](https://docs.rs/orgize/badge.svg)](https://docs.rs/orgize)
[![Documentation](https://docs.rs/orgize/badge.svg)](https://docs.rs/orgize)
[![Build status](https://img.shields.io/github/actions/workflow/status/PoiScript/orgize/ci.yml)](https://github.com/PoiScript/orgize/actions/workflows/ci.yml)
![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)
A Rust library for parsing orgmode files.
A Rust library for parsing org-mode files.
[Live demo](https://orgize.herokuapp.com/)
Live Demo: <https://poiscript.github.io/orgize/>
## Parse
To parse a orgmode string, simply invoking the `Org::parse` function:
To parse an org-mode string, simply invoke the `Org::parse` function:
```rust
use orgize::{Org, rowan::ast::AstNode};
let org = Org::parse("* DONE Title :tag:");
assert_eq!(
format!("{:#?}", org.document().syntax()),
r#"DOCUMENT@0..18
HEADLINE@0..18
HEADLINE_STARS@0..1 "*"
WHITESPACE@1..2 " "
HEADLINE_KEYWORD_DONE@2..6 "DONE"
WHITESPACE@6..7 " "
HEADLINE_TITLE@7..13
TEXT@7..13 "Title "
HEADLINE_TAGS@13..18
COLON@13..14 ":"
TEXT@14..17 "tag"
COLON@17..18 ":"
"#);
```
Use `ParseConfig::parse` to specify a custom parse config:
```rust
use orgize::{Org, ParseConfig, ast::Headline};
let config = ParseConfig {
// custom todo keywords
todo_keywords: (vec!["TASK".to_string()], vec![]),
..Default::default()
};
let org = config.parse("* TASK Title 1");
let hdl = org.first_node::<Headline>().unwrap();
assert_eq!(hdl.todo_keyword().unwrap(), "TASK");
```
## Traverse
Use `org.traverse(&mut traversal)` to walk through the syntax tree.
```rust
use orgize::{
export::{from_fn, Container, Event},
Org,
};
let mut hdl_count = 0;
let mut handler = from_fn(|event| {
if matches!(event, Event::Enter(Container::Headline(_))) {
hdl_count += 1;
}
});
Org::parse("* 1\n** 2\n*** 3\n****4").traverse(&mut handler);
assert_eq!(hdl_count, 3);
```
## Modify
Use `org.replace_range(TextRange::new(start, end), "new_text")` to modify the syntax tree:
```rust
use orgize::{Org, ParseConfig, ast::Headline, TextRange};
let mut org = Org::parse("hello\n* world");
let hdl = org.first_node::<Headline>().unwrap();
org.replace_range(hdl.text_range(), "** WORLD!");
let hdl = org.first_node::<Headline>().unwrap();
assert_eq!(hdl.level(), 2);
org.replace_range(TextRange::up_to(hdl.start()), "");
assert_eq!(org.to_org(), "** WORLD!");
```
## Render to html
Call the `Org::to_html` function to export the org element tree to HTML:
```rust
use orgize::Org;
Org::parse("* DONE Title :tag:");
```
or `Org::parse_custom`:
``` rust
use orgize::{Org, ParseConfig};
Org::parse_custom(
"* TASK Title 1",
&ParseConfig {
// custom todo keywords
todo_keywords: (vec!["TASK".to_string()], vec![]),
..Default::default()
},
);
```
## Iter
`Org::iter` function will returns an iterator of `Event`s, which is
a simple wrapper of `Element`.
```rust
use orgize::Org;
for event in Org::parse("* DONE Title :tag:").iter() {
// handling the event
}
```
**Note**: whether an element is container or not, it will appears twice in one loop.
One as `Event::Start(element)`, one as `Event::End(element)`.
## Render html
You can call the `Org::write_html` function to generate html directly, which
uses the `DefaultHtmlHandler` internally:
```rust
use orgize::Org;
let mut writer = Vec::new();
Org::parse("* title\n*section*").write_html(&mut writer).unwrap();
assert_eq!(
String::from_utf8(writer).unwrap(),
Org::parse("* title\n*section*").to_html(),
"<main><h1>title</h1><section><p><b>section</b></p></section></main>"
);
```
## Render html with custom `HtmlHandler`
To customize html rendering, simply implementing `HtmlHandler` trait and passing
it to the `Org::wirte_html_custom` function.
The following code demonstrates how to add a id for every headline and return
own error type while rendering.
```rust
use std::convert::From;
use std::io::{Error as IOError, Write};
use std::string::FromUtf8Error;
use orgize::export::{DefaultHtmlHandler, HtmlHandler};
use orgize::{Element, Org};
use slugify::slugify;
#[derive(Debug)]
enum MyError {
IO(IOError),
Heading,
Utf8(FromUtf8Error),
}
// From<std::io::Error> trait is required for custom error type
impl From<IOError> for MyError {
fn from(err: IOError) -> Self {
MyError::IO(err)
}
}
impl From<FromUtf8Error> for MyError {
fn from(err: FromUtf8Error) -> Self {
MyError::Utf8(err)
}
}
#[derive(Default)]
struct MyHtmlHandler(DefaultHtmlHandler);
impl HtmlHandler<MyError> for MyHtmlHandler {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
if let Element::Title(title) = element {
if title.level > 6 {
return Err(MyError::Heading);
} else {
write!(
w,
"<h{0}><a id=\"{1}\" href=\"#{1}\">",
title.level,
slugify!(&title.raw),
)?;
}
} else {
// fallthrough to default handler
self.0.start(w, element)?;
}
Ok(())
}
fn end<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
if let Element::Title(title) = element {
write!(w, "</a></h{}>", title.level)?;
} else {
self.0.end(w, element)?;
}
Ok(())
}
}
fn main() -> Result<(), MyError> {
let mut writer = Vec::new();
let mut handler = MyHtmlHandler::default();
Org::parse("* title\n*section*").wirte_html_custom(&mut writer, &mut handler)?;
assert_eq!(
String::from_utf8(writer)?,
"<main><h1><a id=\"title\" href=\"#title\">title</a></h1>\
<section><p><b>section</b></p></section></main>"
);
Ok(())
}
```
**Note**: as I mentioned above, each element will appears two times while iterating.
And handler will silently ignores all end events from non-container elements.
So if you want to change how a non-container element renders, just redefine the `start`
function and leave the `end` function unchanged.
## Serde
`Org` struct have already implemented serde's `Serialize` trait. It means you can
serialize it into any format supported by serde, such as json:
```rust
use orgize::Org;
use serde_json::{json, to_string};
let org = Org::parse("I 'm *bold*.");
println!("{}", to_string(&org).unwrap());
// {
// "type": "document",
// "children": [{
// "type": "section",
// "children": [{
// "type": "paragraph",
// "children":[{
// "type": "text",
// "value":"I 'm "
// }, {
// "type": "bold",
// "children":[{
// "type": "text",
// "value": "bold"
// }]
// }, {
// "type":"text",
// "value":"."
// }]
// }]
// }]
// }
```
Check out `examples/html-slugify.rs` to see how to customize the HTML export process.
## Features
By now, orgize provides four features:
- **`chrono`**: adds the ability to convert `Timestamp` into `chrono::NaiveDateTime`, disabled by default.
+ `ser`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default.
- **`indexmap`**: adds the ability to convert `PropertyDrawer` properties into `IndexMap`, disabled by default.
+ `chrono`: adds the ability to convert `Datetime` into `chrono` structs, disabled by default.
## API compatibility
+ `syntect`: provides `SyntectHtmlHandler` for highlighting code block, disabled by default.
`element.syntax()` exposes access to the internal syntax tree, along with some rowan low-level APIs.
This can be useful for intricate tasks.
+ `indexmap`: Uses `IndexMap` instead of `HashMap` for properties to preserve their order, disabled by default.
## License
MIT
However, the structure of the internal syntax tree can change between different versions of the library.
Because of this, the result of `element.syntax()` doesn't follow semantic versioning,
which means updates might break your code if it relies on this method.

1
benches/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*.org

View file

@ -1,30 +1,39 @@
#![feature(test)]
extern crate test;
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use orgize::Org;
use test::Bencher;
#[bench]
fn org_syntax(b: &mut Bencher) {
// wget https://orgmode.org/worg/sources/dev/org-syntax.org
b.iter(|| {
Org::parse(include_str!("org-syntax.org"));
})
const INPUT: &[(&str, &str)] = &[
("doc.org", include_str!("./doc.org")),
("org-faq.org", include_str!("./org-faq.org")),
("org-hacks.org", include_str!("./org-hacks.org")),
(
"org-release-notes.org",
include_str!("./org-release-notes.org"),
),
("org-syntax.org", include_str!("./org-syntax.org")),
];
pub fn bench_parse(c: &mut Criterion) {
let mut group = c.benchmark_group("Org::parse");
for (id, org) in INPUT {
group.throughput(Throughput::Bytes(org.len() as u64));
group.bench_with_input(*id, org, |b, i| b.iter(|| Org::parse(i)));
}
group.finish();
}
#[bench]
fn doc(b: &mut Bencher) {
// wget https://orgmode.org/worg/sources/doc.org
b.iter(|| {
Org::parse(include_str!("doc.org"));
})
pub fn bench_to_html(c: &mut Criterion) {
let mut group = c.benchmark_group("Org::to_html");
for (id, org) in INPUT {
group.throughput(Throughput::Bytes(org.len() as u64));
group.bench_with_input(*id, &Org::parse(org), |b, i| b.iter(|| i.to_html()));
}
group.finish();
}
#[bench]
fn org_faq(b: &mut Bencher) {
// wget https://orgmode.org/worg/sources/org-faq.org
b.iter(|| {
Org::parse(include_str!("org-faq.org"));
})
}
criterion_group!(benches, bench_parse, bench_to_html);
criterion_main!(benches);

44
development.md Normal file
View file

@ -0,0 +1,44 @@
## Format, test, lint
```shell
cargo fmt -- --check
cargo test --all-features
cargo clippy --allow-dirty --allow-staged
```
## Update snapshot testing
```shell
cargo install cargo-insta
cargo insta test --all-features
cargo insta review
```
## Fuzz testing
```shell
cargo install cargo-fuzz
rustup default nightly
cargo fuzz run fuzz_target_1
```
## Benchmark
```shell
curl -q https://orgmode.org/worg/doc.org --output ./benches/doc.org
curl -q https://orgmode.org/worg/org-faq.org --output ./benches/org-faq.org
curl -q https://orgmode.org/worg/org-hacks.org --output ./benches/org-hacks.org
curl -q https://orgmode.org/worg/org-release-notes.org --output ./benches/org-release-notes.org
curl -q https://orgmode.org/worg/org-syntax.org --output ./benches/org-syntax.org
curl -q https://raw.githubusercontent.com/bzg/org-mode/main/doc/org-manual.org --output ./benches/org-manual.org
cargo bench --bench parse
```
## Benchmark w/ flamegraph
```shell
cargo install flamegraph
cargo flamegraph --bench parse -o baseline.svg -- --bench
# then open baseline.svg with your browser
```

View file

@ -4,7 +4,7 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information.
- [x] Headline
- [X] Objects insides headline title
- [ ] Affiliated Keywords
- [x] Affiliated Keywords
## Greater Elements
- [x] Greater Blocks
@ -15,10 +15,10 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information.
- [ ] Objects insides inlinetask title
- [x] Plain Lists and Items
- [x] Nested List
- [ ] Nested List Indentation
- [ ] Tag
- [ ] Counter
- [ ] Counter set
- [x] Nested List Indentation
- [x] Tag
- [x] Counter
- [x] Counter set
- [X] Property Drawers
- [X] Tables
@ -26,25 +26,25 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information.
- [x] Babel Call
- [x] Blocks
- [ ] Escape characters (`#`,`*`, etc)
- [x] Escape characters (`#`,`*`, etc)
- [ ] Line numbers
- [X] Clock, Diary Sexp and Planning
- [x] Comments
- [x] Fixed Width Areas
- [x] Horizontal Rules
- [x] Keywords
- [ ] LaTeX Environments
- [x] LaTeX Environments
- [X] Node Properties
- [x] Paragraphs
- [X] Table Rows
## Objects
- [ ] Entities and LaTeX Fragments
- [x] Entities and LaTeX Fragments
- [x] Export Snippets
- [x] Footnote References
- [x] Inline Babel Calls and Source Blocks
- [ ] Line Breaks
- [x] Line Breaks
- [x] Links
- [x] Regular link
- [ ] Plain link
@ -53,17 +53,22 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information.
- [x] Macros
- [x] Targets and Radio Targets
- [x] Statistics Cookies
- [ ] Subscript and Superscript
- [x] Subscript and Superscript
- [X] Table Cells
- [x] Timestamps
- [x] Text Markup
- [x] bold
- [x] italic
- [x] underline
- [x] verbatim
- [x] code
- [x] strike-through
## Export
- [x] HTML
- [X] Org
- [x] HTML `Org::to_html`
- [X] Org `Org::to_org`
- [ ] LaTeX
- [X] JSON, (via Serde)
## Extra

View file

@ -1,942 +0,0 @@
# Table of Contents
1. [Headlines and Sections](#Headlines_and_Sections)
2. [Affiliated Keywords](#Affiliated_keywords)
3. [Greater Elements](#Greater_Elements)
1. [Greater Blocks](#Greater_Blocks)
2. [Drawers and Property Drawers](#Drawers)
3. [Dynamic Blocks](#Dynamic_Blocks)
4. [Footnote Definitions](#Footnote_Definitions)
5. [Inlinetasks](#Inlinetasks)
6. [Plain Lists and Items](#Plain_Lists_and_Items)
7. [Property Drawers](#Property_Drawers)
8. [Tables](#Tables)
4. [Elements](#Elements)
1. [Babel Call](#Babel_Call)
2. [Blocks](#Blocks)
3. [Clock, Diary Sexp and Planning](#Clock,_Diary_Sexp_and_Planning)
4. [Comments](#Comments)
5. [Fixed Width Areas](#Fixed_Width_Areas)
6. [Horizontal Rules](#Horizontal_Rules)
7. [Keywords](#Keywords)
8. [LaTeX Environments](#LaTeX_Environments)
9. [Node Properties](#Node_Properties)
10. [Paragraphs](#Paragraphs)
11. [Table Rows](#Table_Rows)
5. [Objects](#Objects)
1. [Entities and LaTeX Fragments](#Entities_and_LaTeX_Fragments)
2. [Export Snippets](#Export_Snippets)
3. [Footnote References](#Footnote_References)
4. [Inline Babel Calls and Source
Blocks](#Inline_Babel_Calls_and_Source_Blocks)
5. [Line Breaks](#Line_Breaks)
6. [Links](#Links)
7. [Macros](#Macros)
8. [Targets and Radio Targets](#Targets_and_Radio_Targets)
9. [Statistics Cookies](#Statistics_Cookies)
10. [Subscript and Superscript](#Subscript_and_Superscript)
11. [Table Cells](#Table_Cells)
12. [Timestamps](#Timestamp)
13. [Text Markup](#Emphasis_Markers)
This document describes and comments Org syntax as it is currently read by its
parser (Org Elements) and, therefore, by the export framework. It also includes
a few comments on that syntax.
A core concept in this syntax is that only headlines, sections, planning lines
and property drawers are context-free<sup><a id="fnr.1" class="footref"
href="#fn.1">1</a></sup><sup>, </sup><sup><a id="fnr.2" class="footref"
href="#fn.2">2</a></sup>. Every other syntactical part only exists within
specific environments.
Three categories are used to classify these environments: **Greater elements**,
**elements**, and **objects**, from the broadest scope to the narrowest. The
word **element** is used for both Greater and non-Greater elements, the context
should make that clear.
The paragraph is the unit of measurement. An element defines syntactical parts
that are at the same level as a paragraph, i.e. which cannot contain or be
included in a paragraph. An object is a part that could be included in an
element. Greater elements are all parts that can contain an element.
Empty lines belong to the largest element ending before them. For example, in a
list, empty lines between items are part of the item before them, but
empty lines at the end of a list belong to the plain list element.
Unless specified otherwise, case is not significant.
<a id="Headlines_and_Sections"></a>
# Headlines and Sections
A headline is defined as:
STARS KEYWORD PRIORITY TITLE TAGS
STARS is a string starting at column 0, containing at least one asterisk (and up
to `org-inlinetask-min-level` if `org-inlinetask` library is loaded) and ended
by a space character. The number of asterisks is used to define the level of the
headline. It's the sole compulsory part of a headline.
KEYWORD is a TODO keyword, which has to belong to the list defined in
`org-todo-keywords-1`. Case is significant.
PRIORITY is a priority cookie, i.e. a single letter preceded by a hash sign #
and enclosed within square brackets.
TITLE can be made of any character but a new line. Though, it will match after
every other part have been matched.
TAGS is made of words containing any alpha-numeric character, underscore, at
sign, hash sign or percent sign, and separated with colons.
Examples of valid headlines include:
*
** DONE
*** Some e-mail
**** TODO [#A] COMMENT Title :tag:a2%:
If the first word appearing in the title is `COMMENT`, the headline will be
considered as **commented**. Case is significant.
If its title is `org-footnote-section`, it will be considered as a **footnote
section**. Case is significant.
If `ARCHIVE` is one of its tags, it will be considered as **archived**. Case is
significant.
A headline contains directly one section (optionally), followed by any number of
deeper level headlines.
A section contains directly any greater element or element. Only a headline can
contain a section. As an exception, text before the first headline in the
document also belongs to a section.
As an example, consider the following document:
An introduction.
* A Headline
Some text.
** Sub-Topic 1
** Sub-Topic 2
*** Additional entry
Its internal structure could be summarized as:
(document
(section)
(headline
(section)
(headline)
(headline
(headline))))
<a id="Affiliated_keywords"></a>
# Affiliated Keywords
With the exception of [inlinetasks](#Inlinetasks),
[items](#Plain_Lists_and_Items), [planning](#Clock,_Diary_Sexp_and_Planning),
[clocks](#Clock,_Diary_Sexp_and_Planning), [node properties](#Node_Properties)
and [table rows](#Table_Rows), every other element type can be assigned
attributes.
This is done by adding specific keywords, named **affiliated keywords**, just
above the element considered, no blank line allowed.
Affiliated keywords are built upon one of the following patterns: `#+KEY: VALUE`,
`#+KEY[OPTIONAL]: VALUE` or `#+ATTR_BACKEND: VALUE`.
KEY is either `CAPTION`, `HEADER`, `NAME`, `PLOT` or `RESULTS` string.
BACKEND is a string constituted of alpha-numeric characters, hyphens or
underscores.
OPTIONAL and VALUE can contain any character but a new line. Only `CAPTION` and
`RESULTS` keywords can have an optional value.
An affiliated keyword can appear more than once if KEY is either `CAPTION` or
`HEADER` or if its pattern is `#+ATTR_BACKEND: VALUE`.
`CAPTION`, `AUTHOR`, `DATE` and `TITLE` keywords can contain objects in their
value and their optional value, if applicable.
<a id="Greater_Elements"></a>
# Greater Elements
Unless specified otherwise, greater elements can contain directly any other
element or greater element excepted:
- elements of their own type,
- [node properties](#Node_Properties), which can only be found in [property
drawers](#Property_Drawers),
- [items](#Plain_Lists_and_Items), which can only be found in [plain
lists](#Plain_Lists_and_Items).
<a id="Greater_Blocks"></a>
## Greater Blocks
Greater blocks consist in the following pattern:
#+BEGIN_NAME PARAMETERS
CONTENTS
#+END_NAME
NAME can contain any non-whitespace character.
PARAMETERS can contain any character other than new line, and can be omitted.
If NAME is `CENTER`, it will be a **center block**. If it is `QUOTE`, it will be
a **quote block**.
If the block is neither a center block, a quote block or a [block
element](#Blocks), it will be a **special block**.
CONTENTS can contain any element, except a line `#+END_NAME` on its own. Also
lines beginning with STARS must be quoted by a comma.
<a id="Drawers"></a>
## Drawers and Property Drawers
Pattern for drawers is:
:NAME:
CONTENTS
:END:
NAME can contain word-constituent characters, hyphens and underscores.
CONTENTS can contain any element but another drawer.
<a id="Dynamic_Blocks"></a>
## Dynamic Blocks
Pattern for dynamic blocks is:
#+BEGIN: NAME PARAMETERS
CONTENTS
#+END:
NAME cannot contain any whitespace character.
PARAMETERS can contain any character and can be omitted.
<a id="Footnote_Definitions"></a>
## Footnote Definitions
Pattern for footnote definitions is:
[fn:LABEL] CONTENTS
It must start at column 0.
LABEL is either a number or follows the pattern `fn:WORD`, where word can
contain any word-constituent character, hyphens and underscore characters.
CONTENTS can contain any element excepted another footnote definition. It ends
at the next footnote definition, the next headline, two consecutive empty lines
or the end of buffer.
<a id="Inlinetasks"></a>
## Inlinetasks
Inlinetasks are defined by `org-inlinetask-min-level` contiguous asterisk
characters starting at column 0, followed by a whitespace character.
Optionally, inlinetasks can be ended with a string constituted of
`org-inlinetask-min-level` contiguous asterisk characters starting at column 0,
followed by a space and the `END` string.
Inlinetasks are recognized only after `org-inlinetask` library is loaded.
<a id="Plain_Lists_and_Items"></a>
## Plain Lists and Items
Items are defined by a line starting with the following pattern:
`BULLET COUNTER-SET CHECK-BOX TAG`, in which only BULLET is mandatory.
BULLET is either an asterisk, a hyphen, a plus sign character or follows either
the pattern `COUNTER.` or `COUNTER)`. In any case, BULLET is followed by a
whitespace character or line ending.
COUNTER can be a number or a single letter.
COUNTER-SET follows the pattern [@COUNTER].
CHECK-BOX is either a single whitespace character, a `X` character or a hyphen,
enclosed within square brackets.
TAG follows `TAG-TEXT ::` pattern, where TAG-TEXT can contain any character but
a new line.
An item ends before the next item, the first line less or equally indented than
its starting line, or two consecutive empty lines. Indentation of lines within
other greater elements do not count, neither do inlinetasks boundaries.
A plain list is a set of consecutive items of the same indentation. It can only
directly contain items.
If first item in a plain list has a counter in its bullet, the plain list will
be an **ordered plain-list**. If it contains a tag, it will be a **descriptive
list**. Otherwise, it will be an **unordered list**. List types are mutually
exclusive.
For example, consider the following excerpt of an Org document:
1. item 1
2. [X] item 2
- some tag :: item 2.1
Its internal structure is as follows:
(ordered-plain-list
(item)
(item
(descriptive-plain-list
(item))))
<a id="Property_Drawers"></a>
## Property Drawers
Property drawers are a special type of drawer containing properties attached to
a headline. They are located right after a [headline](#Headlines_and_Sections)
and its [planning](#Clock,_Diary_Sexp_and_Planning) information.
HEADLINE
PROPERTYDRAWER
HEADLINE
PLANNING
PROPERTYDRAWER
PROPERTYDRAWER follows the pattern
:PROPERTIES:
CONTENTS
:END:
where CONTENTS consists of zero or more [node properties](#Node_Properties).
<a id="Tables"></a>
## Tables
Tables start at lines beginning with either a vertical bar or the `+-` string
followed by plus or minus signs only, assuming they are not preceded with lines
of the same type. These lines can be indented.
A table starting with a vertical bar has `org` type. Otherwise it has `table.el`
type.
Org tables end at the first line not starting with a vertical bar. Table.el
tables end at the first line not starting with either a vertical line or a plus
sign. Such lines can be indented.
An org table can only contain table rows. A table.el table does not contain
anything.
One or more `#+TBLFM: FORMULAS` lines, where `FORMULAS` can contain any
character, can follow an org table.
<a id="Elements"></a>
# Elements
Elements cannot contain any other element.
Only [keywords](#Keywords) whose name belongs to
`org-element-document-properties`, [verse blocks](#Blocks) ,
[paragraphs](#Paragraphs) and [table rows](#Table_Rows) can contain objects.
<a id="Babel_Call"></a>
## Babel Call
Pattern for babel calls is:
#+CALL: VALUE
VALUE is optional. It can contain any character but a new line.
<a id="Blocks"></a>
## Blocks
Like [greater blocks](#Greater_Blocks), pattern for blocks is:
#+BEGIN_NAME DATA
CONTENTS
#+END_NAME
NAME cannot contain any whitespace character.
1. If NAME is `COMMENT`, it will be a **comment block**.
2. If it is `EXAMPLE`, it will be an **example block**.
3. If it is `EXPORT`, it will be an **export block**.
4. If it is `SRC`, it will be a **source block**.
5. If it is `VERSE`, it will be a **verse block**.
DATA can contain any character but a new line. It can be omitted, unless the
block is either a **source block** or an **export block**.
In the latter case, it should be constituted of a single word.
In the former case, it must follow the pattern `LANGUAGE SWITCHES ARGUMENTS`,
where SWITCHES and ARGUMENTS are optional.
LANGUAGE cannot contain any whitespace character.
SWITCHES is made of any number of `SWITCH` patterns, separated by blank characters.
A SWITCH pattern is either `-l FORMAT`, where FORMAT can contain any character
but a double quote and a new line, `-S` or `+S`, where S stands for a single
letter.
ARGUMENTS can contain any character but a new line.
CONTENTS can contain any character, including new lines. Though it will only
contain Org objects if the block is a verse block. Otherwise, CONTENTS will not
be parsed.
<a id="Clock,_Diary_Sexp_and_Planning"></a>
## Clock, Diary Sexp and Planning
A clock follows either of the patterns below:
CLOCK: INACTIVE-TIMESTAMP
CLOCK: INACTIVE-TIMESTAMP-RANGE DURATION
INACTIVE-TIMESTAMP, resp. INACTIVE-TIMESTAMP-RANGE, is an inactive, resp.
inactive range, [timestamp](#Timestamp) object.
DURATION follows the pattern:
=> HH:MM
HH is a number containing any number of digits. MM is a two-digit number.
A diary sexp is a line starting at column 0 with `%%(` string. It can then
contain any character besides a new line.
A planning is an element with the following pattern:
HEADLINE
PLANNING
where HEADLINE is a [headline](#Headlines_and_Sections) element and PLANNING is
a line filled with INFO parts, where each of them follows the pattern:
KEYWORD: TIMESTAMP
KEYWORD is either `DEADLINE`, `SCHEDULED` or `CLOSED`. TIMESTAMP is a
[timestamp](#Timestamp) object.
In particular, no blank line is allowed between PLANNING and HEADLINE.
<a id="Comments"></a>
## Comments
A **comment line** starts with a hash sign and a whitespace character or an end
of line.
Comments can contain any number of consecutive comment lines.
<a id="Fixed_Width_Areas"></a>
## Fixed Width Areas
A **fixed-width line** starts with a colon character and a whitespace or an end
of line.
Fixed width areas can contain any number of consecutive fixed-width lines.
<a id="Horizontal_Rules"></a>
## Horizontal Rules
A horizontal rule is a line made of at least 5 consecutive hyphens. It can be
indented.
<a id="Keywords"></a>
## Keywords
Keywords follow the syntax:
#+KEY: VALUE
KEY can contain any non-whitespace character, but it cannot be equal to `CALL`
or any affiliated keyword.
VALUE can contain any character excepted a new line.
If KEY belongs to `org-element-document-properties`, VALUE can contain objects.
<a id="LaTeX_Environments"></a>
## LaTeX Environments
Pattern for LaTeX environments is:
\begin{NAME} CONTENTS \end{NAME}
NAME is constituted of alpha-numeric or asterisk characters.
CONTENTS can contain anything but the `\end{NAME}` string.
<a id="Node_Properties"></a>
## Node Properties
Node properties can only exist in [property drawers](#Property_Drawers). Their
pattern is any of the following
:NAME: VALUE
:NAME+: VALUE
:NAME:
:NAME+:
NAME can contain any non-whitespace character but cannot end with a plus sign.
It cannot be the empty string.
VALUE can contain anything but a newline character.
<a id="Paragraphs"></a>
## Paragraphs
Paragraphs are the default element, which means that any unrecognized context is
a paragraph.
Empty lines and other elements end paragraphs.
Paragraphs can contain every type of object.
<a id="Table_Rows"></a>
## Table Rows
A table row is either constituted of a vertical bar and any number of [table
cells](#Table_Cells) or a vertical bar followed by a hyphen.
In the first case the table row has the **standard** type. In the second case,
it has the **rule** type.
Table rows can only exist in [tables](#Tables).
<a id="Objects"></a>
# Objects
Objects can only be found in the following locations:
- [affiliated keywords](#Affiliated_keywords) defined in
`org-element-parsed-keywords`,
- [document properties](#Keywords),
- [headline](#Headlines_and_Sections) titles,
- [inlinetask](#Inlinetasks) titles,
- [item](#Plain_Lists_and_Items) tags,
- [paragraphs](#Paragraphs),
- [table cells](#Table_Cells),
- [table rows](#Table_Rows), which can only contain table cell objects,
- [verse blocks](#Blocks).
Most objects cannot contain objects. Those which can will be specified.
<a id="Entities_and_LaTeX_Fragments"></a>
## Entities and LaTeX Fragments
An entity follows the pattern:
\NAME POST
where NAME has a valid association in either `org-entities` or
`org-entities-user`.
POST is the end of line, `{}` string, or a non-alphabetical character. It isn't
separated from NAME by a whitespace character.
A LaTeX fragment can follow multiple patterns:
\NAME BRACKETS
\(CONTENTS\)
\[CONTENTS\]
$$CONTENTS$$
PRE$CHAR$POST
PRE$BORDER1 BODY BORDER2$POST
NAME contains alphabetical characters only and must not have an association in
either **org-entities** or **org-entities-user**.
BRACKETS is optional, and is not separated from NAME with white spaces. It may
contain any number of the following patterns:
[CONTENTS1]
{CONTENTS2}
where CONTENTS1 can contain any characters excepted `{` `}`, `[` `]` and newline
and CONTENTS2 can contain any character excepted `{`, `}` and newline.
CONTENTS can contain any character but cannot contain `\\)` in the second
template or `\\]` in the third one.
PRE is either the beginning of line or a character different from `$`.
CHAR is a non-whitespace character different from `.`, `,`, `?`, `;`, `'` or a
double quote.
POST is any punctuation (including parentheses and quotes) or space character,
or the end of line.
BORDER1 is a non-whitespace character different from `.`, `,`, `;` and `$`.
BODY can contain any character excepted `$`, and may not span over more than 3
lines.
BORDER2 is any non-whitespace character different from `,`, `.` and `$`.
---
> It would introduce incompatibilities with previous Org versions, but support
> for `$...$` (and for symmetry, `$$...$$`) constructs ought to be removed.
>
> They are slow to parse, fragile, redundant and imply false positives. &#x2014;
> ngz
<a id="Export_Snippets"></a>
## Export Snippets
Pattern for export snippets is:
@@NAME:VALUE@@
NAME can contain any alpha-numeric character and hyphens.
VALUE can contain anything but `@@` string.
<a id="Footnote_References"></a>
## Footnote References
There are three patterns for footnote references:
[fn:LABEL]
[fn:LABEL:DEFINITION]
[fn::DEFINITION]
LABEL can contain any word constituent character, hyphens and underscores.
DEFINITION can contain any character. Though opening and closing square brackets
must be balanced in it. It can contain any object encountered in a paragraph,
even other footnote references.
If the reference follows the second pattern, it is called an **inline
footnote**. If it follows the third one, i.e. if LABEL is omitted, it is an
**anonymous footnote**.
<a id="Inline_Babel_Calls_and_Source_Blocks"></a>
## Inline Babel Calls and Source Blocks
Inline Babel calls follow any of the following patterns:
call_NAME(ARGUMENTS)
call_NAME[HEADER](ARGUMENTS)[HEADER]
NAME can contain any character besides `(`, `)` and `\n`.
HEADER can contain any character besides `]` and `\n`.
ARGUMENTS can contain any character besides `)` and `\n`.
Inline source blocks follow any of the following patterns:
src_LANG{BODY}
src_LANG[OPTIONS]{BODY}
LANG can contain any non-whitespace character.
OPTIONS and BODY can contain any character but `\n`.
<a id="Line_Breaks"></a>
## Line Breaks
A line break consists in `\\\SPACE` pattern at the end of an otherwise non-empty
line.
SPACE can contain any number of tabs and spaces, including 0.
<a id="Links"></a>
## Links
There are 4 major types of links:
PRE1 RADIO POST1 ("radio" link)
<PROTOCOL:PATH> ("angle" link)
PRE2 PROTOCOL:PATH2 POST2 ("plain" link)
[[PATH3]DESCRIPTION] ("regular" link)
PRE1 and POST1, when they exist, are non alphanumeric characters.
RADIO is a string matched by some [radio target](#Targets_and_Radio_Targets). It
may contain [entities](#Entities_and_LaTeX_Fragments), [latex
fragments](#Entities_and_LaTeX_Fragments),
[subscript](#Subscript_and_Superscript) and
[superscript](#Subscript_and_Superscript).
PROTOCOL is a string among `org-link-types`.
PATH can contain any character but `]`, `<`, `>` and `\n`.
PRE2 and POST2, when they exist, are non word constituent characters.
PATH2 can contain any non-whitespace character excepted `(`, `)`, `<` and `>`.
It must end with a word-constituent character, or any non-whitespace
non-punctuation character followed by `/`.
DESCRIPTION must be enclosed within square brackets. It can contain any
character but square brackets. It can contain any object found in a paragraph
excepted a [footnote reference](#Footnote_References), a [radio
target](#Targets_and_Radio_Targets) and a [line break](#Line_Breaks). It cannot
contain another link either, unless it is a plain or angular link.
DESCRIPTION is optional.
PATH3 is built according to the following patterns:
FILENAME ("file" type)
PROTOCOL:PATH4 ("PROTOCOL" type)
PROTOCOL://PATH4 ("PROTOCOL" type)
id:ID ("id" type)
#CUSTOM-ID ("custom-id" type)
(CODEREF) ("coderef" type)
FUZZY ("fuzzy" type)
FILENAME is a file name, either absolute or relative.
PATH4 can contain any character besides square brackets.
ID is constituted of hexadecimal numbers separated with hyphens.
PATH4, CUSTOM-ID, CODEREF and FUZZY can contain any character besides square
brackets.
<a id="Macros"></a>
## Macros
Macros follow the pattern:
{{{NAME(ARGUMENTS)}}}
NAME must start with a letter and can be followed by any number of alpha-numeric
characters, hyphens and underscores.
ARGUMENTS can contain anything but `}}}` string. Values within ARGUMENTS are
separated by commas. Non-separating commas have to be escaped with a backslash
character.
<a id="Targets_and_Radio_Targets"></a>
## Targets and Radio Targets
Radio targets follow the pattern:
<<<CONTENTS>>>
CONTENTS can be any character besides `<`, `>` and `\n`. It cannot start or end
with a whitespace character. As far as objects go, it can contain [text
markup](#Emphasis_Markers), [entities](#Entities_and_LaTeX_Fragments), [latex
fragments](#Entities_and_LaTeX_Fragments),
[subscript](#Subscript_and_Superscript) and
[superscript](#Subscript_and_Superscript) only.
Targets follow the pattern:
<<TARGET>>
TARGET can contain any character besides `<`, `>` and `\n`. It cannot start or
end with a whitespace character. It cannot contain any object.
<a id="Statistics_Cookies"></a>
## Statistics Cookies
Statistics cookies follow either pattern:
[PERCENT%]
[NUM1/NUM2]
PERCENT, NUM1 and NUM2 are numbers or the empty string.
<a id="Subscript_and_Superscript"></a>
## Subscript and Superscript
Pattern for subscript is:
CHAR_SCRIPT
Pattern for superscript is:
CHAR^SCRIPT
CHAR is any non-whitespace character.
SCRIPT can be `*` or an expression enclosed in parenthesis (respectively curly
brackets), possibly containing balanced parenthesis (respectively curly
brackets).
SCRIPT can also follow the pattern:
SIGN CHARS FINAL
SIGN is either a plus sign, a minus sign, or an empty string.
CHARS is any number of alpha-numeric characters, commas, backslashes and dots,
or an empty string.
FINAL is an alpha-numeric character.
There is no white space between SIGN, CHARS and FINAL.
<a id="Table_Cells"></a>
## Table Cells
Table cells follow the pattern:
CONTENTS SPACES|
CONTENTS can contain any character excepted a vertical bar.
SPACES contains any number of space characters, including zero. It can be used
to align properly the table.
The final bar may be replaced with a newline character for the last cell in row.
<a id="Timestamp"></a>
## Timestamps
There are seven possible patterns for timestamps:
<%%(SEXP)> (diary)
<DATE TIME REPEATER-OR-DELAY> (active)
[DATE TIME REPEATER-OR-DELAY] (inactive)
<DATE TIME REPEATER-OR-DELAY>--<DATE TIME REPEATER-OR-DELAY> (active range)
<DATE TIME-TIME REPEATER-OR-DELAY> (active range)
[DATE TIME REPEATER-OR-DELAY]--[DATE TIME REPEATER-OR-DELAY] (inactive range)
[DATE TIME-TIME REPEATER-OR-DELAY] (inactive range)
SEXP can contain any character excepted `>` and `\n`.
DATE follows the pattern:
YYYY-MM-DD DAYNAME
`Y`, `M` and `D` are digits. DAYNAME can contain any non whitespace-character
besides `+`, `-`, `]`, `>`, a digit or `\n`.
TIME follows the pattern `H:MM`. `H` can be one or two digit long and can start
with 0.
REPEATER-OR-DELAY follows the pattern:
MARK VALUE UNIT
MARK is `+` (cumulate type), `++` (catch-up type) or `.+` (restart type) for a
repeater, and `-` (all type) or `--` (first type) for warning delays.
VALUE is a number.
UNIT is a character among `h` (hour), `d` (day), `w` (week), `m` (month), `y`
(year).
MARK, VALUE and UNIT are not separated by whitespace characters.
There can be two REPEATER-OR-DELAY in the timestamp: one as a repeater and one
as a warning delay.
<a id="Emphasis_Markers"></a>
## Text Markup
Text markup follows the pattern:
PRE MARKER CONTENTS MARKER POST
PRE is a whitespace character, `(`, `{`, `'` or a double quote. It can also be a
beginning of line.
MARKER is a character among `*` (bold), `=` (verbatim), `/` (italic), `+`
(strike-through), `_` (underline), `~` (code).
CONTENTS is a string following the pattern:
BORDER BODY BORDER
BORDER can be any non-whitespace character excepted `,`, `'` or a double quote.
BODY can contain any character but may not span over more than 3 lines.
BORDER and BODY are not separated by whitespaces.
CONTENTS can contain any object encountered in a paragraph when markup is
**bold**, **italic**, **strike-through** or **underline**.
POST is a whitespace character, `-`, `.`, `,`, `:`, `!`, `?`, `'`, `)`, `}` or a
double quote. It can also be an end of line.
PRE, MARKER, CONTENTS, MARKER and POST are not separated by whitespace
characters.
---
> All of this is wrong if `org-emphasis-regexp-components` or
> `org-emphasis-alist` are modified.
>
> This should really be simplified.
>
> Also, CONTENTS should be anything within code and verbatim emphasis, by
> definition. &#x2014; ngz
# Footnotes
<sup><a id="fn.1" href="#fnr.1">1</a></sup> In particular, the parser requires
stars at column 0 to be quoted by a comma when they do not define a headline.
<sup><a id="fn.2" href="#fnr.2">2</a></sup> It also means that only headlines
and sections can be recognized just by looking at the beginning of the line.
Planning lines and property drawers can be recognized by looking at one or two
lines above.
As a consequence, using `org-element-at-point` or `org-element-context` will
move up to the parent headline, and parse top-down from there until context
around original location is found.

View file

@ -1,81 +0,0 @@
use std::convert::From;
use std::env::args;
use std::fs;
use std::io::{Error as IOError, Write};
use std::result::Result;
use std::string::FromUtf8Error;
use orgize::export::{DefaultHtmlHandler, HtmlHandler};
use orgize::{Element, Org};
use slugify::slugify;
/// Error type used by this example's custom HTML handler and by `main`.
#[derive(Debug)]
enum MyError {
/// Wraps an I/O error (built through the `From<IOError>` impl).
IO(IOError),
/// Returned by the handler's `start` when a title's level is greater than 6.
Heading,
/// Wraps a UTF-8 decoding error (built through the `From<FromUtf8Error>` impl).
Utf8(FromUtf8Error),
}
// From<std::io::Error> trait is required for custom error type
impl From<IOError> for MyError {
fn from(err: IOError) -> Self {
MyError::IO(err)
}
}
impl From<FromUtf8Error> for MyError {
fn from(err: FromUtf8Error) -> Self {
MyError::Utf8(err)
}
}
/// HTML handler wrapping a `DefaultHtmlHandler`; its `HtmlHandler` impl forwards
/// every element other than `Element::Title` to the wrapped handler.
#[derive(Default)]
struct MyHtmlHandler(DefaultHtmlHandler);
impl HtmlHandler<MyError> for MyHtmlHandler {
    /// Writes the opening markup for `element`.
    ///
    /// A title opens an `<hN>` tag followed by an anchor whose `id` and `href`
    /// are the slugified raw title. A title whose level is greater than 6 is
    /// rejected with `MyError::Heading`. Any other element is handed to the
    /// wrapped default handler.
    fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
        match element {
            Element::Title(title) if title.level > 6 => return Err(MyError::Heading),
            Element::Title(title) => write!(
                w,
                "<h{0}><a id=\"{1}\" href=\"#{1}\">",
                title.level,
                slugify!(&title.raw),
            )?,
            // everything else goes to the default handler
            other => self.0.start(w, other)?,
        }
        Ok(())
    }

    /// Writes the closing markup for `element`.
    ///
    /// A title closes the anchor and heading tag written by `start`; any other
    /// element is handed to the wrapped default handler.
    fn end<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
        match element {
            Element::Title(title) => write!(w, "</a></h{}>", title.level)?,
            other => self.0.end(w, other)?,
        }
        Ok(())
    }
}
/// Reads the Org file named by the first command-line argument, renders it to
/// HTML through `MyHtmlHandler`, and prints the result to stdout.
///
/// Prints a usage line to stderr (and still returns `Ok`) when no path is given.
fn main() -> Result<(), MyError> {
    let argv: Vec<_> = args().collect();
    if argv.len() < 2 {
        eprintln!("Usage: {} <org-file>", argv[0]);
        return Ok(());
    }

    let bytes = fs::read(&argv[1])?;
    let contents = String::from_utf8(bytes)?;

    let mut handler = MyHtmlHandler::default();
    let mut writer = Vec::new();
    Org::parse(&contents).write_html_custom(&mut writer, &mut handler)?;

    let html = String::from_utf8(writer)?;
    println!("{}", html);
    Ok(())
}

44
examples/html-slugify.rs Normal file
View file

@ -0,0 +1,44 @@
//! ```bash
//! cargo run --example html-slugify '* hello world!'
//! ```
use orgize::{
export::HtmlExport,
export::{from_fn_with_ctx, Container, Event, Traverser},
Org,
};
use slugify::slugify;
use std::cmp::min;
use std::env::args;
/// Renders the Org string given as the first command-line argument to HTML,
/// wrapping every headline title in a slugified self-link, and prints it.
///
/// Prints a usage line to stderr when no argument is given.
fn main() {
    let args: Vec<_> = args().collect();

    if args.len() < 2 {
        eprintln!("Usage: {} <org-mode-string>", args[0]);
        return;
    }

    let mut html_export = HtmlExport::default();

    let mut handler = from_fn_with_ctx(|event, ctx| match event {
        Event::Enter(Container::Headline(headline)) => {
            // the heading level written to the tag is capped at 6
            let level = min(headline.level(), 6);
            let title: String = headline.title().map(|part| part.to_string()).collect();
            let slug = slugify!(&title);

            html_export.push_str(format!(
                "<h{level}><a id=\"{0}\" href=\"#{0}\">",
                slug
            ));

            for part in headline.title() {
                html_export.element(part, ctx);
            }

            html_export.push_str(format!("</a></h{level}>"));
        }
        other => {
            // forward to default html export
            html_export.event(other, ctx);
        }
    });

    Org::parse(&args[1]).traverse(&mut handler);

    println!("{}", html_export.finish());
}

View file

@ -1,19 +0,0 @@
use orgize::Org;
use std::env::args;
use std::fs;
use std::io::Result;
/// Parses the Org file named by the first command-line argument and prints
/// every event yielded by `Org::iter`, one per line, in `Debug` format.
///
/// Prints a usage line to stderr (and returns `Ok`) when no path is given.
///
/// # Errors
///
/// Returns the I/O error when the file cannot be read, including when its
/// contents are not valid UTF-8.
fn main() -> Result<()> {
    let args: Vec<_> = args().collect();
    if args.len() < 2 {
        eprintln!("Usage: {} <org-file>", args[0]);
        return Ok(());
    }

    // `read_to_string` reports invalid UTF-8 as an `io::Error`, so a bad input
    // file is surfaced through `main`'s result instead of a panic.
    let contents = fs::read_to_string(&args[1])?;
    for event in Org::parse(&contents).iter() {
        println!("{:?}", event);
    }
    Ok(())
}

View file

@ -1,17 +0,0 @@
use orgize::Org;
use serde_json::to_string;
use std::env::args;
use std::fs;
use std::io::Result;
/// Parses the Org file named by the first command-line argument and prints
/// the parsed document serialized as JSON.
///
/// Prints a usage line to stderr (and returns `Ok`) when no path is given.
///
/// # Errors
///
/// Returns the I/O error when the file cannot be read, including when its
/// contents are not valid UTF-8.
///
/// # Panics
///
/// Panics if JSON serialization of the parsed document fails.
fn main() -> Result<()> {
    let args: Vec<_> = args().collect();
    if args.len() < 2 {
        eprintln!("Usage: {} <org-file>", args[0]);
        return Ok(());
    }

    // `read_to_string` reports invalid UTF-8 as an `io::Error`, so a bad input
    // file is surfaced through `main`'s result instead of a panic.
    let contents = fs::read_to_string(&args[1])?;
    println!("{}", to_string(&Org::parse(&contents)).unwrap());
    Ok(())
}

23
examples/markdown.rs Normal file
View file

@ -0,0 +1,23 @@
//! ```bash
//! cargo run --example markdown test.org
//! ```
use orgize::{export::MarkdownExport, Org};
use std::{env::args, fs};
/// Converts the Org file named by the first command-line argument to Markdown
/// and writes the result next to it as `<path>.md`.
///
/// When no path is given, prints a usage line to stderr and exits with
/// status 1 (a usage error is not a bug, so it is not reported via `panic!`).
///
/// # Panics
///
/// Panics if the input file cannot be read as UTF-8 text or the output file
/// cannot be written.
fn main() {
    let args: Vec<_> = args().collect();
    if args.len() < 2 {
        eprintln!("Usage: {} <org-mode-file>", args[0]);
        std::process::exit(1);
    }

    let input = &args[1];
    let content = fs::read_to_string(input).unwrap();

    let mut export = MarkdownExport::default();
    Org::parse(content).traverse(&mut export);

    // Build the output path once and reuse it for the write and the message.
    let output = format!("{}.md", input);
    fs::write(&output, export.finish()).unwrap();

    println!("Wrote to {}", output);
}

30
examples/parse.rs Normal file
View file

@ -0,0 +1,30 @@
//! ```bash
//! cargo run --example parse '* hello\n** /world/!'
//! ```
use orgize::Org;
use rowan::ast::AstNode;
use std::env::args;
use tracing_subscriber::fmt::format::FmtSpan;
fn main() {
let args: Vec<_> = args().collect();
tracing_subscriber::fmt()
.without_time()
.with_file(true)
.with_span_events(FmtSpan::NEW)
.with_line_number(true)
.with_max_level(tracing::Level::TRACE)
.with_file(false)
.with_line_number(false)
.init();
if args.len() < 2 {
eprintln!("Usage: {} <org-mode-string>", args[0]);
} else {
let s = &args[1].replace(r"\n", "\n").replace(r"\r", "\r");
let org = Org::parse(s);
println!("{:#?}", org.document().syntax());
}
}

1
fuzz/.gitignore vendored
View file

@ -1,3 +1,4 @@
target
corpus
artifacts
coverage

View file

@ -1,20 +1,27 @@
[package]
name = "orgize-fuzz"
version = "0.0.1"
authors = ["Automatically generated"]
version = "0.0.0"
publish = false
edition = "2018"
[package.metadata]
cargo-fuzz = true
[dependencies]
libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" }
orgize = { path = ".." }
libfuzzer-sys = "0.4"
[dependencies.orgize]
path = ".."
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[profile.release]
debug = 1
[[bin]]
name = "fuzz_target_1"
path = "fuzz_targets/fuzz_target_1.rs"
test = false
doc = false

View file

@ -1,14 +1,7 @@
#![no_main]
#[macro_use]
extern crate libfuzzer_sys;
extern crate orgize;
use orgize::Org;
#[cfg_attr(rustfmt, rustfmt_skip)]
libfuzzer_sys::fuzz_target!(|data: &[u8]| {
if let Ok(s) = std::str::from_utf8(data) {
let _ = Org::parse(s);
if let Ok(utf8) = std::str::from_utf8(data) {
let _ = orgize::Org::parse(utf8);
}
});

View file

@ -0,0 +1,52 @@
use crate::syntax::SyntaxKind;
use super::{filter_token, AffiliatedKeyword, Token};
impl AffiliatedKeyword {
    /// Returns the keyword name, i.e. the first `TEXT` token of this node.
    ///
    /// # Panics
    ///
    /// Panics if the node contains no `TEXT` token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::AffiliatedKeyword};
    ///
    /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.key(), "CAPTION");
    /// ```
    pub fn key(&self) -> Token {
        let first_text = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT));
        first_text.expect("keyword must contains TEXT")
    }

    /// Returns the optional part written in square brackets after the key,
    /// or `None` when the keyword has no such part.
    ///
    /// ```rust
    /// use orgize::{Org, ast::AffiliatedKeyword};
    ///
    /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert!(keyword.optional().is_none());
    /// let keyword = Org::parse("#+CAPTION[OPTIONAL]: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.optional().unwrap(), "OPTIONAL");
    /// ```
    pub fn optional(&self) -> Option<Token> {
        let mut from_bracket = self
            .syntax
            .children_with_tokens()
            .skip_while(|it| it.kind() != SyntaxKind::L_BRACKET);
        // Step over the opening bracket itself; the element right after it is
        // the candidate for the optional text.
        from_bracket.next()?;
        from_bracket
            .next()
            .and_then(filter_token(SyntaxKind::TEXT))
    }

    /// Returns the last `TEXT` token of this node, which holds the keyword
    /// value. Whitespace following the colon is kept as written.
    ///
    /// ```rust
    /// use orgize::{Org, ast::AffiliatedKeyword};
    ///
    /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.value().unwrap(), " VALUE");
    /// let keyword = Org::parse("#+CAPTION[OPTIONAL]:VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.value().unwrap(), "VALUE");
    /// ```
    pub fn value(&self) -> Option<Token> {
        let texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT));
        texts.last()
    }
}

194
src/ast/block.rs Normal file
View file

@ -0,0 +1,194 @@
use super::{
filter_token, CenterBlock, CommentBlock, DynBlock, ExampleBlock, ExportBlock, QuoteBlock,
SourceBlock, SpecialBlock, SyntaxKind, Token, VerseBlock,
};
use rowan::TextSize;
impl SourceBlock {
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.language().unwrap(), "c");
/// let block = Org::parse("#+begin_src javascript \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.language().unwrap(), "javascript");
///
/// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.language().is_none());
/// ````
pub fn language(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::SRC_BLOCK_LANGUAGE))
}
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src emacs-lisp -n 20\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.switches().unwrap(), "-n 20");
/// let block = Org::parse("#+begin_src emacs-lisp -n 20 -r :tangle yes \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.switches().unwrap(), "-n 20 -r");
///
/// let block = Org::parse("#+begin_src emacs-lisp\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// let block = Org::parse("#+begin_src :tangle yes\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// ````
pub fn switches(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::SRC_BLOCK_SWITCHES))
}
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src c :tangle yes\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.parameters().unwrap(), ":tangle yes");
/// let block = Org::parse("#+begin_src c :tangle \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.parameters().unwrap(), ":tangle");
///
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.parameters().is_none());
/// ````
pub fn parameters(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::SRC_BLOCK_PARAMETERS))
}
/// Return unescaped source code string
///
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse(r#"
/// #+begin_src
/// #+end_src
/// "#).first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.value(), "");
///
/// let block = Org::parse(r#"
/// #+begin_src
/// ,* foo
/// ,#+ bar
/// #+end_src
/// "#).first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.value(), "* foo\n#+ bar\n");
/// ````
pub fn value(&self) -> String {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_CONTENT)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.filter_map(filter_token(SyntaxKind::TEXT))
.fold(String::new(), |acc, value| acc + &value)
}
}
impl ExportBlock {
/// ```rust
/// use orgize::{Org, ast::ExportBlock};
///
/// let block = Org::parse("#+begin_export html\n#+end_export").first_node::<ExportBlock>().unwrap();
/// assert_eq!(block.ty().unwrap(), "html");
///
/// let block = Org::parse("#+begin_export\n#+end_export").first_node::<ExportBlock>().unwrap();
/// assert!(block.ty().is_none());
/// ````
pub fn ty(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::EXPORT_BLOCK_TYPE))
}
/// Returns export block contents
///
/// ```rust
/// use orgize::{Org, ast::ExportBlock};
///
/// let block = Org::parse(r#"
/// #+begin_export html
/// <style>.red { color: red; }</style>
/// #+end_export
/// "#).first_node::<ExportBlock>().unwrap();
/// assert_eq!(block.value(), "<style>.red { color: red; }</style>\n");
///
/// let block = Org::parse(r#"
/// #+BEGIN_EXPORT org
/// ,#+BEGIN_EXPORT html
/// <style>.red { color: red; }</style>
/// ,#+END_EXPORT
/// #+END_EXPORT
/// "#).first_node::<ExportBlock>().unwrap();
/// assert_eq!(block.value(), r#"#+BEGIN_EXPORT html
/// <style>.red { color: red; }</style>
/// #+END_EXPORT
/// "#);
/// ```
pub fn value(&self) -> String {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_CONTENT)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.filter_map(filter_token(SyntaxKind::TEXT))
.fold(String::new(), |acc, value| acc + &value)
}
}
/// Implements `content_start` / `content_end` for a block AST type.
///
/// Both positions are derived from the block's `BLOCK_BEGIN` and `BLOCK_END`
/// child nodes. If such a child is missing, debug builds panic and release
/// builds fall back to `TextSize::default()`.
macro_rules! impl_content_border {
    ($block:ident) => {
        impl $block {
            /// Beginning position of block content
            pub fn content_start(&self) -> TextSize {
                let begin = self
                    .syntax
                    .children()
                    .find(|n| n.kind() == SyntaxKind::BLOCK_BEGIN);
                match begin {
                    // Content starts right where the begin line ends.
                    Some(node) => node.text_range().end(),
                    None => {
                        debug_assert!(false, "block must contains BLOCK_BEGIN");
                        TextSize::default()
                    }
                }
            }

            /// Ending position of block content
            pub fn content_end(&self) -> TextSize {
                let end = self
                    .syntax
                    .children()
                    .find(|n| n.kind() == SyntaxKind::BLOCK_END);
                match end {
                    // Content ends right where the end line starts.
                    Some(node) => node.text_range().start(),
                    None => {
                        debug_assert!(false, "block must contains BLOCK_END");
                        TextSize::default()
                    }
                }
            }
        }
    };
}

impl_content_border!(SourceBlock);
impl_content_border!(ExportBlock);
impl_content_border!(CenterBlock);
impl_content_border!(CommentBlock);
impl_content_border!(ExampleBlock);
impl_content_border!(QuoteBlock);
impl_content_border!(SpecialBlock);
impl_content_border!(VerseBlock);
impl_content_border!(DynBlock);

58
src/ast/clock.rs Normal file
View file

@ -0,0 +1,58 @@
use rowan::ast::support;
use crate::{ast::Token, SyntaxKind};
use super::{Clock, Timestamp};
impl Clock {
pub fn value(&self) -> Option<Timestamp> {
support::child(&self.syntax)
}
/// ```rust
/// use orgize::{Org, ast::Clock};
///
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::<Clock>().unwrap();
/// assert!(clock.duration().is_none());
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::<Clock>().unwrap();
/// assert_eq!(clock.duration().unwrap(), "12:00");
///
/// ```
pub fn duration(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.skip_while(|t| t.kind() != SyntaxKind::DOUBLE_ARROW)
.skip(1)
.find(|t| t.kind() != SyntaxKind::WHITESPACE)
.and_then(|e| {
debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
Some(Token(e.into_token()?))
})
}
/// ```rust
/// use orgize::{Org, ast::Clock};
///
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::<Clock>().unwrap();
/// assert!(!clock.is_closed());
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::<Clock>().unwrap();
/// assert!(clock.is_closed());
/// ```
pub fn is_closed(&self) -> bool {
self.syntax
.children_with_tokens()
.any(|t| t.kind() == SyntaxKind::DOUBLE_ARROW)
}
/// ```rust
/// use orgize::{Org, ast::Clock};
///
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::<Clock>().unwrap();
/// assert!(clock.is_running());
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::<Clock>().unwrap();
/// assert!(!clock.is_running());
/// ```
pub fn is_running(&self) -> bool {
!self.is_closed()
}
}

111
src/ast/cloze.rs Normal file
View file

@ -0,0 +1,111 @@
use crate::{syntax::OrgLanguage, SyntaxElement, SyntaxKind, SyntaxNode};
use rowan::{ast::AstNode, TextRange, TextSize};
use super::Token;
/// AST wrapper around a syntax node of kind `SyntaxKind::CLOZE`.
///
/// The doc examples on its methods parse inputs such as `{{text}{hint}@id}`
/// into a `Cloze`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Cloze {
// Underlying syntax node that every accessor reads from
pub(crate) syntax: SyntaxNode,
}
impl AstNode for Cloze {
    type Language = OrgLanguage;

    /// Only `CLOZE` nodes can be viewed as a [`Cloze`].
    fn can_cast(kind: SyntaxKind) -> bool {
        matches!(kind, SyntaxKind::CLOZE)
    }

    /// Wraps `node` when its kind is `CLOZE`, otherwise returns `None`.
    fn cast(node: SyntaxNode) -> Option<Cloze> {
        if Self::can_cast(node.kind()) {
            Some(Cloze { syntax: node })
        } else {
            None
        }
    }

    /// Borrows the underlying syntax node.
    fn syntax(&self) -> &SyntaxNode {
        &self.syntax
    }
}
impl Cloze {
/// Beginning position of this element
pub fn start(&self) -> TextSize {
self.syntax.text_range().start()
}
/// Ending position of this element
pub fn end(&self) -> TextSize {
self.syntax.text_range().end()
}
/// Range of this element
pub fn text_range(&self) -> TextRange {
self.syntax.text_range()
}
/// Raw text of this element
pub fn raw(&self) -> String {
self.syntax.to_string()
}
pub fn text(&self) -> impl Iterator<Item = SyntaxElement> {
self.syntax
.children_with_tokens()
.skip(1)
.take_while(|n| n.kind() != SyntaxKind::R_CURLY)
}
/// ```rust
/// use orgize::{Org, ast::Cloze};
///
/// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.text_raw(), "text");
/// let cloze = Org::parse("{{$\\frac{1}{2}$}{}@id}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.text_raw(), "$\\frac{1}{2}$");
/// let cloze = Org::parse("{{ [[file:my_image.png]] }{hint}}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.text_raw(), " [[file:my_image.png]] ");
/// ```
pub fn text_raw(&self) -> String {
self.text()
.fold(String::new(), |acc, e| acc + &e.to_string())
}
/// ```rust
/// use orgize::{Org, ast::Cloze};
///
/// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
/// assert!(cloze.hint().is_none());
/// let cloze = Org::parse("{{text}{}@id}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.hint().unwrap(), "");
/// let cloze = Org::parse("{{text}{hint}}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.hint().unwrap(), "hint");
/// ```
pub fn hint(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.skip_while(|n| n.kind() != SyntaxKind::L_CURLY)
.nth(1)
.and_then(|e| {
debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
Some(Token(e.into_token()?))
})
}
/// ```rust
/// use orgize::{Org, ast::Cloze};
///
/// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
/// assert!(cloze.id().is_none());
/// let cloze = Org::parse("{{text}@}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.id().unwrap(), "");
/// let cloze = Org::parse("{{text}@id}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.id().unwrap(), "id");
/// ```
pub fn id(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.skip_while(|n| n.kind() != SyntaxKind::AT)
.nth(1)
.and_then(|e| {
debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
Some(Token(e.into_token()?))
})
}
}

20
src/ast/comment.rs Normal file
View file

@ -0,0 +1,20 @@
use crate::SyntaxKind;
use super::{filter_token, Comment};
impl Comment {
/// Contents without pound signs
///
/// ```rust
/// use orgize::{ast::Comment, Org};
///
/// let fixed = Org::parse("# A\n#\n# B\n# C").first_node::<Comment>().unwrap();
/// assert_eq!(fixed.value(), "A\n\nB\nC");
/// ```
pub fn value(&self) -> String {
self.syntax
.children_with_tokens()
.filter_map(filter_token(SyntaxKind::TEXT))
.fold(String::new(), |acc, text| acc + &text)
}
}

88
src/ast/document.rs Normal file
View file

@ -0,0 +1,88 @@
use rowan::ast::AstNode;
use crate::Org;
use super::{Document, Keyword, PropertyDrawer};
impl Document {
    /// Returns an iterator of keywords in zeroth section
    ///
    /// Keywords that appear under a headline are not included.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Document};
    ///
    /// let org = Org::parse(r#"
    /// #+TITLE: hello
    /// #+TITLE: world
    /// #+DATE: today
    /// #+AUTHOR: poi
    /// * headline
    /// #+SOMETHING:"#);
    /// let doc = org.first_node::<Document>().unwrap();
    /// assert_eq!(doc.keywords().count(), 4);
    /// ```
    pub fn keywords(&self) -> impl Iterator<Item = Keyword> {
        self.section()
            .map(|zeroth| zeroth.syntax.children())
            .into_iter()
            .flatten()
            .filter_map(Keyword::cast)
    }

    /// Returns the value in top-level `#+TITLE`
    ///
    /// Multiple `#+TITLE` are joined with spaces. The key is matched
    /// case-insensitively and each value is trimmed before joining.
    ///
    /// Returns `None` if file doesn't contain `#+TITLE`
    ///
    /// ```rust
    /// use orgize::{Org, ast::Document};
    ///
    /// let org = Org::parse("#+TITLE: hello\n#+TITLE: world");
    /// let doc = org.first_node::<Document>().unwrap();
    /// assert_eq!(doc.title().unwrap(), "hello world");
    ///
    /// let org = Org::parse("");
    /// let doc = org.first_node::<Document>().unwrap();
    /// assert!(doc.title().is_none());
    /// ```
    pub fn title(&self) -> Option<String> {
        let mut joined: Option<String> = None;
        for kw in self.keywords() {
            if !kw.key().eq_ignore_ascii_case("TITLE") {
                continue;
            }
            let title = joined.get_or_insert_with(String::new);
            // A separator is only added once something has been collected.
            if !title.is_empty() {
                title.push(' ');
            }
            title.push_str(kw.value().trim());
        }
        joined
    }

    /// Returns top-level properties drawer
    ///
    /// ```rust
    /// use orgize::{Org, ast::Document};
    ///
    /// let org = Org::parse(r#":PROPERTIES:
    /// :ID: 20220718T085035.042592
    /// :END:
    /// #+TITLE: Complete Computing"#);
    ///
    /// let properties = org.document().properties().unwrap();
    /// assert_eq!(properties.to_hash_map().len(), 1);
    /// assert_eq!(properties.get("ID").unwrap(), "20220718T085035.042592");
    /// ```
    pub fn properties(&self) -> Option<PropertyDrawer> {
        rowan::ast::support::child::<PropertyDrawer>(&self.syntax)
    }
}
impl Org {
/// Shorthand for `self.document().title()`, see [`Document::title`]
pub fn title(&self) -> Option<String> {
self.document().title()
}
/// Shorthand for `self.document().keywords()`, see [`Document::keywords`]
pub fn keywords(&self) -> impl Iterator<Item = Keyword> {
self.document().keywords()
}
}

140
src/ast/drawer.rs Normal file
View file

@ -0,0 +1,140 @@
use rowan::TextSize;
use std::collections::HashMap;
use super::{filter_token, Drawer, PropertyDrawer, SyntaxKind, Token};
impl PropertyDrawer {
    /// Returns an iterator over the `(name, value)` pairs of this drawer.
    ///
    /// A node property with fewer than two `TEXT` tokens is skipped.
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// assert_eq!(drawer.iter().count(), 2);
    /// ```
    pub fn iter(&self) -> impl Iterator<Item = (Token, Token)> {
        self.node_properties().filter_map(|property| {
            let mut texts = property
                .syntax
                .children_with_tokens()
                .filter_map(filter_token(SyntaxKind::TEXT));
            let name = texts.next()?;
            let value = texts.next()?;
            Some((name, value))
        })
    }

    /// Returns the value of the first property whose name equals `key`.
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// assert_eq!(drawer.get("CUSTOM_ID").unwrap(), "someid");
    /// assert_eq!(drawer.get("ID").unwrap(), "id");
    /// ```
    pub fn get(&self, key: &str) -> Option<Token> {
        self.iter()
            .filter(|(name, _)| *name == key)
            .map(|(_, value)| value)
            .next()
    }

    /// Collects all properties into a `HashMap`. When a name occurs more than
    /// once, the last value wins.
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:CUSTOM_ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// let map = drawer.to_hash_map();
    /// assert_eq!(map.len(), 1);
    /// assert_eq!(map.get("CUSTOM_ID").unwrap(), "id");
    /// ```
    pub fn to_hash_map(&self) -> HashMap<Token, Token> {
        let mut map = HashMap::new();
        map.extend(self.iter());
        map
    }

    #[cfg(feature = "indexmap")]
    /// Collects all properties into an `IndexMap`, keeping their order.
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// let map = drawer.to_index_map();
    /// let item1 = map.get_index(1).unwrap();
    /// assert_eq!(item1.0, "ID");
    /// assert_eq!(item1.1, "id");
    /// ```
    pub fn to_index_map(&self) -> indexmap::IndexMap<Token, Token> {
        let pairs = self.iter();
        pairs.collect()
    }

    /// Beginning position of drawer content
    pub fn content_start(&self) -> TextSize {
        let begin = self
            .syntax
            .children()
            .find(|n| n.kind() == SyntaxKind::DRAWER_BEGIN);
        match begin {
            Some(node) => node.text_range().end(),
            None => {
                debug_assert!(false, "property drawer must contains DRAWER_BEGIN");
                TextSize::default()
            }
        }
    }

    /// Ending position of drawer content
    pub fn content_end(&self) -> TextSize {
        let end = self
            .syntax
            .children()
            .find(|n| n.kind() == SyntaxKind::DRAWER_END);
        match end {
            Some(node) => node.text_range().start(),
            None => {
                debug_assert!(false, "property drawer must contains DRAWER_END");
                TextSize::default()
            }
        }
    }
}
impl Drawer {
    /// Returns the drawer name, i.e. the first `TEXT` token of its
    /// `DRAWER_BEGIN` node.
    ///
    /// # Panics
    ///
    /// Panics if the drawer has no `DRAWER_BEGIN` node or that node has no
    /// `TEXT` token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Drawer};
    ///
    /// let org = Org::parse("* Heading\n:LOGBOOK:\n:END:");
    /// let drawer = org.first_node::<Drawer>().unwrap();
    /// assert_eq!(drawer.name(), "LOGBOOK");
    /// ```
    pub fn name(&self) -> Token {
        let begin = self
            .syntax
            .children()
            .find(|n| n.kind() == SyntaxKind::DRAWER_BEGIN)
            .expect("drawer must contains DRAWER_BEGIN");
        let name = begin
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT));
        name.expect("drawer begin must contains TEXT")
    }

    /// Beginning position of drawer content
    pub fn content_start(&self) -> TextSize {
        let begin = self
            .syntax
            .children()
            .find(|n| n.kind() == SyntaxKind::DRAWER_BEGIN);
        match begin {
            Some(node) => node.text_range().end(),
            None => {
                debug_assert!(false, "drawer must contains DRAWER_BEGIN");
                TextSize::default()
            }
        }
    }

    /// Ending position of drawer content
    pub fn content_end(&self) -> TextSize {
        let end = self
            .syntax
            .children()
            .find(|n| n.kind() == SyntaxKind::DRAWER_END);
        match end {
            Some(node) => node.text_range().start(),
            None => {
                debug_assert!(false, "drawer must contains DRAWER_END");
                TextSize::default()
            }
        }
    }

    /// Raw text of drawer content
    ///
    /// Empty when the drawer has no `DRAWER_CONTENT` node.
    pub fn content_raw(&self) -> String {
        let content = self
            .syntax
            .children()
            .find(|n| n.kind() == SyntaxKind::DRAWER_CONTENT);
        content.map_or_else(String::new, |node| node.to_string())
    }
}

168
src/ast/entity.rs Normal file
View file

@ -0,0 +1,168 @@
use crate::{entities::ENTITIES, SyntaxKind};
use super::{filter_token, Entity};
impl Entity {
    /// Looks up this entity's row in the `ENTITIES` table, keyed by the first
    /// `TEXT` token of the node.
    ///
    /// The row is `(name, latex, is_latex_math, html, ascii, latin1, utf8)`.
    /// Returns `None` when the node has no `TEXT` token or the name is not in
    /// the table.
    fn entity(&self) -> Option<&(&str, &str, bool, &str, &str, &str, &str)> {
        let token = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT))?;
        ENTITIES.iter().find(|i| i.0 == token.as_ref())
    }

    /// Entity name
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\alpha{}").first_node::<Entity>().unwrap();
    /// assert_eq!(e.name(), "alpha");
    /// let e = Org::parse("\\_ ").first_node::<Entity>().unwrap();
    /// assert_eq!(e.name(), " ");
    /// ```
    pub fn name(&self) -> &str {
        match self.entity() {
            Some(e) => e.0,
            None => {
                // Debug builds panic; release builds fall back to "".
                debug_assert!(false, "entity must name an entry of ENTITIES");
                ""
            }
        }
    }

    /// Entity LaTeX representation
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\middot").first_node::<Entity>().unwrap();
    /// assert_eq!(e.latex(), "\\textperiodcentered{}");
    /// ```
    pub fn latex(&self) -> &str {
        match self.entity() {
            Some(e) => e.1,
            None => {
                debug_assert!(false, "entity must name an entry of ENTITIES");
                ""
            }
        }
    }

    /// Whether entity needs to be in math mode
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\middot").first_node::<Entity>().unwrap();
    /// assert!(!e.is_latex_math());
    /// let e = Org::parse("\\alefsym").first_node::<Entity>().unwrap();
    /// assert!(e.is_latex_math());
    /// ```
    pub fn is_latex_math(&self) -> bool {
        match self.entity() {
            Some(e) => e.2,
            None => {
                debug_assert!(false, "entity must name an entry of ENTITIES");
                false
            }
        }
    }

    /// Entity HTML representation
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.html(), "&sect;");
    /// ```
    pub fn html(&self) -> &str {
        match self.entity() {
            Some(e) => e.3,
            None => {
                debug_assert!(false, "entity must name an entry of ENTITIES");
                ""
            }
        }
    }

    /// Entity ASCII representation
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.ascii(), "section");
    /// ```
    pub fn ascii(&self) -> &str {
        match self.entity() {
            Some(e) => e.4,
            None => {
                debug_assert!(false, "entity must name an entry of ENTITIES");
                ""
            }
        }
    }

    /// Entity Latin1 encoding representation
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.latin1(), "§");
    /// let e = Org::parse("\\rsaquo").first_node::<Entity>().unwrap();
    /// assert_eq!(e.latin1(), ">");
    /// ```
    pub fn latin1(&self) -> &str {
        match self.entity() {
            Some(e) => e.5,
            None => {
                debug_assert!(false, "entity must name an entry of ENTITIES");
                ""
            }
        }
    }

    /// Entity UTF-8 encoding representation
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.utf8(), "§");
    /// let e = Org::parse("\\rsaquo").first_node::<Entity>().unwrap();
    /// assert_eq!(e.utf8(), "›");
    /// ```
    pub fn utf8(&self) -> &str {
        match self.entity() {
            Some(e) => e.6,
            None => {
                debug_assert!(false, "entity must name an entry of ENTITIES");
                ""
            }
        }
    }

    /// Entity contains optional brackets
    ///
    /// This is the case when the node holds a second `TEXT` token after the
    /// entity name.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\beta").first_node::<Entity>().unwrap();
    /// assert!(!e.is_use_brackets());
    /// let e = Org::parse("\\S{}").first_node::<Entity>().unwrap();
    /// assert!(e.is_use_brackets());
    /// let e = Org::parse("\\_ ").first_node::<Entity>().unwrap();
    /// assert!(!e.is_use_brackets());
    /// ```
    pub fn is_use_brackets(&self) -> bool {
        self.syntax
            .children_with_tokens()
            .filter(|n| n.kind() == SyntaxKind::TEXT)
            .nth(1)
            .is_some()
    }
}

20
src/ast/fixed_width.rs Normal file
View file

@ -0,0 +1,20 @@
use crate::SyntaxKind;
use super::{filter_token, FixedWidth};
impl FixedWidth {
/// Contents without colons prefix
///
/// ```rust
/// use orgize::{ast::FixedWidth, Org};
///
/// let fixed = Org::parse(": A\n:\n: B\n: C").first_node::<FixedWidth>().unwrap();
/// assert_eq!(fixed.value(), "A\n\nB\nC");
/// ```
pub fn value(&self) -> String {
self.syntax
.children_with_tokens()
.filter_map(filter_token(SyntaxKind::TEXT))
.fold(String::new(), |acc, text| acc + &text)
}
}

357
src/ast/generate.js Normal file
View file

@ -0,0 +1,357 @@
const nodes = [
{
struct: "Document",
kind: ["DOCUMENT"],
pre_blank: true,
first_child: [
["section", "Section"],
["first_headline", "Headline"],
],
last_child: [["last_headline", "Headline"]],
children: [["headlines", "Headline"]],
},
{
struct: "Section",
kind: ["SECTION"],
post_blank: true,
},
{
struct: "Paragraph",
kind: ["PARAGRAPH"],
post_blank: true,
affiliated_keywords: true,
},
{
struct: "Headline",
kind: ["HEADLINE"],
first_child: [
["section", "Section"],
["planning", "Planning"],
["properties", "PropertyDrawer"],
],
children: [["headlines", "Headline"]],
post_blank: true,
},
{
struct: "PropertyDrawer",
kind: ["PROPERTY_DRAWER"],
children: [["node_properties", "NodeProperty"]],
},
{
struct: "NodeProperty",
kind: ["NODE_PROPERTY"],
},
{
struct: "Planning",
kind: ["PLANNING"],
},
{
struct: "OrgTable",
kind: ["ORG_TABLE"],
post_blank: true,
affiliated_keywords: true,
},
{
struct: "OrgTableRow",
kind: ["ORG_TABLE_RULE_ROW", "ORG_TABLE_STANDARD_ROW"],
},
{
struct: "OrgTableCell",
kind: ["ORG_TABLE_CELL"],
},
{
struct: "List",
kind: ["LIST"],
children: [["items", "ListItem"]],
affiliated_keywords: true,
},
{
struct: "ListItem",
kind: ["LIST_ITEM"],
},
{
struct: "Drawer",
kind: ["DRAWER"],
},
{
struct: "DynBlock",
kind: ["DYN_BLOCK"],
affiliated_keywords: true,
},
{
struct: "Keyword",
kind: ["KEYWORD"],
},
{
struct: "BabelCall",
kind: ["BABEL_CALL"],
},
{
struct: "AffiliatedKeyword",
kind: ["AFFILIATED_KEYWORD"],
},
{
struct: "TableEl",
kind: ["TABLE_EL"],
post_blank: true,
},
{
struct: "Clock",
kind: ["CLOCK"],
post_blank: true,
},
{
struct: "FnDef",
kind: ["FN_DEF"],
post_blank: true,
affiliated_keywords: true,
},
{
struct: "Comment",
kind: ["COMMENT"],
post_blank: true,
token: [["text", "TEXT"]],
affiliated_keywords: true,
},
{
struct: "Rule",
kind: ["RULE"],
post_blank: true,
},
{
struct: "FixedWidth",
kind: ["FIXED_WIDTH"],
post_blank: true,
token: [["text", "TEXT"]],
affiliated_keywords: true,
},
{
struct: "SpecialBlock",
kind: ["SPECIAL_BLOCK"],
affiliated_keywords: true,
},
{
struct: "QuoteBlock",
kind: ["QUOTE_BLOCK"],
affiliated_keywords: true,
},
{
struct: "CenterBlock",
kind: ["CENTER_BLOCK"],
affiliated_keywords: true,
},
{
struct: "VerseBlock",
kind: ["VERSE_BLOCK"],
affiliated_keywords: true,
},
{
struct: "CommentBlock",
kind: ["COMMENT_BLOCK"],
affiliated_keywords: true,
},
{
struct: "ExampleBlock",
kind: ["EXAMPLE_BLOCK"],
affiliated_keywords: true,
},
{
struct: "ExportBlock",
kind: ["EXPORT_BLOCK"],
affiliated_keywords: true,
},
{
struct: "SourceBlock",
kind: ["SOURCE_BLOCK"],
affiliated_keywords: true,
},
{
struct: "InlineCall",
kind: ["INLINE_CALL"],
},
{
struct: "InlineSrc",
kind: ["INLINE_SRC"],
},
{
struct: "Link",
kind: ["LINK"],
},
{
struct: "Cookie",
kind: ["COOKIE"],
},
{
struct: "RadioTarget",
kind: ["RADIO_TARGET"],
},
{
struct: "FnRef",
kind: ["FN_REF"],
},
{
struct: "Macros",
kind: ["MACROS"],
},
{
struct: "Snippet",
kind: ["SNIPPET"],
},
{
struct: "Target",
kind: ["TARGET"],
},
{
struct: "Bold",
kind: ["BOLD"],
},
{
struct: "Strike",
kind: ["STRIKE"],
},
{
struct: "Italic",
kind: ["ITALIC"],
},
{
struct: "Underline",
kind: ["UNDERLINE"],
},
{
struct: "Verbatim",
kind: ["VERBATIM"],
},
{
struct: "Code",
kind: ["CODE"],
token: [["text", "TEXT"]],
},
{
struct: "Timestamp",
kind: ["TIMESTAMP_ACTIVE", "TIMESTAMP_INACTIVE", "TIMESTAMP_DIARY"],
token: [
["year_start", "TIMESTAMP_YEAR"],
["month_start", "TIMESTAMP_MONTH"],
["day_start", "TIMESTAMP_DAY"],
["hour_start", "TIMESTAMP_HOUR"],
["minute_start", "TIMESTAMP_MINUTE"],
],
last_token: [
["year_end", "TIMESTAMP_YEAR"],
["month_end", "TIMESTAMP_MONTH"],
["day_end", "TIMESTAMP_DAY"],
["hour_end", "TIMESTAMP_HOUR"],
["minute_end", "TIMESTAMP_MINUTE"],
],
},
{
struct: "LatexEnvironment",
kind: ["LATEX_ENVIRONMENT"],
},
{
struct: "LatexFragment",
kind: ["LATEX_FRAGMENT"],
},
{
struct: "Entity",
kind: ["ENTITY"],
},
{
struct: "LineBreak",
kind: ["LINE_BREAK"],
},
{
struct: "Superscript",
kind: ["SUPERSCRIPT"],
},
{
struct: "Subscript",
kind: ["SUBSCRIPT"],
},
];
// Generator body: builds the Rust source of `generated.rs` as one string and
// writes it next to this script. Everything inside the template literals is
// emitted verbatim into the generated file.
//
// The prelude holds the file header, the shared imports and the
// `affiliated_keyword` helper that the per-node accessors below call.
let content = `//! generated file, do not modify it directly
#![allow(clippy::all)]
#![allow(unused)]
use rowan::{ast::{support, AstChildren, AstNode}, TextSize, TextRange};
use crate::syntax::{OrgLanguage, SyntaxKind, SyntaxKind::*, SyntaxNode, SyntaxToken};
fn affiliated_keyword(node: &SyntaxNode, filter: impl Fn(&str) -> bool) -> Option<AffiliatedKeyword> {
node.children()
.take_while(|n| n.kind() == SyntaxKind::AFFILIATED_KEYWORD)
.filter_map(AffiliatedKeyword::cast)
.find(|k| filter(&k.key()))
}
`;
// One struct + `AstNode` impl + inherent impl per entry of `nodes`.
for (const node of nodes) {
// Struct definition, the cast logic (any of `node.kind` is accepted) and the
// position/raw-text accessors every node gets.
content += `
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ${node.struct} {
pub(crate) syntax: SyntaxNode,
}
impl AstNode for ${node.struct} {
type Language = OrgLanguage;
fn can_cast(kind: SyntaxKind) -> bool { ${node.kind
.map((k) => `kind == ${k}`)
.join(" || ")} }
fn cast(node: SyntaxNode) -> Option<${
node.struct
}> { Self::can_cast(node.kind()).then(|| ${node.struct} { syntax: node }) }
fn syntax(&self) -> &SyntaxNode { &self.syntax }
}
impl ${node.struct} {
/// Beginning position of this element
pub fn start(&self) -> TextSize {
self.syntax.text_range().start()
}
/// Ending position of this element
pub fn end(&self) -> TextSize {
self.syntax.text_range().end()
}
/// Range of this element
pub fn text_range(&self) -> TextRange {
self.syntax.text_range()
}
/// Raw text of this element
pub fn raw(&self) -> String {
self.syntax.to_string()
}
`;
// `token`: [method, kind] pairs -> accessor delegating to `super::token`.
for (const [method, kind] of node.token || []) {
content += ` pub fn ${method}(&self) -> Option<super::Token> { super::token(&self.syntax, ${kind}) }\n`;
}
// `last_token`: same shape, but delegating to `super::last_token`.
for (const [method, kind] of node.last_token || []) {
content += ` pub fn ${method}(&self) -> Option<super::Token> { super::last_token(&self.syntax, ${kind}) }\n`;
}
// `parent`: accessor that casts the syntax parent to the given struct.
// NOTE(review): no entry of `nodes` visible here sets `parent` — confirm it is still needed.
for (const [method, kind] of node.parent || []) {
content += ` pub fn ${method}(&self) -> Option<${kind}> { self.syntax.parent().and_then(${kind}::cast) }\n`;
}
// `first_child`: accessor returning `support::child` for the given struct.
for (const [method, kind] of node.first_child || []) {
content += ` pub fn ${method}(&self) -> Option<${kind}> { support::child(&self.syntax) }\n`;
}
// `last_child`: accessor delegating to `super::last_child`.
for (const [method, kind] of node.last_child || []) {
content += ` pub fn ${method}(&self) -> Option<${kind}> { super::last_child(&self.syntax) }\n`;
}
// `children`: accessor returning all children of the given struct type.
for (const [method, kind] of node.children || []) {
content += ` pub fn ${method}(&self) -> AstChildren<${kind}> { support::children(&self.syntax) }\n`;
}
// `post_blank` and `pre_blank` both emit a call to the same `super::blank_lines` helper.
if (node.post_blank) {
content += ` pub fn post_blank(&self) -> usize { super::blank_lines(&self.syntax) }\n`;
}
if (node.pre_blank) {
content += ` pub fn pre_blank(&self) -> usize { super::blank_lines(&self.syntax) }\n`;
}
// Elements flagged with `affiliated_keywords` get one accessor per well-known
// keyword, plus `attr(backend)` which matches keys of the form `ATTR_<backend>`.
if (node.affiliated_keywords) {
content += ` pub fn caption(&self) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k == "CAPTION") }\n`;
content += ` pub fn header(&self) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k == "HEADER") }\n`;
content += ` pub fn name(&self) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k == "NAME") }\n`;
content += ` pub fn plot(&self) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k == "PLOT") }\n`;
content += ` pub fn results(&self) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k == "RESULTS") }\n`;
content += ` pub fn attr(&self, backend: &str) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k.starts_with("ATTR_") && &k[5..] == backend) }\n`;
}
// Close the inherent `impl` opened in the template above.
content += `}\n`;
}
// Write the result to `generated.rs` in this script's directory.
require("fs").writeFileSync(__dirname + "/generated.rs", content);

2276
src/ast/generated.rs Normal file

File diff suppressed because it is too large Load diff

276
src/ast/headline.rs Normal file
View file

@ -0,0 +1,276 @@
use rowan::{ast::AstNode, NodeOrToken};
use crate::{syntax::SyntaxKind, SyntaxElement};
use super::{filter_token, Clock, Drawer, Headline, Section, Timestamp, Token};
/// Kind of TODO keyword found on a headline, as reported by `Headline::todo_type`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum TodoType {
/// The headline carries a `HEADLINE_KEYWORD_TODO` token
Todo,
/// The headline carries a `HEADLINE_KEYWORD_DONE` token
Done,
}
impl Headline {
/// Return level of this headline
///
/// The level is the length of the headline's `HEADLINE_STARS` token. If the
/// token is missing, debug builds panic and release builds return `0`.
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* ").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.level(), 1);
/// let hdl = Org::parse("****** hello").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.level(), 6);
/// ```
pub fn level(&self) -> usize {
    let stars = self
        .syntax
        .children_with_tokens()
        .find_map(filter_token(SyntaxKind::HEADLINE_STARS));
    match stars {
        Some(stars) => stars.len(),
        None => {
            debug_assert!(false, "headline must contains HEADLINE_STARS");
            0
        }
    }
}
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* TODO a").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.todo_keyword().unwrap(), "TODO");
/// ```
pub fn todo_keyword(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.find_map(|elem| match elem {
NodeOrToken::Token(tk)
if tk.kind() == SyntaxKind::HEADLINE_KEYWORD_TODO
|| tk.kind() == SyntaxKind::HEADLINE_KEYWORD_DONE =>
{
Some(Token(tk))
}
_ => None,
})
}
/// Returns whether the keyword of this headline is a TODO or a DONE keyword
///
/// Returns `None` if the headline has no keyword token.
///
/// ```rust
/// use orgize::{Org, ast::{Headline, TodoType}};
///
/// let hdl = Org::parse("* TODO a").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.todo_type().unwrap(), TodoType::Todo);
/// let hdl = Org::parse("*** DONE a").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.todo_type().unwrap(), TodoType::Done);
/// ```
pub fn todo_type(&self) -> Option<TodoType> {
    self.syntax.children_with_tokens().find_map(|elem| {
        // The first keyword token (in document order) decides the type.
        if let NodeOrToken::Token(tk) = elem {
            let kind = tk.kind();
            if kind == SyntaxKind::HEADLINE_KEYWORD_TODO {
                return Some(TodoType::Todo);
            }
            if kind == SyntaxKind::HEADLINE_KEYWORD_DONE {
                return Some(TodoType::Done);
            }
        }
        None
    })
}
/// Returns `true` if this headline has a keyword of type [`TodoType::Todo`]
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* TODO a").first_node::<Headline>().unwrap();
/// assert!(hdl.is_todo());
/// let hdl = Org::parse("* a").first_node::<Headline>().unwrap();
/// assert!(!hdl.is_todo());
/// ```
pub fn is_todo(&self) -> bool {
    // A headline without any keyword yields `None`, which compares unequal.
    self.todo_type() == Some(TodoType::Todo)
}
/// Returns `true` if this headline has a keyword of type [`TodoType::Done`]
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* DONE a").first_node::<Headline>().unwrap();
/// assert!(hdl.is_done());
/// let hdl = Org::parse("* a").first_node::<Headline>().unwrap();
/// assert!(!hdl.is_done());
/// ```
pub fn is_done(&self) -> bool {
    // A headline without any keyword yields `None`, which compares unequal.
    self.todo_type() == Some(TodoType::Done)
}
/// Returns parsed title
///
/// ```rust
/// use orgize::{Org, ast::Headline, SyntaxKind};
///
/// let hdl = Org::parse("*** abc *abc* /abc/ :tag:").first_node::<Headline>().unwrap();
/// let title = hdl.title().collect::<Vec<_>>();
/// assert_eq!(title[1].kind(), SyntaxKind::BOLD);
/// assert_eq!(title[1].to_string(), "*abc*");
/// assert_eq!(title[3].kind(), SyntaxKind::ITALIC);
/// assert_eq!(title[3].to_string(), "/abc/");
/// ```
pub fn title(&self) -> impl Iterator<Item = SyntaxElement> {
    // Locate the (optional) title node first ...
    let title_node = self
        .syntax
        .children()
        .find(|node| node.kind() == SyntaxKind::HEADLINE_TITLE);

    // ... then yield its direct children; a missing title gives an empty iterator.
    title_node
        .into_iter()
        .flat_map(|node| node.children_with_tokens())
}
/// Returns title raw string
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("*** abc *abc* /abc/ :tag:").first_node::<Headline>().unwrap();
/// let title = hdl.title_raw();
/// assert_eq!(title, "abc *abc* /abc/ ");
/// ```
pub fn title_raw(&self) -> String {
    let title_node = self
        .syntax
        .children()
        .find(|node| node.kind() == SyntaxKind::HEADLINE_TITLE);

    match title_node {
        // The node's text is returned as written in the document.
        Some(node) => node.to_string(),
        // No title node: empty string.
        None => String::new(),
    }
}
/// Return `true` if this headline contains a COMMENT keyword
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* COMMENT").first_node::<Headline>().unwrap();
/// assert!(hdl.is_commented());
/// let hdl = Org::parse("* COMMENT hello").first_node::<Headline>().unwrap();
/// assert!(hdl.is_commented());
/// let hdl = Org::parse("* hello").first_node::<Headline>().unwrap();
/// assert!(!hdl.is_commented());
/// ```
pub fn is_commented(&self) -> bool {
    match self.title().next() {
        Some(first) => match first.as_token() {
            // Only a leading TEXT token can carry the COMMENT keyword.
            Some(tk) if tk.kind() == SyntaxKind::TEXT => {
                match tk.text().strip_prefix("COMMENT") {
                    // The keyword must end the token or be followed by
                    // whitespace, so e.g. `COMMENTS` does not count.
                    Some(rest) => rest.is_empty() || rest.starts_with(char::is_whitespace),
                    None => false,
                }
            }
            _ => false,
        },
        // Headline without title.
        None => false,
    }
}
/// Return `true` if this headline contains an archive tag
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* hello :ARCHIVE:").first_node::<Headline>().unwrap();
/// assert!(hdl.is_archived());
/// let hdl = Org::parse("* hello :ARCHIVED:").first_node::<Headline>().unwrap();
/// assert!(!hdl.is_archived());
/// ```
pub fn is_archived(&self) -> bool {
    // Exact, case-sensitive match on the tag text.
    self.tags().any(|tag| &*tag == "ARCHIVE")
}
/// Returns this headline's closed timestamp, or `None` if not set.
pub fn closed(&self) -> Option<Timestamp> {
    // No planning element means no closed timestamp.
    self.planning()?.closed()
}
/// Returns this headline's scheduled timestamp, or `None` if not set.
pub fn scheduled(&self) -> Option<Timestamp> {
    // No planning element means no scheduled timestamp.
    self.planning()?.scheduled()
}
/// Returns this headline's deadline timestamp, or `None` if not set.
pub fn deadline(&self) -> Option<Timestamp> {
    // No planning element means no deadline timestamp.
    self.planning()?.deadline()
}
/// Returns an iterator over the text tokens of this headline's tags
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let tags_vec = |input: &str| {
/// let hdl = Org::parse(input).first_node::<Headline>().unwrap();
/// let tags: Vec<_> = hdl.tags().map(|t| t.to_string()).collect();
/// tags
/// };
///
/// assert_eq!(tags_vec("* :tag:"), vec!["tag".to_string()]);
/// assert_eq!(tags_vec("* [#A] :::::a2%:"), vec!["a2%".to_string()]);
/// assert_eq!(tags_vec("* TODO :tag: :a2%:"), vec!["tag".to_string(), "a2%".to_string()]);
/// assert_eq!(tags_vec("* title :tag:a2%:"), vec!["tag".to_string(), "a2%".to_string()]);
/// ```
pub fn tags(&self) -> impl Iterator<Item = Token> {
    // Locate the (optional) tags node first ...
    let tags_node = self
        .syntax
        .children()
        .find(|node| node.kind() == SyntaxKind::HEADLINE_TAGS);

    // ... then keep only its TEXT tokens; everything else inside it is skipped.
    tags_node
        .into_iter()
        .flat_map(|node| node.children_with_tokens())
        .filter_map(filter_token(SyntaxKind::TEXT))
}
/// Returns priority text
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* [#A]").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.priority().unwrap(), "A");
/// let hdl = Org::parse("** DONE [#B]::").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.priority().unwrap(), "B");
/// let hdl = Org::parse("* [#破]").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.priority().unwrap(), "破");
/// ```
pub fn priority(&self) -> Option<Token> {
    let priority_node = self
        .syntax
        .children()
        .find(|node| node.kind() == SyntaxKind::HEADLINE_PRIORITY)?;

    // The priority value is the first TEXT token inside the priority node.
    priority_node
        .children_with_tokens()
        .find_map(filter_token(SyntaxKind::TEXT))
}
/// Returns an iterator over the clock elements affiliated with this headline
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let org = Org::parse(r#"* TODO
/// foo
/// :LOGBOOK:
/// bar
/// CLOCK:
/// CLOCK: [2024-10-12]
/// baz
/// CLOCK: [2024-10-12]
/// [2024-10-12]
/// :END:
/// foo"#);
/// let hdl = org.first_node::<Headline>().unwrap();
/// assert_eq!(hdl.clocks().count(), 2);
/// ```
pub fn clocks(&self) -> impl Iterator<Item = Clock> {
    // Sections that are direct children of this headline.
    let sections = self.syntax.children().filter_map(Section::cast);

    // Drawers directly inside those sections whose name is LOGBOOK (ASCII case-insensitive).
    let logbooks = sections
        .flat_map(|section| section.syntax.children().filter_map(Drawer::cast))
        .filter(|drawer| drawer.name().eq_ignore_ascii_case("LOGBOOK"));

    // The content node of each logbook drawer; drawers without one are skipped.
    let contents = logbooks.filter_map(|drawer| {
        drawer
            .syntax
            .children()
            .find(|child| child.kind() == SyntaxKind::DRAWER_CONTENT)
    });

    // Only clocks that are direct children of the drawer content are yielded.
    contents.flat_map(|content| content.children().filter_map(Clock::cast))
}
}

80
src/ast/inline_call.rs Normal file
View file

@ -0,0 +1,80 @@
use crate::syntax::SyntaxKind;
use super::{filter_token, InlineCall, Token};
impl InlineCall {
/// Returns the name of the call, i.e. the second `TEXT` token of this inline call
///
/// # Panics
///
/// Panics if the node contains fewer than two `TEXT` tokens.
///
/// ```rust
/// use orgize::{Org, ast::InlineCall};
///
/// let call = Org::parse("call_square(4)").first_node::<InlineCall>().unwrap();
/// assert_eq!(call.call(), "square");
/// ```
pub fn call(&self) -> Token {
    let mut texts = self
        .syntax
        .children_with_tokens()
        .filter_map(filter_token(SyntaxKind::TEXT));

    // Index 1 = second TEXT token; the first one is skipped.
    texts.nth(1).expect("inline call must contains two TEXT")
}
///
/// ```rust
/// use orgize::{Org, ast::InlineCall};
///
/// let call = Org::parse("call_square[:results output](4)").first_node::<InlineCall>().unwrap();
/// assert_eq!(call.inside_header().unwrap(), ":results output");
///
/// let call = Org::parse("call_square(4)[:results html]").first_node::<InlineCall>().unwrap();
/// assert!(call.inside_header().is_none());
/// ```
pub fn inside_header(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.take_while(|e| e.kind() != SyntaxKind::L_PARENS)
.skip_while(|e| e.kind() != SyntaxKind::L_BRACKET)
.nth(1)
.and_then(|e| {
debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
Some(Token(e.into_token()?))
})
}
/// Returns the arguments of the call, i.e. the first `TEXT` token after the opening parenthesis
///
/// # Panics
///
/// Panics if no `TEXT` token follows `L_PARENS`.
///
/// ```rust
/// use orgize::{Org, ast::InlineCall};
///
/// let call = Org::parse("call_square(4)").first_node::<InlineCall>().unwrap();
/// assert_eq!(call.arguments(), "4");
/// ```
pub fn arguments(&self) -> Token {
    let mut from_parens = self
        .syntax
        .children_with_tokens()
        .skip_while(|e| e.kind() != SyntaxKind::L_PARENS);

    from_parens
        .find_map(filter_token(SyntaxKind::TEXT))
        .expect("inline call must contains TEXT after L_PARENS")
}
///
/// ```rust
/// use orgize::{Org, ast::InlineCall};
///
/// let call = Org::parse("call_square[:results output](4)[:results html]").first_node::<InlineCall>().unwrap();
/// assert_eq!(call.end_header().unwrap(), ":results html");
///
/// let call = Org::parse("call_square[:results output](4)").first_node::<InlineCall>().unwrap();
/// assert!(call.end_header().is_none());
/// ```
pub fn end_header(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.skip_while(|e| e.kind() != SyntaxKind::L_BRACKET)
.skip(1)
.skip_while(|e| e.kind() != SyntaxKind::L_BRACKET)
.nth(1)
.and_then(|e| {
debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
Some(Token(e.into_token()?))
})
}
}

62
src/ast/inline_src.rs Normal file
View file

@ -0,0 +1,62 @@
use crate::SyntaxKind;
use super::{filter_token, InlineSrc, Token};
impl InlineSrc {
/// Language of the code
///
/// ```rust
/// use orgize::{Org, ast::InlineSrc};
///
/// let s = Org::parse("src_C{int a = 0;}").first_node::<InlineSrc>().unwrap();
/// assert_eq!(s.language(), "C");
/// let s = Org::parse("src_xml[:exports code]{<tag>text</tag>}").first_node::<InlineSrc>().unwrap();
/// assert_eq!(s.language(), "xml");
/// ```
pub fn language(&self) -> Token {
    // The language is expected to be the second child of the node.
    let second = self.syntax.children_with_tokens().nth(1);

    second
        .and_then(filter_token(SyntaxKind::TEXT))
        .expect("inline src must contains TEXT")
}
/// Optional header arguments
///
/// ```rust
/// use orgize::{Org, ast::InlineSrc};
///
/// let s = Org::parse("src_C{int a = 0;}").first_node::<InlineSrc>().unwrap();
/// assert!(s.parameters().is_none());
/// let s = Org::parse("src_xml[:exports code]{<tag>text</tag>}").first_node::<InlineSrc>().unwrap();
/// assert_eq!(s.parameters().unwrap(), ":exports code");
/// ```
pub fn parameters(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.skip_while(|n| n.kind() != SyntaxKind::L_BRACKET)
.nth(1)
.and_then(|n| {
debug_assert_eq!(n.kind(), SyntaxKind::TEXT);
Some(Token(n.into_token()?))
})
}
/// Source code
///
/// ```rust
/// use orgize::{Org, ast::InlineSrc};
///
/// let s = Org::parse("src_C{int a = 0;}").first_node::<InlineSrc>().unwrap();
/// assert_eq!(s.value(), "int a = 0;");
/// let s = Org::parse("src_xml[:exports code]{<tag>text</tag>}").first_node::<InlineSrc>().unwrap();
/// assert_eq!(s.value(), "<tag>text</tag>");
/// ```
pub fn value(&self) -> Token {
    let texts = self
        .syntax
        .children_with_tokens()
        .filter_map(filter_token(SyntaxKind::TEXT));

    // The source code is the last TEXT token of the node.
    texts.last().expect("inline src must contains TEXT")
}
}

36
src/ast/keyword.rs Normal file
View file

@ -0,0 +1,36 @@
use crate::SyntaxKind;
use super::{filter_token, Keyword, Token};
impl Keyword {
/// Returns the key of this keyword, i.e. its first `TEXT` token
///
/// # Panics
///
/// Panics if the node contains no `TEXT` token.
///
/// ```rust
/// use orgize::{Org, ast::Keyword};
///
/// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::<Keyword>().unwrap();
/// assert_eq!(keyword.key(), "KEY");
/// ```
pub fn key(&self) -> Token {
    let mut texts = self
        .syntax
        .children_with_tokens()
        .filter_map(filter_token(SyntaxKind::TEXT));

    texts.next().expect("keyword must contains TEXT")
}
/// Returns the value of this keyword, i.e. its second `TEXT` token
///
/// The value is returned as written, including any whitespace that
/// follows the colon.
///
/// # Panics
///
/// Panics if the node contains fewer than two `TEXT` tokens.
///
/// ```rust
/// use orgize::{Org, ast::Keyword};
///
/// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::<Keyword>().unwrap();
/// assert_eq!(keyword.value(), " VALUE");
/// let keyword = Org::parse("#+KEY:").first_node::<Keyword>().unwrap();
/// assert_eq!(keyword.value(), "");
/// ```
pub fn value(&self) -> Token {
    let mut texts = self
        .syntax
        .children_with_tokens()
        .filter_map(filter_token(SyntaxKind::TEXT));

    // Index 1 = second TEXT token; the first one is the key.
    texts.nth(1).expect("keyword must contains two TEXT")
}
}

121
src/ast/link.rs Normal file
View file

@ -0,0 +1,121 @@
use rowan::ast::AstNode;
use super::{token, AffiliatedKeyword, Link, Paragraph, Token};
use crate::{syntax::SyntaxKind, SyntaxElement};
impl Link {
/// Returns link destination
///
/// ```rust
/// use orgize::{Org, ast::Link};
///
/// let link = Org::parse("[[#id]]").first_node::<Link>().unwrap();
/// assert_eq!(link.path(), "#id");
/// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
/// assert_eq!(link.path(), "https://google.com");
/// let link = Org::parse("[[https://google.com][Google]]").first_node::<Link>().unwrap();
/// assert_eq!(link.path(), "https://google.com");
/// ```
pub fn path(&self) -> Token {
    // A link without a LINK_PATH token is treated as a broken invariant, hence the panic.
    let path = token(&self.syntax, SyntaxKind::LINK_PATH);
    path.expect("link must contains LINK_PATH")
}
/// Returns `true` if link contains description
///
/// ```rust
/// use orgize::{Org, ast::Link};
///
/// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
/// assert!(!link.has_description());
/// let link = Org::parse("[[https://google.com][Google]]").first_node::<Link>().unwrap();
/// assert!(link.has_description());
/// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::<Link>().unwrap();
/// assert!(link.has_description());
/// ```
pub fn has_description(&self) -> bool {
    // A single-bracket opener among the direct children marks the start of a description.
    for element in self.syntax().children_with_tokens() {
        if element.kind() == SyntaxKind::L_BRACKET {
            return true;
        }
    }
    false
}
/// Returns parsed description
///
/// Returns empty iterator if this link doesn't contain description
///
/// ```rust
/// use orgize::{Org, ast::Link, SyntaxKind};
///
/// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
/// assert_eq!(link.description().count(), 0);
///
/// let link = Org::parse("[[https://google.com][Google]]").first_node::<Link>().unwrap();
/// let description = link.description().collect::<Vec<_>>();
/// assert_eq!((description[0].kind(), description[0].to_string()), (SyntaxKind::TEXT, "Google".into()));
///
/// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::<Link>().unwrap();
/// let description = link.description().collect::<Vec<_>>();
/// assert_eq!((description[0].kind(), description[0].to_string()), (SyntaxKind::BOLD, "*abc*".into()));
/// assert_eq!((description[2].kind(), description[2].to_string()), (SyntaxKind::ITALIC, "/abc/".into()));
/// ```
pub fn description(&self) -> impl Iterator<Item = SyntaxElement> {
    // Jump to the bracket that opens the description (if any) ...
    let from_bracket = self
        .syntax()
        .children_with_tokens()
        .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET);

    // ... drop the bracket itself and stop right before the closing `]]`.
    from_bracket
        .skip(1)
        .take_while(|e| e.kind() != SyntaxKind::R_BRACKET2)
}
/// Returns description raw string
///
/// Returns empty string if this link doesn't contain description
///
/// ```rust
/// use orgize::{Org, ast::Link};
///
/// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
/// assert_eq!(link.description_raw(), "");
/// let link = Org::parse("[[https://google.com][Google]]").first_node::<Link>().unwrap();
/// assert_eq!(link.description_raw(), "Google");
/// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::<Link>().unwrap();
/// assert_eq!(link.description_raw(), "*abc* /abc/");
/// ```
pub fn description_raw(&self) -> String {
    // Concatenate the source text of every description element, in order.
    let mut raw = String::new();
    for element in self.description() {
        raw.push_str(&element.to_string());
    }
    raw
}
/// Returns `true` if link is an image link
///
/// ```rust
/// use orgize::{Org, ast::Link};
///
/// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
/// assert!(!link.is_image());
/// let link = Org::parse("[[file:/home/dominik/images/jupiter.jpg]]").first_node::<Link>().unwrap();
/// assert!(link.is_image());
/// ```
pub fn is_image(&self) -> bool {
    const IMAGE_SUFFIX: &[&str] = &[
        // https://github.com/bzg/org-mode/blob/7de1e818d5fbe6a05c6b1a007eed07dc27e7246b/lisp/ox.el#L253
        ".png", ".jpeg", ".jpg", ".gif", ".tiff", ".tif", ".xbm", ".xpm", ".pbm", ".pgm",
        ".ppm", ".webp", ".avif", ".svg",
    ];

    let path = self.path();

    // The suffix comparison is case-sensitive.
    if !IMAGE_SUFFIX.iter().any(|suffix| path.ends_with(suffix)) {
        return false;
    }

    // A link with a description is never reported as an image.
    !self.has_description()
}
/// Returns caption keyword in this link
///
/// ```rust
/// use orgize::{Org, ast::Link};
///
/// let link = Org::parse("#+CAPTION: image link\n[[file:/home/dominik/images/jupiter.jpg]]").first_node::<Link>().unwrap();
/// assert_eq!(link.caption().unwrap().value().unwrap(), " image link");
/// ```
pub fn caption(&self) -> Option<AffiliatedKeyword> {
    // TODO: support other element type
    // `parent()` already hands back an owned node, so it can be passed to
    // `cast` directly without cloning.
    let parent = self.syntax.parent()?;
    // Only a link whose direct parent is a paragraph can have a caption.
    let paragraph = Paragraph::cast(parent)?;
    paragraph.caption()
}
}

139
src/ast/list.rs Normal file
View file

@ -0,0 +1,139 @@
use super::{filter_token, List, ListItem, Token};
use crate::{syntax::SyntaxKind, SyntaxElement};
impl List {
/// Returns `true` if this list is an ordered list
///
/// Only the first item is inspected: the list is ordered when the bullet
/// of that item starts with an ASCII digit.
///
/// ```rust
/// use orgize::{Org, ast::List};
///
/// let list = Org::parse("+ 1").first_node::<List>().unwrap();
/// assert!(!list.is_ordered());
///
/// let list = Org::parse("1. 1").first_node::<List>().unwrap();
/// assert!(list.is_ordered());
///
/// let list = Org::parse("1) 1\n- 2\n3. 3").first_node::<List>().unwrap();
/// assert!(list.is_ordered());
/// ```
pub fn is_ordered(&self) -> bool {
    self.items().next().map_or_else(
        || {
            // An empty list is a parser bug: loud in debug builds,
            // reported as unordered in release builds.
            debug_assert!(false, "list must contains LIST_ITEM");
            false
        },
        |item| item.bullet().starts_with(|c: char| c.is_ascii_digit()),
    )
}
/// Returns `true` if this list contains a TAG
///
/// ```rust
/// use orgize::{Org, ast::List};
///
/// let list = Org::parse("- some tag :: item 2.1").first_node::<List>().unwrap();
/// assert!(list.is_descriptive());
/// let list = Org::parse("2. [X] item 2").first_node::<List>().unwrap();
/// assert!(!list.is_descriptive());
/// ```
pub fn is_descriptive(&self) -> bool {
    // Only the first item decides whether the list is descriptive.
    match self.items().next() {
        Some(first) => first
            .syntax
            .children()
            .any(|child| child.kind() == SyntaxKind::LIST_ITEM_TAG),
        None => {
            // An empty list is a parser bug: loud in debug builds,
            // reported as non-descriptive in release builds.
            debug_assert!(false, "list must contains LIST_ITEM");
            false
        }
    }
}
}
impl ListItem {
/// Returns the indentation of this item, i.e. the length in bytes of its `LIST_ITEM_INDENT` token
///
/// ```rust
/// use orgize::{Org, ast::ListItem};
///
/// let item = Org::parse("- 1").first_node::<ListItem>().unwrap();
/// assert_eq!(item.indent(), 0);
/// let item = Org::parse(" \t * 2").first_node::<ListItem>().unwrap();
/// assert_eq!(item.indent(), 3);
/// ```
pub fn indent(&self) -> usize {
    let indent = self
        .syntax
        .children_with_tokens()
        .find_map(filter_token(SyntaxKind::LIST_ITEM_INDENT));

    match indent {
        Some(token) => token.len(),
        // A missing indent token is a parser bug: loud in debug builds,
        // reported as zero indentation in release builds.
        None => {
            debug_assert!(false, "list item must contains LIST_ITEM_INDENT");
            0
        }
    }
}
/// Returns the bullet of this item, including the whitespace that follows it
///
/// # Panics
///
/// Panics if the node contains no `LIST_ITEM_BULLET` token.
///
/// ```rust
/// use orgize::{Org, ast::ListItem};
///
/// let item = Org::parse("- some tag").first_node::<ListItem>().unwrap();
/// assert_eq!(item.bullet(), "- ");
/// let item = Org::parse("2. [X] item 2").first_node::<ListItem>().unwrap();
/// assert_eq!(item.bullet(), "2. ");
/// ```
pub fn bullet(&self) -> Token {
    let bullet = self
        .syntax
        .children_with_tokens()
        .find_map(filter_token(SyntaxKind::LIST_ITEM_BULLET));

    bullet.expect("list item must contains LIST_ITEM_BULLET")
}
/// Returns the text inside the checkbox of this item, or `None` if it has no checkbox
///
/// ```rust
/// use orgize::{Org, ast::ListItem};
///
/// let item = Org::parse("- [-] item 1").first_node::<ListItem>().unwrap();
/// assert_eq!(item.checkbox().unwrap(), "-");
/// let item = Org::parse("2. [X] item 2").first_node::<ListItem>().unwrap();
/// assert_eq!(item.checkbox().unwrap(), "X");
/// let item = Org::parse("3) [ ] item 3").first_node::<ListItem>().unwrap();
/// assert_eq!(item.checkbox().unwrap(), " ");
/// ```
pub fn checkbox(&self) -> Option<Token> {
    let check_box = self
        .syntax
        .children()
        .find(|node| node.kind() == SyntaxKind::LIST_ITEM_CHECK_BOX)?;

    // The state is the first TEXT token inside the checkbox node.
    check_box
        .children_with_tokens()
        .find_map(filter_token(SyntaxKind::TEXT))
}
/// Returns the first `TEXT` token inside the `LIST_ITEM_COUNTER` node of this item
///
/// Returns `None` if the item has no counter node.
// NOTE(review): presumably this is the value of a `[@N]` counter cookie; confirm against the parser.
pub fn counter(&self) -> Option<Token> {
    let counter = self
        .syntax
        .children()
        .find(|node| node.kind() == SyntaxKind::LIST_ITEM_COUNTER)?;

    counter
        .children_with_tokens()
        .find_map(filter_token(SyntaxKind::TEXT))
}
/// Returns the parsed tag of this item
///
/// `WHITESPACE` and `COLON2` tokens that are direct children of the tag
/// node are skipped. Returns an empty iterator if the item has no tag.
///
/// ```rust
/// use orgize::{Org, ast::ListItem};
///
/// let item = Org::parse("+ this is *TAG* :: item1").first_node::<ListItem>().unwrap();
/// let tag = item.tag().map(|n| n.to_string()).collect::<String>();
/// assert_eq!(tag, "this is *TAG* ");
/// ```
pub fn tag(&self) -> impl Iterator<Item = SyntaxElement> {
    let tag_node = self
        .syntax
        .children()
        .find(|node| node.kind() == SyntaxKind::LIST_ITEM_TAG);

    tag_node.into_iter().flat_map(|node| {
        node.children_with_tokens().filter(|child| {
            let kind = child.kind();
            !(kind == SyntaxKind::WHITESPACE || kind == SyntaxKind::COLON2)
        })
    })
}
}

35
src/ast/macros.rs Normal file
View file

@ -0,0 +1,35 @@
use crate::SyntaxKind;
use super::{filter_token, Macros, Token};
impl Macros {
/// Returns the name of this macro, i.e. its first `TEXT` token
///
/// # Panics
///
/// Panics if the node contains no `TEXT` token.
///
/// ```rust
/// use orgize::{Org, ast::Macros};
///
/// let m = Org::parse("{{{title}}}").first_node::<Macros>().unwrap();
/// assert_eq!(m.key(), "title");
/// let m = Org::parse("{{{two_arg_macro(1, 2)}}}").first_node::<Macros>().unwrap();
/// assert_eq!(m.key(), "two_arg_macro");
/// ```
pub fn key(&self) -> Token {
    let mut texts = self
        .syntax
        .children_with_tokens()
        .filter_map(filter_token(SyntaxKind::TEXT));

    texts.next().expect("macros must contains TEXT")
}
/// Returns the raw, unsplit arguments of this macro, or `None` if it has none
///
/// ```rust
/// use orgize::{Org, ast::Macros};
///
/// let m = Org::parse("{{{title}}}").first_node::<Macros>().unwrap();
/// assert!(m.args().is_none());
/// let m = Org::parse("{{{two_arg_macro(1, 2)}}}").first_node::<Macros>().unwrap();
/// assert_eq!(m.args().unwrap(), "1, 2");
/// ```
pub fn args(&self) -> Option<Token> {
    let mut texts = self
        .syntax
        .children_with_tokens()
        .filter_map(filter_token(SyntaxKind::TEXT));

    // Index 1 = second TEXT token; the first one is the macro name.
    texts.nth(1)
}
}

176
src/ast/mod.rs Normal file
View file

@ -0,0 +1,176 @@
mod generated;
mod affiliated_keyword;
mod block;
mod clock;
#[cfg(feature = "syntax-org-fc")]
mod cloze;
mod comment;
mod document;
mod drawer;
mod entity;
mod fixed_width;
mod headline;
mod inline_call;
mod inline_src;
mod keyword;
mod link;
mod list;
mod macros;
mod planning;
mod snippet;
mod table;
mod timestamp;
#[cfg(feature = "syntax-org-fc")]
pub use cloze::*;
pub use generated::*;
pub use headline::*;
pub use rowan::ast::support::*;
pub use timestamp::*;
use crate::{
syntax::{SyntaxKind, SyntaxNode},
SyntaxToken,
};
use rowan::{ast::AstNode, NodeOrToken, TextRange, TextSize};
use std::{
borrow::{Borrow, Cow},
fmt,
hash::Hash,
ops::Deref,
};
/// Returns the number of `BLANK_LINE` elements that are direct children of `parent`
pub fn blank_lines(parent: &SyntaxNode) -> usize {
    let mut count = 0;
    for child in parent.children_with_tokens() {
        if child.kind() == SyntaxKind::BLANK_LINE {
            count += 1;
        }
    }
    count
}
/// Returns the last direct child node of `parent` that can be cast to `N`
pub fn last_child<N: AstNode>(parent: &rowan::SyntaxNode<N::Language>) -> Option<N> {
    let mut last = None;
    for child in parent.children() {
        // Keep overwriting so the final match wins.
        if let Some(node) = N::cast(child) {
            last = Some(node);
        }
    }
    last
}
/// Returns the last direct child token of `parent` with the given `kind`
pub fn last_token(parent: &SyntaxNode, kind: SyntaxKind) -> Option<Token> {
    let mut last = None;
    for token in parent
        .children_with_tokens()
        .filter_map(filter_token(kind))
    {
        // Keep overwriting so the final match wins.
        last = Some(token);
    }
    last
}
pub fn token(parent: &SyntaxNode, kind: SyntaxKind) -> Option<Token> {
rowan::ast::support::token(parent, kind).map(Token)
}
/// Builds a closure that turns a syntax element into a [`Token`] if it is a token of the given `kind`
///
/// The closure returns `None` for nodes and for tokens of any other kind,
/// which makes it usable with `filter_map`, `find_map` and `and_then`.
pub fn filter_token(
    kind: SyntaxKind,
) -> impl Fn(NodeOrToken<SyntaxNode, SyntaxToken>) -> Option<Token> {
    move |elem| {
        if let NodeOrToken::Token(tk) = elem {
            if tk.kind() == kind {
                return Some(Token(tk));
            }
        }
        None
    }
}
/// A simple wrapper of `SyntaxToken`
///
/// It implements the `AsRef<str>`, `Borrow<str>`, `Deref<Target = str>`
/// and `Display` traits, so `str` methods can be called on it directly.
///
/// It also implements the `Hash` and `Eq` traits, so it can be
/// used as a key in a `HashMap`. However, note that it only
/// compares the underlying text inside `SyntaxToken`,
/// meaning two `Token`s from different positions
/// might be considered equal.
#[derive(Eq, Clone)]
pub struct Token(pub(crate) SyntaxToken);
impl Token {
pub fn syntax(&self) -> &SyntaxToken {
&self.0
}
/// Range of this token
pub fn text_range(&self) -> TextRange {
self.0.text_range()
}
/// Beginning position of this token
pub fn start(&self) -> TextSize {
self.0.text_range().start()
}
/// Ending position of this token
pub fn end(&self) -> TextSize {
self.0.text_range().end()
}
}
impl AsRef<str> for Token {
fn as_ref(&self) -> &str {
self.0.text()
}
}
impl Borrow<str> for Token {
    /// Borrows the token text, which lets a `&str` look up a `Token` key in a map.
    fn borrow(&self) -> &str {
        self.0.text()
    }
}
impl<'a> PartialEq<&'a str> for Token {
    /// Compares the token text with a string slice.
    fn eq(&self, other: &&'a str) -> bool {
        self.0.text() == *other
    }
}
impl PartialEq<String> for Token {
fn eq(&self, other: &String) -> bool {
self.as_ref() == other
}
}
impl PartialEq<Token> for Token {
    /// Two tokens are equal when their texts are equal; kind and position are ignored.
    fn eq(&self, other: &Token) -> bool {
        self.0.text() == other.0.text()
    }
}
impl Hash for Token {
    /// Hashes the token text only, consistent with the text-based equality of `Token`.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.0.text().hash(state)
    }
}
impl<'a> PartialEq<Cow<'a, str>> for Token {
fn eq(&self, other: &Cow<'a, str>) -> bool {
self.as_ref() == other
}
}
impl PartialEq<str> for Token {
    /// Compares the token text with a `str`.
    fn eq(&self, other: &str) -> bool {
        self.0.text() == other
    }
}
impl Deref for Token {
    type Target = str;

    /// Dereferences to the token text, so `str` methods can be called on a `Token`.
    #[inline]
    fn deref(&self) -> &str {
        self.0.text()
    }
}
impl fmt::Debug for Token {
    /// Formats exactly like the token text would be formatted with `Debug`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Debug>::fmt(self.as_ref(), f)
    }
}
impl fmt::Display for Token {
    /// Formats exactly like the token text would be formatted with `Display`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Display>::fmt(self.as_ref(), f)
    }
}

67
src/ast/planning.rs Normal file
View file

@ -0,0 +1,67 @@
use rowan::ast::AstNode;
use super::{Planning, Timestamp};
use crate::syntax::SyntaxKind;
impl Planning {
/// Returns deadline timestamp
///
///
/// ```rust
/// use orgize::{ast::Planning, Org};
///
/// let s = Org::parse("* a\nDEADLINE: <2019-04-08 Mon>")
/// .first_node::<Planning>()
/// .unwrap()
/// .deadline()
/// .unwrap();
/// assert_eq!(s.day_start().unwrap(), "08");
/// ```
pub fn deadline(&self) -> Option<Timestamp> {
    // If several DEADLINE entries exist, the last one wins.
    let entry = self
        .syntax
        .children()
        .filter(|node| node.kind() == SyntaxKind::PLANNING_DEADLINE)
        .last()?;

    // The timestamp is the first child of the entry that casts to `Timestamp`.
    entry.children().find_map(Timestamp::cast)
}
/// Returns scheduled timestamp
///
/// ```rust
/// use orgize::{ast::Planning, Org};
///
/// let s = Org::parse("* a\nSCHEDULED: <2019-04-08 Mon>")
/// .first_node::<Planning>()
/// .unwrap()
/// .scheduled()
/// .unwrap();
/// assert_eq!(s.year_start().unwrap(), "2019");
/// ```
pub fn scheduled(&self) -> Option<Timestamp> {
    // If several SCHEDULED entries exist, the last one wins.
    let entry = self
        .syntax
        .children()
        .filter(|node| node.kind() == SyntaxKind::PLANNING_SCHEDULED)
        .last()?;

    // The timestamp is the first child of the entry that casts to `Timestamp`.
    entry.children().find_map(Timestamp::cast)
}
/// Returns closed timestamp
///
/// ```rust
/// use orgize::{ast::Planning, Org};
///
/// let s = Org::parse("* a\nCLOSED: <2019-04-08 Mon>")
/// .first_node::<Planning>()
/// .unwrap()
/// .closed()
/// .unwrap();
/// assert_eq!(s.month_start().unwrap(), "04");
/// ```
pub fn closed(&self) -> Option<Timestamp> {
    // If several CLOSED entries exist, the last one wins.
    let entry = self
        .syntax
        .children()
        .filter(|node| node.kind() == SyntaxKind::PLANNING_CLOSED)
        .last()?;

    // The timestamp is the first child of the entry that casts to `Timestamp`.
    entry.children().find_map(Timestamp::cast)
}
}

34
src/ast/snippet.rs Normal file
View file

@ -0,0 +1,34 @@
use crate::syntax::SyntaxKind;
use super::{filter_token, Snippet, Token};
impl Snippet {
/// Returns the backend name of this snippet, i.e. its first `TEXT` token
///
/// # Panics
///
/// Panics if the node contains no `TEXT` token.
///
/// ```rust
/// use orgize::{Org, ast::Snippet};
///
/// let snippet = Org::parse("@@BACKEND:VALUE@@").first_node::<Snippet>().unwrap();
/// assert_eq!(snippet.backend(), "BACKEND");
/// ```
pub fn backend(&self) -> Token {
    let mut texts = self
        .syntax
        .children_with_tokens()
        .filter_map(filter_token(SyntaxKind::TEXT));

    texts.next().expect("snippet must contains TEXT")
}
/// Returns the value of this snippet, i.e. its second `TEXT` token
///
/// # Panics
///
/// Panics if the node contains fewer than two `TEXT` tokens.
///
/// ```rust
/// use orgize::{Org, ast::Snippet};
///
/// let snippet = Org::parse("@@BACKEND:@@").first_node::<Snippet>().unwrap();
/// assert_eq!(snippet.value(), "");
/// let snippet = Org::parse("@@BACKEND:VALUE@@").first_node::<Snippet>().unwrap();
/// assert_eq!(snippet.value(), "VALUE");
/// ```
pub fn value(&self) -> Token {
    let mut texts = self
        .syntax
        .children_with_tokens()
        .filter_map(filter_token(SyntaxKind::TEXT));

    // Index 1 = second TEXT token; the first one is the backend.
    texts.nth(1).expect("snippet must contains two TEXT")
}
}

110
src/ast/table.rs Normal file
View file

@ -0,0 +1,110 @@
use rowan::ast::AstNode;
use super::{filter_token, OrgTable, OrgTableRow, Token};
use crate::syntax::SyntaxKind;
impl OrgTable {
/// Returns `true` if this table has a header
///
/// A table has a header when it contains at least two row groups.
///
/// ```rust
/// use orgize::{Org, ast::OrgTable};
///
/// let org = Org::parse(r#"
/// | a | b |
/// |---+---|
/// | c | d |"#);
/// let table = org.first_node::<OrgTable>().unwrap();
/// assert!(table.has_header());
///
/// let org = Org::parse(r#"
/// | a | b |
/// | 0 | 1 |
/// |---+---|
/// | a | w |"#);
/// let table = org.first_node::<OrgTable>().unwrap();
/// assert!(table.has_header());
///
/// let org = Org::parse(r#"
/// | a | b |
/// | c | d |"#);
/// let table = org.first_node::<OrgTable>().unwrap();
/// assert!(!table.has_header());
///
/// let org = Org::parse(r#"
/// |---+---|
/// | a | b |
/// | c | d |
/// |---+---|"#);
/// let table = org.first_node::<OrgTable>().unwrap();
/// assert!(!table.has_header());
/// ```
pub fn has_header(&self) -> bool {
    let mut rest = self
        .syntax
        .children()
        .filter_map(OrgTableRow::cast)
        // Leading rule rows do not open a group.
        .skip_while(|row| row.is_rule())
        // Skip the whole first group of standard rows.
        .skip_while(|row| row.is_standard());

    // Any non-rule row left over belongs to a second group,
    // which makes the first group a header.
    rest.any(|row| !row.is_rule())
}
/// Formulas associated to the table
///
/// ```rust
/// use orgize::{Org, ast::OrgTable};
///
/// let table = Org::parse("| a |").first_node::<OrgTable>().unwrap();
/// assert_eq!(table.tblfm().count(), 0);
///
/// let table = Org::parse("| a |\n#+tblfm: test").first_node::<OrgTable>().unwrap();
/// let tblfm = table.tblfm().collect::<Vec<_>>();
/// assert_eq!(tblfm.len(), 1);
/// assert_eq!(tblfm[0], " test");
///
/// let table = Org::parse("| a |\n#+TBLFM: test1\n#+TBLFM: test2").first_node::<OrgTable>().unwrap();
/// let tblfm = table.tblfm().collect::<Vec<_>>();
/// assert_eq!(tblfm.len(), 2);
/// assert_eq!(tblfm[0], " test1");
/// assert_eq!(tblfm[1], " test2");
/// ```
pub fn tblfm(&self) -> impl Iterator<Item = Token> {
    self.syntax
        .children()
        // Every KEYWORD child of the table is taken into account.
        .filter(|node| node.kind() == SyntaxKind::KEYWORD)
        // The formula is the last TEXT token of the keyword.
        .filter_map(|keyword| {
            keyword
                .children_with_tokens()
                .filter_map(filter_token(SyntaxKind::TEXT))
                .last()
        })
}
}
impl OrgTableRow {
/// Returns `true` if this row is a rule
///
/// ```rust
/// use orgize::{Org, ast::OrgTableRow};
///
/// let org = Org::parse("|----|----|\n|Foo |Bar |");
/// let row = org.first_node::<OrgTableRow>().unwrap();
/// assert!(row.is_rule());
/// ```
pub fn is_rule(&self) -> bool {
    // The row type is encoded in the kind of the node itself.
    let kind = self.syntax.kind();
    kind == SyntaxKind::ORG_TABLE_RULE_ROW
}
/// Returns `true` if this row is a standard row
///
/// ```rust
/// use orgize::{Org, ast::OrgTableRow};
///
/// let org = Org::parse("|Foo |Bar |\n|----|----|");
/// let row = org.first_node::<OrgTableRow>().unwrap();
/// assert!(row.is_standard());
/// ```
pub fn is_standard(&self) -> bool {
    // The row type is encoded in the kind of the node itself.
    let kind = self.syntax.kind();
    kind == SyntaxKind::ORG_TABLE_STANDARD_ROW
}
}

301
src/ast/timestamp.rs Normal file
View file

@ -0,0 +1,301 @@
use super::{filter_token, Timestamp};
use crate::syntax::SyntaxKind;
/// Unit of the value of a timestamp repeater, as returned by `Timestamp::repeater_unit`.
// NOTE(review): the unit letters of `Hour`, `Month` and `Year` are not exercised
// by the repeater examples in this file; presumably `h`, `m` and `y`. Confirm against the parser.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum TimeUnit {
Hour,
/// Written `d`, e.g. `.+10d`.
Day,
/// Written `w`, e.g. `+1w`.
Week,
Month,
Year,
}
/// Type of a timestamp repeater, as returned by `Timestamp::repeater_type`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum RepeaterType {
/// Written with a `+` mark, e.g. `+1w`.
Cumulate,
// NOTE(review): presumably written with a `++` mark; confirm against the parser.
CatchUp,
/// Written with a `.+` mark, e.g. `.+10d`.
Restart,
}
/// Type of a timestamp warning delay, as returned by `Timestamp::warning_type`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum DelayType {
/// Written with a `-` mark, e.g. `-3y`.
All,
/// Written with a `--` mark, e.g. `--10m`.
First,
}
impl Timestamp {
/// Returns `true` if this is an active timestamp, i.e. a `TIMESTAMP_ACTIVE` node
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let ts = Org::parse("<2003-09-16 Tue 09:39-10:39>").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_active());
/// let ts = Org::parse("<2003-09-16 Tue 09:39>--<2003-09-16 Tue 10:39>").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_active());
/// let ts = Org::parse("<2003-09-16 Tue 09:39>").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_active());
/// ```
pub fn is_active(&self) -> bool {
    // The flavour of a timestamp is encoded in the kind of the node itself.
    let kind = self.syntax.kind();
    kind == SyntaxKind::TIMESTAMP_ACTIVE
}
/// Returns `true` if this is an inactive timestamp, i.e. a `TIMESTAMP_INACTIVE` node
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_inactive());
/// let ts = Org::parse("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_inactive());
/// let ts = Org::parse("[2003-09-16 Tue 09:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_inactive());
/// ```
pub fn is_inactive(&self) -> bool {
    // The flavour of a timestamp is encoded in the kind of the node itself.
    let kind = self.syntax.kind();
    kind == SyntaxKind::TIMESTAMP_INACTIVE
}
/// Returns `true` if this is a diary timestamp, i.e. a `TIMESTAMP_DIARY` node
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let ts = Org::parse("<%%(org-calendar-holiday)>").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_diary());
/// ```
pub fn is_diary(&self) -> bool {
    // The flavour of a timestamp is encoded in the kind of the node itself.
    let kind = self.syntax.kind();
    kind == SyntaxKind::TIMESTAMP_DIARY
}
/// Returns `true` if this timestamp has a range
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_range());
/// let ts = Org::parse("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_range());
/// let ts = Org::parse("[2003-09-16]--[2003-09-16]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_range());
/// let ts = Org::parse("[2003-09-16 Tue 09:39]").first_node::<Timestamp>().unwrap();
/// assert!(!ts.is_range());
/// ```
pub fn is_range(&self) -> bool {
    let minus_count = self
        .syntax
        .children_with_tokens()
        .filter_map(filter_token(SyntaxKind::MINUS))
        .count();

    // NOTE(review): presumably a single date already contributes two MINUS
    // tokens (`YYYY-MM-DD`), so a third one indicates a range; confirm
    // against the parser.
    minus_count > 2
}
/// Returns the type of the first repeater of this timestamp, or `None` if it has no repeater
///
/// ```rust
/// use orgize::{Org, ast::{Timestamp, RepeaterType}};
///
/// let t = Org::parse("[2000-01-01 +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_type(), Some(RepeaterType::Cumulate));
/// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_type(), Some(RepeaterType::Restart));
/// let t = Org::parse("[2000-01-01 --1y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_type(), None);
/// ```
pub fn repeater_type(&self) -> Option<RepeaterType> {
    // Field 0 of the repeater description is its type.
    let repeater = self.nth_repeater(0)?;
    Some(repeater.0)
}
/// Returns the numeric value of the first repeater, or `None` if no
/// repeater could be read.
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let t = Org::parse("[2000-01-01 +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_value(), Some(1));
/// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_value(), Some(10));
/// let t = Org::parse("[2000-01-01 --1y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_value(), None);
/// ```
pub fn repeater_value(&self) -> Option<u32> {
    let (_, value, _) = self.nth_repeater(0)?;
    Some(value)
}
/// Returns the time unit of the first repeater, or `None` if no repeater
/// could be read.
///
/// ```rust
/// use orgize::{Org, ast::{Timestamp, TimeUnit}};
///
/// let t = Org::parse("[2000-01-01 +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_unit(), Some(TimeUnit::Week));
/// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_unit(), Some(TimeUnit::Day));
/// let t = Org::parse("[2000-01-01 --1y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_unit(), None);
/// ```
pub fn repeater_unit(&self) -> Option<TimeUnit> {
    let (_, _, unit) = self.nth_repeater(0)?;
    Some(unit)
}
/// Returns the mark type of the first warning delay, or `None` if no delay
/// could be read.
///
/// ```rust
/// use orgize::{Org, ast::{Timestamp, DelayType}};
///
/// let t = Org::parse("[2000-01-01 -3y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_type(), Some(DelayType::All));
/// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_type(), Some(DelayType::All));
/// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_type(), Some(DelayType::First));
/// ```
pub fn warning_type(&self) -> Option<DelayType> {
    let (mark, _, _) = self.nth_delay(0)?;
    Some(mark)
}
/// Returns the numeric value of the first warning delay, or `None` if no
/// delay could be read.
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let t = Org::parse("[2000-01-01 -3y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_value(), Some(3));
/// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_value(), Some(5));
/// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_value(), Some(10));
/// ```
pub fn warning_value(&self) -> Option<u32> {
    let (_, value, _) = self.nth_delay(0)?;
    Some(value)
}
/// Returns the time unit of the first warning delay, or `None` if no delay
/// could be read.
///
/// ```rust
/// use orgize::{Org, ast::{Timestamp, TimeUnit}};
///
/// let t = Org::parse("[2000-01-01 -3y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_unit(), Some(TimeUnit::Year));
/// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_unit(), Some(TimeUnit::Week));
/// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_unit(), Some(TimeUnit::Month));
/// ```
pub fn warning_unit(&self) -> Option<TimeUnit> {
    let (_, _, unit) = self.nth_delay(0)?;
    Some(unit)
}
/// Reads the `nth` (zero-based) repeater of this timestamp as
/// `(mark, value, unit)`.
///
/// Returns `None` when there is no such repeater mark, or when the mark or
/// either of the two elements following it is not a token with a
/// recognised text.
fn nth_repeater(&self, nth: usize) -> Option<(RepeaterType, u32, TimeUnit)> {
    // Number of repeater marks that still have to be seen before stopping.
    let mut remaining = nth + 1;
    let mut elements = self.syntax.children_with_tokens().skip_while(|element| {
        if element.kind() != SyntaxKind::TIMESTAMP_REPEATER_MARK {
            return true;
        }
        remaining -= 1;
        remaining != 0
    });

    // The first element left in the iterator is the wanted mark itself.
    let mark = match elements.next()?.as_token()?.text() {
        "++" => RepeaterType::CatchUp,
        "+" => RepeaterType::Cumulate,
        ".+" => RepeaterType::Restart,
        _ => return None,
    };

    let value = elements.next()?.as_token()?.text().parse::<u32>().ok()?;

    let unit = match elements.next()?.as_token()?.text() {
        "h" => TimeUnit::Hour,
        "d" => TimeUnit::Day,
        "w" => TimeUnit::Week,
        "m" => TimeUnit::Month,
        "y" => TimeUnit::Year,
        _ => return None,
    };

    Some((mark, value, unit))
}
/// Reads the `nth` (zero-based) warning delay of this timestamp as
/// `(mark, value, unit)`.
///
/// Returns `None` when there is no such delay mark, or when the mark or
/// either of the two elements following it is not a token with a
/// recognised text.
fn nth_delay(&self, nth: usize) -> Option<(DelayType, u32, TimeUnit)> {
    // Number of delay marks that still have to be seen before stopping.
    let mut remaining = nth + 1;
    let mut elements = self.syntax.children_with_tokens().skip_while(|element| {
        if element.kind() != SyntaxKind::TIMESTAMP_DELAY_MARK {
            return true;
        }
        remaining -= 1;
        remaining != 0
    });

    // The first element left in the iterator is the wanted mark itself.
    let mark = match elements.next()?.as_token()?.text() {
        "-" => DelayType::All,
        "--" => DelayType::First,
        _ => return None,
    };

    let value = elements.next()?.as_token()?.text().parse::<u32>().ok()?;

    let unit = match elements.next()?.as_token()?.text() {
        "h" => TimeUnit::Hour,
        "d" => TimeUnit::Day,
        "w" => TimeUnit::Week,
        "m" => TimeUnit::Month,
        "y" => TimeUnit::Year,
        _ => return None,
    };

    Some((mark, value, unit))
}
/// Converts timestamp start to chrono NaiveDateTime
///
/// Seconds are always set to `0`. Returns `None` when any start component
/// is missing, fails to parse as a number, or does not form a valid date
/// or time.
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
/// use chrono::NaiveDateTime;
///
/// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert_eq!(ts.start_to_chrono().unwrap(), "2003-09-16T09:39:00".parse::<NaiveDateTime>().unwrap());
///
/// let ts = Org::parse("[2003-13-00 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.start_to_chrono().is_none());
/// ```
#[cfg(feature = "chrono")]
pub fn start_to_chrono(&self) -> Option<chrono::NaiveDateTime> {
    let date = chrono::NaiveDate::from_ymd_opt(
        self.year_start()?.parse().ok()?,
        self.month_start()?.parse().ok()?,
        self.day_start()?.parse().ok()?,
    )?;
    let time = chrono::NaiveTime::from_hms_opt(
        self.hour_start()?.parse().ok()?,
        self.minute_start()?.parse().ok()?,
        0,
    )?;
    Some(chrono::NaiveDateTime::new(date, time))
}
/// Converts timestamp end to chrono NaiveDateTime
///
/// Seconds are always set to `0`. Returns `None` when any end component is
/// missing, fails to parse as a number, or does not form a valid date or
/// time.
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
/// use chrono::NaiveDateTime;
///
/// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert_eq!(ts.end_to_chrono().unwrap(), "2003-09-16T10:39:00".parse::<NaiveDateTime>().unwrap());
/// ```
#[cfg(feature = "chrono")]
pub fn end_to_chrono(&self) -> Option<chrono::NaiveDateTime> {
    let date = chrono::NaiveDate::from_ymd_opt(
        self.year_end()?.parse().ok()?,
        self.month_end()?.parse().ok()?,
        self.day_end()?.parse().ok()?,
    )?;
    let time = chrono::NaiveTime::from_hms_opt(
        self.hour_end()?.parse().ok()?,
        self.minute_end()?.parse().ok()?,
        0,
    )?;
    Some(chrono::NaiveDateTime::new(date, time))
}
/// Returns chrono::TimeDelta between timestamp start and end
///
/// Returns `None` when either endpoint cannot be converted to a
/// `NaiveDateTime`.
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert_eq!(ts.time_delta().unwrap().num_hours(), 1);
/// ```
#[cfg(feature = "chrono")]
pub fn time_delta(&self) -> Option<chrono::TimeDelta> {
    let end = self.end_to_chrono()?;
    let start = self.start_to_chrono()?;
    Some(end - start)
}
}

View file

@ -1,18 +1,87 @@
use crate::syntax::document::document_node;
use crate::Org;
/// Sub/superscript parsing mode, used by `ParseConfig::use_sub_superscript`.
#[derive(Clone, Debug)]
pub enum UseSubSuperscript {
    /// Sub/superscript parsing is disabled.
    Nil,
    /// Sub/superscript parsing is enabled, but braces are required.
    Brace,
    /// Sub/superscript parsing is enabled.
    True,
}

impl UseSubSuperscript {
    /// Returns `true` for [`UseSubSuperscript::Nil`].
    pub fn is_nil(&self) -> bool {
        match self {
            UseSubSuperscript::Nil => true,
            UseSubSuperscript::Brace | UseSubSuperscript::True => false,
        }
    }

    /// Returns `true` for [`UseSubSuperscript::True`].
    pub fn is_true(&self) -> bool {
        match self {
            UseSubSuperscript::True => true,
            UseSubSuperscript::Nil | UseSubSuperscript::Brace => false,
        }
    }

    /// Returns `true` for [`UseSubSuperscript::Brace`].
    pub fn is_brace(&self) -> bool {
        match self {
            UseSubSuperscript::Brace => true,
            UseSubSuperscript::Nil | UseSubSuperscript::True => false,
        }
    }
}
/// Parse configuration
///
/// Consumed by `ParseConfig::parse`, which stores the configuration inside
/// the returned `Org`.
#[derive(Clone, Debug)]
pub struct ParseConfig {
/// Headline's todo keywords
///
/// NOTE(review): the default is `(["TODO"], ["DONE"])`, so the tuple is
/// presumably (not-done keywords, done keywords); confirm against the
/// headline parser.
pub todo_keywords: (Vec<String>, Vec<String>),
/// NOTE(review): presumably the counterpart of Emacs'
/// `org-element-dual-keywords`; confirm against the keyword parser.
pub dual_keywords: Vec<String>,
/// NOTE(review): presumably the counterpart of Emacs'
/// `org-element-parsed-keywords`; confirm against the keyword parser.
pub parsed_keywords: Vec<String>,
/// Control sub/superscript parsing
///
/// Equivalent to `org-use-sub-superscripts`
///
/// - `UseSubSuperscript::Nil`: disable parsing
/// - `UseSubSuperscript::True`: enable parsing
/// - `UseSubSuperscript::Brace`: enable parsing, but braces are required
pub use_sub_superscript: UseSubSuperscript,
/// Affiliated keywords
///
/// Equivalent to [`org-element-affiliated-keywords`](https://git.sr.ht/~bzg/org-mode/tree/6f960f3c6a4dfe137fbd33fef9f7dadfd229600c/item/lisp/org-element.el#L331)
pub affiliated_keywords: Vec<String>,
}
impl ParseConfig {
    /// Parses `input` using this configuration and returns the resulting
    /// `Org`, which takes ownership of the configuration.
    ///
    /// # Panics
    ///
    /// Panics if `document_node` returns an error or if its output cannot
    /// be converted into a node.
    pub fn parse(self, input: impl AsRef<str>) -> Org {
        let text = input.as_ref();
        let (_, document) = document_node((text, &self).into()).unwrap();
        let green = document.into_node().unwrap();
        Org {
            config: self,
            green,
        }
    }
}
impl Default for ParseConfig {
    /// Builds the default configuration:
    ///
    /// - todo keywords `TODO` / `DONE`
    /// - dual keywords `CAPTION` and `RESULTS`
    /// - parsed keywords `CAPTION`
    /// - sub/superscript parsing enabled (`UseSubSuperscript::True`)
    /// - the affiliated keywords listed below
    fn default() -> Self {
        ParseConfig {
            // `todo_keywords` must be initialised exactly once; a struct
            // expression that names the same field twice does not compile.
            todo_keywords: (vec!["TODO".into()], vec!["DONE".into()]),
            dual_keywords: vec!["CAPTION".into(), "RESULTS".into()],
            parsed_keywords: vec!["CAPTION".into()],
            use_sub_superscript: UseSubSuperscript::True,
            affiliated_keywords: vec![
                "CAPTION".into(),
                "DATA".into(),
                "HEADER".into(),
                "HEADERS".into(),
                "LABEL".into(),
                "NAME".into(),
                "PLOT".into(),
                "RESNAME".into(),
                "RESULT".into(),
                "RESULTS".into(),
                "SOURCE".into(),
                "SRCNAME".into(),
                "TBLNAME".into(),
            ],
        }
    }
}
lazy_static::lazy_static! {
/// Shared `ParseConfig` built from `ParseConfig::default()`.
pub static ref DEFAULT_CONFIG: ParseConfig = ParseConfig::default();
}

View file

@ -1,408 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::tag_no_case,
character::complete::{alpha1, space0},
sequence::preceded,
IResult,
};
use crate::elements::Element;
use crate::parse::combinators::{blank_lines_count, line, lines_till};
/// Special Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct SpecialBlock<'a> {
    /// Parameters of the block, if any
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub parameters: Option<Cow<'a, str>>,
    /// Name of the block
    pub name: Cow<'a, str>,
    /// Number of blank lines between the first line of the block and the
    /// next non-blank line
    pub pre_blank: usize,
    /// Number of blank lines between the last line of the block and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}

impl SpecialBlock<'_> {
    /// Converts this block into one that owns all of its string data.
    pub fn into_owned(self) -> SpecialBlock<'static> {
        let SpecialBlock {
            parameters,
            name,
            pre_blank,
            post_blank,
        } = self;
        SpecialBlock {
            name: Cow::Owned(name.into_owned()),
            parameters: parameters.map(|p| Cow::Owned(p.into_owned())),
            pre_blank,
            post_blank,
        }
    }
}
/// Quote Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct QuoteBlock<'a> {
    /// Parameters of the block, if any
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub parameters: Option<Cow<'a, str>>,
    /// Number of blank lines between the first line of the block and the
    /// next non-blank line
    pub pre_blank: usize,
    /// Number of blank lines between the last line of the block and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}

impl QuoteBlock<'_> {
    /// Converts this block into one that owns all of its string data.
    pub fn into_owned(self) -> QuoteBlock<'static> {
        let QuoteBlock {
            parameters,
            pre_blank,
            post_blank,
        } = self;
        QuoteBlock {
            parameters: parameters.map(|p| Cow::Owned(p.into_owned())),
            pre_blank,
            post_blank,
        }
    }
}
/// Center Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct CenterBlock<'a> {
    /// Parameters of the block, if any
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub parameters: Option<Cow<'a, str>>,
    /// Number of blank lines between the first line of the block and the
    /// next non-blank line
    pub pre_blank: usize,
    /// Number of blank lines between the last line of the block and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}

impl CenterBlock<'_> {
    /// Converts this block into one that owns all of its string data.
    pub fn into_owned(self) -> CenterBlock<'static> {
        let CenterBlock {
            parameters,
            pre_blank,
            post_blank,
        } = self;
        CenterBlock {
            parameters: parameters.map(|p| Cow::Owned(p.into_owned())),
            pre_blank,
            post_blank,
        }
    }
}
/// Verse Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct VerseBlock<'a> {
    /// Parameters of the block, if any
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub parameters: Option<Cow<'a, str>>,
    /// Number of blank lines between the first line of the block and the
    /// next non-blank line
    pub pre_blank: usize,
    /// Number of blank lines between the last line of the block and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}

impl VerseBlock<'_> {
    /// Converts this block into one that owns all of its string data.
    pub fn into_owned(self) -> VerseBlock<'static> {
        let VerseBlock {
            parameters,
            pre_blank,
            post_blank,
        } = self;
        VerseBlock {
            parameters: parameters.map(|p| Cow::Owned(p.into_owned())),
            pre_blank,
            post_blank,
        }
    }
}
/// Comment Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct CommentBlock<'a> {
    /// Text following the block name on the opening line, if any
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub data: Option<Cow<'a, str>>,
    /// Contents of the comment block
    pub contents: Cow<'a, str>,
    /// Number of blank lines between the last line of the block and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}

impl CommentBlock<'_> {
    /// Converts this block into one that owns all of its string data.
    pub fn into_owned(self) -> CommentBlock<'static> {
        let CommentBlock {
            data,
            contents,
            post_blank,
        } = self;
        CommentBlock {
            data: data.map(|d| Cow::Owned(d.into_owned())),
            contents: Cow::Owned(contents.into_owned()),
            post_blank,
        }
    }
}
/// Example Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct ExampleBlock<'a> {
    /// Text following the block name on the opening line, if any
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub data: Option<Cow<'a, str>>,
    /// Contents of the block
    pub contents: Cow<'a, str>,
    /// Number of blank lines between the last line of the block and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}

impl ExampleBlock<'_> {
    /// Converts this block into one that owns all of its string data.
    pub fn into_owned(self) -> ExampleBlock<'static> {
        let ExampleBlock {
            data,
            contents,
            post_blank,
        } = self;
        ExampleBlock {
            data: data.map(|d| Cow::Owned(d.into_owned())),
            contents: Cow::Owned(contents.into_owned()),
            post_blank,
        }
    }
}
/// Export Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct ExportBlock<'a> {
    /// Text following the block name on the opening line (may be empty)
    pub data: Cow<'a, str>,
    /// Contents of the block
    pub contents: Cow<'a, str>,
    /// Number of blank lines between the last line of the block and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}

impl ExportBlock<'_> {
    /// Converts this block into one that owns all of its string data.
    pub fn into_owned(self) -> ExportBlock<'static> {
        let ExportBlock {
            data,
            contents,
            post_blank,
        } = self;
        ExportBlock {
            data: Cow::Owned(data.into_owned()),
            contents: Cow::Owned(contents.into_owned()),
            post_blank,
        }
    }
}
/// Src Block Element
#[derive(Debug, Default, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct SourceBlock<'a> {
    /// Contents of the block
    pub contents: Cow<'a, str>,
    /// Language of the code in the block
    pub language: Cow<'a, str>,
    /// Text following the language on the opening line
    pub arguments: Cow<'a, str>,
    /// Number of blank lines between the last line of the block and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}

impl SourceBlock<'_> {
    /// Converts this block into one that owns all of its string data.
    pub fn into_owned(self) -> SourceBlock<'static> {
        let SourceBlock {
            contents,
            language,
            arguments,
            post_blank,
        } = self;
        SourceBlock {
            language: Cow::Owned(language.into_owned()),
            arguments: Cow::Owned(arguments.into_owned()),
            contents: Cow::Owned(contents.into_owned()),
            post_blank,
        }
    }

    // TODO: fn number_lines() -> Some(New) | Some(Continued) | None { }
    // TODO: fn preserve_indent() -> bool { }
    // TODO: fn use_labels() -> bool { }
    // TODO: fn label_fmt() -> Option<String> { }
    // TODO: fn retain_labels() -> bool { }
}
/// Intermediate result of parsing a `#+BEGIN_name ... #+END_name` block,
/// before it is turned into a concrete element by `into_element`.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct RawBlock<'a> {
/// Block name as written in the input (letters after `#+BEGIN_`)
pub name: &'a str,
/// Trimmed rest of the opening line; empty when nothing follows the name
pub arguments: &'a str,
/// Number of blank lines at the start of the block contents
pub pre_blank: usize,
/// Block contents up to the closing line
pub contents: &'a str,
/// Block contents with the leading blank lines removed
pub contents_without_blank_lines: &'a str,
/// Number of blank lines following the block
pub post_blank: usize,
}
impl<'a> RawBlock<'a> {
pub fn parse(input: &str) -> Option<(&str, RawBlock)> {
parse_internal(input).ok()
}
pub fn into_element(self) -> (Element<'a>, &'a str) {
let RawBlock {
name,
contents,
arguments,
pre_blank,
contents_without_blank_lines,
post_blank,
} = self;
let arguments: Option<Cow<'a, str>> = if arguments.is_empty() {
None
} else {
Some(arguments.into())
};
let element = match &*name.to_uppercase() {
"CENTER" => CenterBlock {
parameters: arguments,
pre_blank,
post_blank,
}
.into(),
"QUOTE" => QuoteBlock {
parameters: arguments,
pre_blank,
post_blank,
}
.into(),
"VERSE" => VerseBlock {
parameters: arguments,
pre_blank,
post_blank,
}
.into(),
"COMMENT" => CommentBlock {
data: arguments,
contents: contents.into(),
post_blank,
}
.into(),
"EXAMPLE" => ExampleBlock {
data: arguments,
contents: contents.into(),
post_blank,
}
.into(),
"EXPORT" => ExportBlock {
data: arguments.unwrap_or_default(),
contents: contents.into(),
post_blank,
}
.into(),
"SRC" => {
let (language, arguments) = match &arguments {
Some(Cow::Borrowed(args)) => {
let (language, arguments) =
args.split_at(args.find(' ').unwrap_or_else(|| args.len()));
(language.into(), arguments.into())
}
None => (Cow::Borrowed(""), Cow::Borrowed("")),
_ => unreachable!(
"`parse_block_element` returns `Some(Cow::Borrowed)` or `None`"
),
};
SourceBlock {
arguments,
language,
contents: contents.into(),
post_blank,
}
.into()
}
_ => SpecialBlock {
parameters: arguments,
name: name.into(),
pre_blank,
post_blank,
}
.into(),
};
(element, contents_without_blank_lines)
}
}
/// Parses `#+BEGIN_name args ... #+END_name` (keywords matched
/// case-insensitively) followed by any blank lines.
///
/// The opening line may be preceded by spaces; the closing line is matched
/// after trimming surrounding whitespace.
fn parse_internal(input: &str) -> IResult<&str, RawBlock, ()> {
    let (rest, _) = space0(input)?;
    let (rest, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(rest)?;
    let (rest, arguments) = line(rest)?;

    let closing = format!("#+END_{}", name);
    let (rest, contents) =
        lines_till(|candidate| candidate.trim().eq_ignore_ascii_case(&closing))(rest)?;

    let (contents_without_blank_lines, pre_blank) = blank_lines_count(contents)?;
    let (rest, post_blank) = blank_lines_count(rest)?;

    let block = RawBlock {
        name,
        contents,
        arguments: arguments.trim(),
        pre_blank,
        contents_without_blank_lines,
        post_blank,
    };
    Ok((rest, block))
}
// Checks `RawBlock::parse` on an empty upper-case block, an empty lower-case
// block (the name keeps its original case), and a block with an argument,
// one content line and a trailing newline.
// The raw-string fixtures are whitespace-sensitive.
#[test]
fn parse() {
assert_eq!(
RawBlock::parse(
r#"#+BEGIN_SRC
#+END_SRC"#
),
Some((
"",
RawBlock {
contents: "",
contents_without_blank_lines: "",
pre_blank: 0,
post_blank: 0,
name: "SRC".into(),
arguments: ""
}
))
);
assert_eq!(
RawBlock::parse(
r#"#+begin_src
#+end_src"#
),
Some((
"",
RawBlock {
contents: "",
contents_without_blank_lines: "",
pre_blank: 0,
post_blank: 0,
name: "src".into(),
arguments: ""
}
))
);
assert_eq!(
RawBlock::parse(
r#"#+BEGIN_SRC javascript
console.log('Hello World!');
#+END_SRC
"#
),
Some((
"",
RawBlock {
contents: "console.log('Hello World!');\n",
contents_without_blank_lines: "console.log('Hello World!');\n",
pre_blank: 0,
post_blank: 1,
name: "SRC".into(),
arguments: "javascript"
}
))
);
// TODO: more testing
}

View file

@ -1,242 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::tag,
character::complete::{char, digit1, space0},
combinator::recognize,
sequence::separated_pair,
IResult,
};
use crate::elements::timestamp::{parse_inactive, Datetime, Timestamp};
use crate::parse::combinators::{blank_lines_count, eol};
/// Clock Element
///
/// A clock is either closed (it has a start, an end and a duration) or still
/// running (it only has a start). With the `ser` feature the enum is
/// serialized untagged.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(untagged))]
#[derive(Debug, Clone)]
pub enum Clock<'a> {
/// Closed Clock
Closed {
/// Time start
start: Datetime<'a>,
/// Time end
end: Datetime<'a>,
/// Repeater of the parsed timestamp, if any (copied verbatim from the
/// timestamp returned by `parse_inactive`)
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
repeater: Option<Cow<'a, str>>,
/// Delay of the parsed timestamp, if any (copied verbatim from the
/// timestamp returned by `parse_inactive`)
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
delay: Option<Cow<'a, str>>,
/// Clock duration
duration: Cow<'a, str>,
/// Numbers of blank lines between the clock line and next non-blank
/// line or buffer's end
post_blank: usize,
},
/// Running Clock
Running {
/// Time start
start: Datetime<'a>,
/// Repeater of the parsed timestamp, if any (copied verbatim from the
/// timestamp returned by `parse_inactive`)
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
repeater: Option<Cow<'a, str>>,
/// Delay of the parsed timestamp, if any (copied verbatim from the
/// timestamp returned by `parse_inactive`)
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
delay: Option<Cow<'a, str>>,
/// Numbers of blank lines between the clock line and next non-blank
/// line or buffer's end
post_blank: usize,
},
}
impl Clock<'_> {
    /// Parses a clock from the start of `input`, returning the remaining
    /// input together with the clock, or `None` if parsing fails.
    pub(crate) fn parse(input: &str) -> Option<(&str, Clock)> {
        parse_internal(input).ok()
    }

    /// Converts this clock into one that owns all of its data.
    pub fn into_owned(self) -> Clock<'static> {
        match self {
            Clock::Closed {
                start,
                end,
                repeater,
                delay,
                duration,
                post_blank,
            } => Clock::Closed {
                start: start.into_owned(),
                end: end.into_owned(),
                repeater: repeater.map(Into::into).map(Cow::Owned),
                delay: delay.map(Into::into).map(Cow::Owned),
                duration: duration.into_owned().into(),
                post_blank,
            },
            Clock::Running {
                start,
                repeater,
                delay,
                post_blank,
            } => Clock::Running {
                start: start.into_owned(),
                repeater: repeater.map(Into::into).map(Cow::Owned),
                delay: delay.map(Into::into).map(Cow::Owned),
                post_blank,
            },
        }
    }

    /// Misspelled alias of [`Clock::into_owned`].
    ///
    /// Kept so that existing callers of the original name keep compiling;
    /// new code should call `into_owned`.
    pub fn into_onwed(self) -> Clock<'static> {
        self.into_owned()
    }

    /// Returns `true` if the clock is running.
    pub fn is_running(&self) -> bool {
        match self {
            Clock::Closed { .. } => false,
            Clock::Running { .. } => true,
        }
    }

    /// Returns `true` if the clock is closed.
    pub fn is_closed(&self) -> bool {
        match self {
            Clock::Closed { .. } => true,
            Clock::Running { .. } => false,
        }
    }

    /// Returns clock duration, or `None` if it's running.
    pub fn duration(&self) -> Option<&str> {
        match self {
            Clock::Closed { duration, .. } => Some(duration),
            Clock::Running { .. } => None,
        }
    }

    /// Constructs a timestamp from the clock.
    ///
    /// A closed clock yields `Timestamp::InactiveRange`, a running clock
    /// yields `Timestamp::Inactive`; all fields are cloned.
    pub fn value(&self) -> Timestamp {
        match self {
            Clock::Closed {
                start,
                end,
                repeater,
                delay,
                ..
            } => Timestamp::InactiveRange {
                start: start.clone(),
                end: end.clone(),
                repeater: repeater.clone(),
                delay: delay.clone(),
            },
            Clock::Running {
                start,
                repeater,
                delay,
                ..
            } => Timestamp::Inactive {
                start: start.clone(),
                repeater: repeater.clone(),
                delay: delay.clone(),
            },
        }
    }
}
/// Parses a `CLOCK:` line.
///
/// After optional leading spaces and the `CLOCK:` keyword an inactive
/// timestamp is expected. A timestamp range must be followed by
/// `=> H:MM`-style duration digits and produces `Clock::Closed`; a single
/// timestamp produces `Clock::Running`. In both cases the line must end
/// right after, and trailing blank lines are counted into `post_blank`.
fn parse_internal(input: &str) -> IResult<&str, Clock, ()> {
    let (rest, _) = space0(input)?;
    let (rest, _) = tag("CLOCK:")(rest)?;
    let (rest, _) = space0(rest)?;
    let (rest, timestamp) = parse_inactive(rest)?;

    let (rest, clock) = match timestamp {
        Timestamp::InactiveRange {
            start,
            end,
            repeater,
            delay,
        } => {
            let (rest, _) = space0(rest)?;
            let (rest, _) = tag("=>")(rest)?;
            let (rest, _) = space0(rest)?;
            let (rest, duration) = recognize(separated_pair(digit1, char(':'), digit1))(rest)?;
            let (rest, _) = eol(rest)?;
            let (rest, post_blank) = blank_lines_count(rest)?;
            let clock = Clock::Closed {
                start,
                end,
                repeater,
                delay,
                duration: duration.into(),
                post_blank,
            };
            (rest, clock)
        }
        Timestamp::Inactive {
            start,
            repeater,
            delay,
        } => {
            let (rest, _) = eol(rest)?;
            let (rest, post_blank) = blank_lines_count(rest)?;
            let clock = Clock::Running {
                start,
                repeater,
                delay,
                post_blank,
            };
            (rest, clock)
        }
        _ => unreachable!(
            "`parse_inactive` only returns `Timestamp::InactiveRange` or `Timestamp::Inactive`."
        ),
    };

    Ok((rest, clock))
}
// Checks `Clock::parse` on a running clock (single timestamp, no trailing
// newline) and on a closed clock (timestamp range, duration and one
// trailing blank line).
#[test]
fn parse() {
assert_eq!(
Clock::parse("CLOCK: [2003-09-16 Tue 09:39]"),
Some((
"",
Clock::Running {
start: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: Some(9),
minute: Some(39)
},
repeater: None,
delay: None,
post_blank: 0,
}
))
);
assert_eq!(
Clock::parse("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00\n\n"),
Some((
"",
Clock::Closed {
start: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: Some(9),
minute: Some(39)
},
end: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: Some(10),
minute: Some(39)
},
repeater: None,
delay: None,
duration: "1:00".into(),
post_blank: 1,
}
))
);
}

View file

@ -1,53 +0,0 @@
use std::borrow::Cow;
use nom::{
error::{make_error, ErrorKind},
Err, IResult,
};
use crate::parse::combinators::{blank_lines_count, lines_while};
/// Comment Element
#[derive(Debug, Default, Clone)]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct Comment<'a> {
    /// Text of the comment, including the pound signs
    pub value: Cow<'a, str>,
    /// Number of blank lines between the last line of the comment and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}

impl Comment<'_> {
    /// Parses a comment from the start of `input`, returning the remaining
    /// input together with the comment, or `None` if parsing fails.
    pub(crate) fn parse(input: &str) -> Option<(&str, Comment)> {
        parse_internal(input).ok()
    }

    /// Converts this comment into one that owns its text.
    pub fn into_owned(self) -> Comment<'static> {
        let Comment { value, post_blank } = self;
        Comment {
            value: Cow::Owned(value.into_owned()),
            post_blank,
        }
    }
}
fn parse_internal(input: &str) -> IResult<&str, Comment, ()> {
let (input, value) = lines_while(|line| {
let line = line.trim_start();
line == "#" || line.starts_with("# ")
})(input)?;
if value.is_empty() {
// TODO: better error kind
return Err(Err::Error(make_error(input, ErrorKind::Many0)));
}
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
Comment {
value: value.into(),
post_blank,
},
))
}

View file

@ -1,122 +0,0 @@
use std::borrow::Cow;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::digit0,
combinator::recognize,
sequence::{delimited, pair, separated_pair},
IResult,
};
/// Statistics Cookie Object
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Cookie<'a> {
    /// Complete text of the cookie, brackets included
    pub value: Cow<'a, str>,
}

impl Cookie<'_> {
    /// Parses a cookie from the start of `input`, returning the remaining
    /// input together with the cookie, or `None` if parsing fails.
    pub(crate) fn parse(input: &str) -> Option<(&str, Cookie)> {
        parse_internal(input).ok()
    }

    /// Converts this cookie into one that owns its text.
    pub fn into_owned(self) -> Cookie<'static> {
        let Cookie { value } = self;
        Cookie {
            value: Cow::Owned(value.into_owned()),
        }
    }
}
/// Parses a statistics cookie: `[` followed by either `digits/digits` or
/// `digits%` (each run of digits may be empty) and a closing `]`.
///
/// The whole matched text, brackets included, becomes the cookie value.
#[inline]
fn parse_internal(input: &str) -> IResult<&str, Cookie, ()> {
    recognize(delimited(
        tag("["),
        alt((
            separated_pair(digit0, tag("/"), digit0),
            pair(digit0, tag("%")),
        )),
        tag("]"),
    ))(input)
    .map(|(rest, matched)| {
        (
            rest,
            Cookie {
                value: matched.into(),
            },
        )
    })
}
// Checks `Cookie::parse` on fraction and percent cookies, including the
// forms with empty digit runs, and on inputs that must be rejected (extra
// space, doubled separator, wrong separator, doubled percent sign).
#[test]
fn parse() {
assert_eq!(
Cookie::parse("[1/10]"),
Some((
"",
Cookie {
value: "[1/10]".into()
}
))
);
assert_eq!(
Cookie::parse("[1/1000]"),
Some((
"",
Cookie {
value: "[1/1000]".into()
}
))
);
assert_eq!(
Cookie::parse("[10%]"),
Some((
"",
Cookie {
value: "[10%]".into()
}
))
);
assert_eq!(
Cookie::parse("[%]"),
Some((
"",
Cookie {
value: "[%]".into()
}
))
);
assert_eq!(
Cookie::parse("[/]"),
Some((
"",
Cookie {
value: "[/]".into()
}
))
);
assert_eq!(
Cookie::parse("[100/]"),
Some((
"",
Cookie {
value: "[100/]".into()
}
))
);
assert_eq!(
Cookie::parse("[/100]"),
Some((
"",
Cookie {
value: "[/100]".into()
}
))
);
assert!(Cookie::parse("[10% ]").is_none());
assert!(Cookie::parse("[1//100]").is_none());
assert!(Cookie::parse("[1\\100]").is_none());
assert!(Cookie::parse("[10%%]").is_none());
}

View file

@ -1,121 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_while1},
character::complete::space0,
sequence::delimited,
IResult,
};
use crate::parse::combinators::{blank_lines_count, eol, lines_till};
/// Drawer Element
#[derive(Debug, Default, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct Drawer<'a> {
    /// Name of the drawer
    pub name: Cow<'a, str>,
    /// Number of blank lines between the first line of the drawer and the
    /// next non-blank line
    pub pre_blank: usize,
    /// Number of blank lines between the last line of the drawer and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}

impl Drawer<'_> {
    /// Parses a drawer from the start of `input`, returning the remaining
    /// input together with the drawer and its contents, or `None` if
    /// parsing fails.
    pub(crate) fn parse(input: &str) -> Option<(&str, (Drawer, &str))> {
        parse_drawer(input).ok()
    }

    /// Converts this drawer into one that owns its name.
    pub fn into_owned(self) -> Drawer<'static> {
        let Drawer {
            name,
            pre_blank,
            post_blank,
        } = self;
        Drawer {
            name: Cow::Owned(name.into_owned()),
            pre_blank,
            post_blank,
        }
    }
}
#[inline]
pub fn parse_drawer(input: &str) -> IResult<&str, (Drawer, &str), ()> {
let (input, (mut drawer, content)) = parse_drawer_without_blank(input)?;
let (content, blank) = blank_lines_count(content)?;
drawer.pre_blank = blank;
let (input, blank) = blank_lines_count(input)?;
drawer.post_blank = blank;
Ok((input, (drawer, content)))
}
/// Parses `:NAME:` (optionally preceded by spaces) at the end of a line,
/// then collects every line up to one that, once trimmed, equals `:END:`
/// ignoring ASCII case.
///
/// The name may contain ASCII letters, `-` and `_`. Both blank-line counts
/// of the returned drawer are left at `0`.
pub fn parse_drawer_without_blank(input: &str) -> IResult<&str, (Drawer, &str), ()> {
    let (rest, _) = space0(input)?;
    let (rest, name) = delimited(
        tag(":"),
        take_while1(|c: char| c == '-' || c == '_' || c.is_ascii_alphabetic()),
        tag(":"),
    )(rest)?;
    let (rest, _) = eol(rest)?;
    let (rest, contents) =
        lines_till(|candidate| candidate.trim().eq_ignore_ascii_case(":END:"))(rest)?;

    let drawer = Drawer {
        name: name.into(),
        pre_blank: 0,
        post_blank: 0,
    };
    Ok((rest, (drawer, contents)))
}
// Checks `parse_drawer` on a drawer with one property, on an empty drawer
// surrounded by blank lines, and on a drawer without `:END:` (which must
// fail).
// NOTE(review): the raw-string fixtures are whitespace-sensitive. As shown
// here they contain neither the leading spaces nor the blank lines that the
// expected values (`" :CUSTOM_ID: id\n"`, `pre_blank: 2`) imply; confirm
// the fixtures against the original file.
#[test]
fn parse() {
assert_eq!(
parse_drawer(
r#":PROPERTIES:
:CUSTOM_ID: id
:END:"#
),
Ok((
"",
(
Drawer {
name: "PROPERTIES".into(),
pre_blank: 0,
post_blank: 0
},
" :CUSTOM_ID: id\n"
)
))
);
assert_eq!(
parse_drawer(
r#":PROPERTIES:
:END:
"#
),
Ok((
"",
(
Drawer {
name: "PROPERTIES".into(),
pre_blank: 2,
post_blank: 1,
},
""
)
))
);
// https://github.com/PoiScript/orgize/issues/9
assert!(parse_drawer(":SPAGHETTI:\n").is_err());
}

View file

@ -1,99 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::tag_no_case,
character::complete::{alpha1, space0, space1},
IResult,
};
use crate::parse::combinators::{blank_lines_count, line, lines_till};
/// Dynamic Block Element
#[derive(Debug, Default, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct DynBlock<'a> {
    /// Name of the block
    pub block_name: Cow<'a, str>,
    /// Arguments of the block, if any
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub arguments: Option<Cow<'a, str>>,
    /// Number of blank lines between the first line of the block and the
    /// next non-blank line
    pub pre_blank: usize,
    /// Number of blank lines between the last line of the block and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}

impl DynBlock<'_> {
    /// Parses a dynamic block from the start of `input`, returning the
    /// remaining input together with the block and its contents, or `None`
    /// if parsing fails.
    pub(crate) fn parse(input: &str) -> Option<(&str, (DynBlock, &str))> {
        parse_internal(input).ok()
    }

    /// Converts this block into one that owns all of its string data.
    pub fn into_owned(self) -> DynBlock<'static> {
        let DynBlock {
            block_name,
            arguments,
            pre_blank,
            post_blank,
        } = self;
        DynBlock {
            block_name: Cow::Owned(block_name.into_owned()),
            arguments: arguments.map(|a| Cow::Owned(a.into_owned())),
            pre_blank,
            post_blank,
        }
    }
}
/// Parses `#+BEGIN: name args ... #+END:` (keywords matched
/// case-insensitively) followed by any blank lines.
///
/// The trimmed rest of the opening line becomes the arguments, or `None`
/// when it is empty. Leading blank lines are removed from the returned
/// contents and counted into `pre_blank`.
#[inline]
fn parse_internal(input: &str) -> IResult<&str, (DynBlock, &str), ()> {
    let (rest, _) = space0(input)?;
    let (rest, _) = tag_no_case("#+BEGIN:")(rest)?;
    let (rest, _) = space1(rest)?;
    let (rest, name) = alpha1(rest)?;
    let (rest, args) = line(rest)?;
    let (rest, contents) =
        lines_till(|candidate| candidate.trim().eq_ignore_ascii_case("#+END:"))(rest)?;
    let (contents, pre_blank) = blank_lines_count(contents)?;
    let (rest, post_blank) = blank_lines_count(rest)?;

    let args = args.trim();
    let block = DynBlock {
        block_name: name.into(),
        arguments: if args.is_empty() {
            None
        } else {
            Some(args.into())
        },
        pre_blank,
        post_blank,
    };
    Ok((rest, (block, contents)))
}
// Checks `DynBlock::parse` on a `clocktable` block with arguments.
// NOTE(review): the raw-string fixture is whitespace-sensitive. As shown
// here it contains no blank lines although `pre_blank: 2` is expected;
// confirm the fixture against the original file.
#[test]
fn parse() {
// TODO: testing
assert_eq!(
DynBlock::parse(
r#"#+BEGIN: clocktable :scope file
CONTENTS
#+END:
"#
),
Some((
"",
(
DynBlock {
block_name: "clocktable".into(),
arguments: Some(":scope file".into()),
pre_blank: 2,
post_blank: 1,
},
"CONTENTS\n"
)
))
);
}

View file

@ -1,113 +0,0 @@
use bytecount::count;
use memchr::memchr_iter;
use crate::elements::Element;
/// Emphasised text found by `Emphasis::parse`: the marker byte and the text
/// between the opening and closing marker.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct Emphasis<'a> {
    marker: u8,
    contents: &'a str,
}

impl<'a> Emphasis<'a> {
    /// Looks for emphasised text at the start of `text`, delimited by
    /// `marker`.
    ///
    /// Returns the text after the closing marker together with the emphasis,
    /// or `None` when `text` is shorter than three bytes, its second byte is
    /// ASCII whitespace, or no acceptable closing marker is found before the
    /// contents would span two or more newlines.
    pub fn parse(text: &str, marker: u8) -> Option<(&str, Emphasis)> {
        let bytes = text.as_bytes();
        if bytes.len() < 3 || bytes[1].is_ascii_whitespace() {
            return None;
        }

        // The first occurrence of the marker is skipped as the opening one.
        for end in memchr_iter(marker, bytes).skip(1) {
            // contains at least one character
            if end == 1 {
                continue;
            }
            if count(&bytes[1..end], b'\n') >= 2 {
                break;
            }
            if validate_marker(end, text) {
                let emphasis = Emphasis {
                    marker,
                    contents: &text[1..end],
                };
                return Some((&text[end + 1..], emphasis));
            }
        }

        None
    }

    /// Maps the marker to its element and returns it with the contents.
    ///
    /// `=` and `~` carry the contents as their value; `*`, `+`, `/` and `_`
    /// map to the bold, strike, italic and underline elements. Any other
    /// marker is treated as unreachable.
    pub fn into_element(self) -> (Element<'a>, &'a str) {
        let contents = self.contents;
        let element = match self.marker {
            b'*' => Element::Bold,
            b'+' => Element::Strike,
            b'/' => Element::Italic,
            b'_' => Element::Underline,
            b'=' => Element::Verbatim {
                value: contents.into(),
            },
            b'~' => Element::Code {
                value: contents.into(),
            },
            _ => unreachable!(),
        };
        (element, contents)
    }
}
/// Decides whether the marker byte at `pos` in `text` can close an emphasis.
///
/// The byte before the marker must not be ASCII whitespace, and the byte
/// after it, if there is one, must be one of
/// `' '`, `-`, `.`, `,`, `:`, `!`, `?`, `'`, a newline, `)` or `}`.
///
/// Panics if `pos` is `0` or greater than `text.len()`.
fn validate_marker(pos: usize, text: &str) -> bool {
    // Bytes that may directly follow a closing marker.
    const ALLOWED_AFTER: &[u8] = b" -.,:!?'\n)}";

    let bytes = text.as_bytes();
    if bytes[pos - 1].is_ascii_whitespace() {
        return false;
    }
    match bytes.get(pos + 1) {
        Some(next) => ALLOWED_AFTER.contains(next),
        // The marker is the last byte of the text.
        None => true,
    }
}
// Checks `Emphasis::parse`: plain emphasis, a marker inside the contents, a
// single newline inside the contents, and the rejected cases (closing
// marker followed by a letter, wrong marker, whitespace next to a marker,
// two newlines inside the contents).
#[test]
fn parse() {
assert_eq!(
Emphasis::parse("*bold*", b'*'),
Some((
"",
Emphasis {
contents: "bold",
marker: b'*'
}
))
);
assert_eq!(
Emphasis::parse("*bo*ld*", b'*'),
Some((
"",
Emphasis {
contents: "bo*ld",
marker: b'*'
}
))
);
assert_eq!(
Emphasis::parse("*bo\nld*", b'*'),
Some((
"",
Emphasis {
contents: "bo\nld",
marker: b'*'
}
))
);
assert_eq!(Emphasis::parse("*bold*a", b'*'), None);
assert_eq!(Emphasis::parse("*bold*", b'/'), None);
assert_eq!(Emphasis::parse("*bold *", b'*'), None);
assert_eq!(Emphasis::parse("* bold*", b'*'), None);
assert_eq!(Emphasis::parse("*b\nol\nd*", b'*'), None);
}

View file

@ -1,80 +0,0 @@
use std::borrow::Cow;
use nom::{
error::{make_error, ErrorKind},
Err, IResult,
};
use crate::parse::combinators::{blank_lines_count, lines_while};
/// Fixed Width Element: a run of consecutive lines that, ignoring leading
/// whitespace, are either `:` alone or start with `: `.
#[derive(Debug, Default, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct FixedWidth<'a> {
    /// Raw text of the matched lines (the `:` prefixes are kept)
    pub value: Cow<'a, str>,
    /// Number of blank lines between the last fixed-width line and the next
    /// non-blank line or the buffer's end
    pub post_blank: usize,
}
impl FixedWidth<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth)> {
parse_internal(input).ok()
}
pub fn into_owned(self) -> FixedWidth<'static> {
FixedWidth {
value: self.value.into_owned().into(),
post_blank: self.post_blank,
}
}
}
fn parse_internal(input: &str) -> IResult<&str, FixedWidth, ()> {
let (input, value) = lines_while(|line| {
let line = line.trim_start();
line == ":" || line.starts_with(": ")
})(input)?;
if value.is_empty() {
// TODO: better error kind
return Err(Err::Error(make_error(input, ErrorKind::Many0)));
}
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
FixedWidth {
value: value.into(),
post_blank,
},
))
}
// Checks that `FixedWidth::parse` consumes a block of `:`-prefixed lines and
// reports the trailing blank-line count.
// NOTE(review): whitespace inside the raw string literals may have been lost
// in this view (the expected `post_blank: 1` suggests a trailing blank line in
// the input) — confirm against the original file.
#[test]
fn parse() {
    assert_eq!(
        FixedWidth::parse(
            r#": A
:
: B
: C
"#
        ),
        Some((
            "",
            FixedWidth {
                value: r#": A
:
: B
: C
"#
                .into(),
                post_blank: 1
            }
        ))
    );
}

View file

@ -1,117 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_while1},
sequence::delimited,
IResult,
};
use crate::parse::combinators::{blank_lines_count, line};
/// Footnote Definition Element
///
/// Introduced by `[fn:LABEL]` at the start of the input; the rest of that line
/// is returned separately by the parser as the definition's content.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Default, Clone)]
pub struct FnDef<'a> {
    /// Footnote label, used for reference (ASCII alphanumerics, `-` and `_`)
    pub label: Cow<'a, str>,
    /// Number of blank lines between the last footnote definition's line and
    /// the next non-blank line or the buffer's end
    pub post_blank: usize,
}
impl FnDef<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, (FnDef, &str))> {
parse_internal(input).ok()
}
pub fn into_owned(self) -> FnDef<'static> {
FnDef {
label: self.label.into_owned().into(),
post_blank: self.post_blank,
}
}
}
/// nom-style parser behind `FnDef::parse`.
///
/// Expects `[fn:LABEL]` with a non-empty label, takes the remainder of the
/// line as content and then counts the following blank lines.
fn parse_internal(input: &str) -> IResult<&str, (FnDef, &str), ()> {
    /// Characters permitted inside a footnote label.
    fn is_label_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || c == '-' || c == '_'
    }

    let (rest, label) = delimited(tag("[fn:"), take_while1(is_label_char), tag("]"))(input)?;
    let (rest, content) = line(rest)?;
    let (rest, post_blank) = blank_lines_count(rest)?;

    let fn_def = FnDef {
        label: label.into(),
        post_blank,
    };

    Ok((rest, (fn_def, content)))
}
// Exercises `FnDef::parse`: numeric, underscore, dash and bare labels are
// accepted; an empty label, a label containing a space and a missing closing
// bracket are rejected.
#[test]
fn parse() {
    assert_eq!(
        FnDef::parse("[fn:1] https://orgmode.org"),
        Some((
            "",
            (
                FnDef {
                    label: "1".into(),
                    post_blank: 0
                },
                " https://orgmode.org"
            )
        ))
    );
    assert_eq!(
        FnDef::parse("[fn:word_1] https://orgmode.org"),
        Some((
            "",
            (
                FnDef {
                    label: "word_1".into(),
                    post_blank: 0,
                },
                " https://orgmode.org"
            )
        ))
    );
    assert_eq!(
        FnDef::parse("[fn:WORD-1] https://orgmode.org"),
        Some((
            "",
            (
                FnDef {
                    label: "WORD-1".into(),
                    post_blank: 0,
                },
                " https://orgmode.org"
            )
        ))
    );
    // a definition without any content after the label
    assert_eq!(
        FnDef::parse("[fn:WORD]"),
        Some((
            "",
            (
                FnDef {
                    label: "WORD".into(),
                    post_blank: 0,
                },
                ""
            )
        ))
    );
    assert!(FnDef::parse("[fn:] https://orgmode.org").is_none());
    assert!(FnDef::parse("[fn:wor d] https://orgmode.org").is_none());
    assert!(FnDef::parse("[fn:WORD https://orgmode.org").is_none());
}

View file

@ -1,111 +0,0 @@
use std::borrow::Cow;
use memchr::memchr2_iter;
use nom::{
bytes::complete::{tag, take_while},
combinator::opt,
error::{make_error, ErrorKind},
sequence::preceded,
Err, IResult,
};
/// Footnote Reference Element
///
/// Written as `[fn:LABEL]`, `[fn:LABEL:DEFINITION]` or `[fn::DEFINITION]`.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct FnRef<'a> {
    /// Footnote label (may be empty)
    pub label: Cow<'a, str>,
    /// Inline definition following the second `:`, if present
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub definition: Option<Cow<'a, str>>,
}
impl FnRef<'_> {
    /// Parses a footnote reference from the start of `input`, returning the
    /// unconsumed rest and the reference, or `None` when the parser fails.
    pub(crate) fn parse(input: &str) -> Option<(&str, FnRef)> {
        let (rest, fn_ref) = parse_internal(input).ok()?;
        Some((rest, fn_ref))
    }

    /// Detaches the reference from the source buffer by turning every borrowed
    /// string into an owned one.
    pub fn into_owned(self) -> FnRef<'static> {
        let FnRef { label, definition } = self;
        FnRef {
            label: Cow::Owned(label.into_owned()),
            definition: definition.map(|def| Cow::Owned(def.into_owned())),
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, FnRef, ()> {
let (input, _) = tag("[fn:")(input)?;
let (input, label) =
take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?;
let (input, definition) = opt(preceded(tag(":"), balanced_brackets))(input)?;
let (input, _) = tag("]")(input)?;
Ok((
input,
FnRef {
label: label.into(),
definition: definition.map(Into::into),
},
))
}
/// Consumes input up to (not including) the `]` that closes an already opened
/// bracket, treating nested `[` … `]` pairs as part of the consumed text.
///
/// Returns `(rest starting at the closing bracket, consumed text)`, or a
/// `Tag` error when no such closing bracket exists.
fn balanced_brackets(input: &str) -> IResult<&str, &str, ()> {
    let bytes = input.as_bytes();
    // starts at 1: the caller is already inside one opening bracket
    let mut depth = 1;

    for pos in memchr2_iter(b'[', b']', bytes) {
        match (bytes[pos], depth) {
            (b'[', _) => depth += 1,
            // a `]` at depth 1 is the bracket we were looking for
            (_, 1) => return Ok((&input[pos..], &input[..pos])),
            _ => depth -= 1,
        }
    }

    Err(Err::Error(make_error(input, ErrorKind::Tag)))
}
// Exercises `FnRef::parse`: label only, label plus definition, anonymous
// definition, a definition containing a balanced bracket pair, and an
// unbalanced input that must be rejected.
#[test]
fn parse() {
    assert_eq!(
        FnRef::parse("[fn:1]"),
        Some((
            "",
            FnRef {
                label: "1".into(),
                definition: None
            },
        ))
    );
    assert_eq!(
        FnRef::parse("[fn:1:2]"),
        Some((
            "",
            FnRef {
                label: "1".into(),
                definition: Some("2".into())
            },
        ))
    );
    // empty label with an inline definition
    assert_eq!(
        FnRef::parse("[fn::2]"),
        Some((
            "",
            FnRef {
                label: "".into(),
                definition: Some("2".into())
            },
        ))
    );
    // nested brackets belong to the definition
    assert_eq!(
        FnRef::parse("[fn::[]]"),
        Some((
            "",
            FnRef {
                label: "".into(),
                definition: Some("[]".into())
            },
        ))
    );
    assert!(FnRef::parse("[fn::[]").is_none());
}

View file

@ -1,122 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_till},
combinator::opt,
sequence::{delimited, preceded},
IResult,
};
/// Inline Babel Call Object
///
/// Written as `call_NAME[INSIDE_HEADER](ARGUMENTS)[END_HEADER]`, where both
/// bracketed headers are optional.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Default, Clone)]
pub struct InlineCall<'a> {
    /// Called code block name
    pub name: Cow<'a, str>,
    /// Header arguments applied to the code block
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub inside_header: Option<Cow<'a, str>>,
    /// Arguments passed to the code block (the text between the parentheses)
    pub arguments: Cow<'a, str>,
    /// Header arguments applied to the calling instance
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub end_header: Option<Cow<'a, str>>,
}
impl InlineCall<'_> {
    /// Parses an inline babel call from the start of `input`, returning the
    /// unconsumed rest and the call, or `None` when the parser fails.
    pub(crate) fn parse(input: &str) -> Option<(&str, InlineCall)> {
        let (rest, call) = parse_internal(input).ok()?;
        Some((rest, call))
    }

    /// Detaches the call from the source buffer by turning every borrowed
    /// string into an owned one.
    pub fn into_owned(self) -> InlineCall<'static> {
        let InlineCall {
            name,
            inside_header,
            arguments,
            end_header,
        } = self;

        InlineCall {
            name: Cow::Owned(name.into_owned()),
            inside_header: inside_header.map(|header| Cow::Owned(header.into_owned())),
            arguments: Cow::Owned(arguments.into_owned()),
            end_header: end_header.map(|header| Cow::Owned(header.into_owned())),
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, InlineCall, ()> {
let (input, name) = preceded(
tag("call_"),
take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')'),
)(input)?;
let (input, inside_header) = opt(delimited(
tag("["),
take_till(|c| c == ']' || c == '\n'),
tag("]"),
))(input)?;
let (input, arguments) =
delimited(tag("("), take_till(|c| c == ')' || c == '\n'), tag(")"))(input)?;
let (input, end_header) = opt(delimited(
tag("["),
take_till(|c| c == ']' || c == '\n'),
tag("]"),
))(input)?;
Ok((
input,
InlineCall {
name: name.into(),
arguments: arguments.into(),
inside_header: inside_header.map(Into::into),
end_header: end_header.map(Into::into),
},
))
}
// Exercises `InlineCall::parse` with every combination of the two optional
// headers (none, inside only, end only, both).
#[test]
fn parse() {
    assert_eq!(
        InlineCall::parse("call_square(4)"),
        Some((
            "",
            InlineCall {
                name: "square".into(),
                arguments: "4".into(),
                inside_header: None,
                end_header: None,
            }
        ))
    );
    assert_eq!(
        InlineCall::parse("call_square[:results output](4)"),
        Some((
            "",
            InlineCall {
                name: "square".into(),
                arguments: "4".into(),
                inside_header: Some(":results output".into()),
                end_header: None,
            },
        ))
    );
    assert_eq!(
        InlineCall::parse("call_square(4)[:results html]"),
        Some((
            "",
            InlineCall {
                name: "square".into(),
                arguments: "4".into(),
                inside_header: None,
                end_header: Some(":results html".into()),
            },
        ))
    );
    assert_eq!(
        InlineCall::parse("call_square[:results output](4)[:results html]"),
        Some((
            "",
            InlineCall {
                name: "square".into(),
                arguments: "4".into(),
                inside_header: Some(":results output".into()),
                end_header: Some(":results html".into()),
            },
        ))
    );
}

View file

@ -1,88 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_till, take_while1},
combinator::opt,
sequence::delimited,
IResult,
};
/// Inline Src Block Object
///
/// Written as `src_LANG[OPTIONS]{BODY}`, where the bracketed options are
/// optional.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct InlineSrc<'a> {
    /// Language of the code
    pub lang: Cow<'a, str>,
    /// Optional header arguments (the text between the square brackets)
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub options: Option<Cow<'a, str>>,
    /// Source code (the text between the curly braces)
    pub body: Cow<'a, str>,
}
impl InlineSrc<'_> {
    /// Parses an inline source block from the start of `input`, returning the
    /// unconsumed rest and the block, or `None` when the parser fails.
    pub(crate) fn parse(input: &str) -> Option<(&str, InlineSrc)> {
        let (rest, inline_src) = parse_internal(input).ok()?;
        Some((rest, inline_src))
    }

    /// Detaches the block from the source buffer by turning every borrowed
    /// string into an owned one.
    pub fn into_owned(self) -> InlineSrc<'static> {
        let InlineSrc {
            lang,
            options,
            body,
        } = self;

        InlineSrc {
            lang: Cow::Owned(lang.into_owned()),
            options: options.map(|opts| Cow::Owned(opts.into_owned())),
            body: Cow::Owned(body.into_owned()),
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, InlineSrc, ()> {
let (input, _) = tag("src_")(input)?;
let (input, lang) =
take_while1(|c: char| !c.is_ascii_whitespace() && c != '[' && c != '{')(input)?;
let (input, options) = opt(delimited(
tag("["),
take_till(|c| c == '\n' || c == ']'),
tag("]"),
))(input)?;
let (input, body) = delimited(tag("{"), take_till(|c| c == '\n' || c == '}'), tag("}"))(input)?;
Ok((
input,
InlineSrc {
lang: lang.into(),
options: options.map(Into::into),
body: body.into(),
},
))
}
// Exercises `InlineSrc::parse`: with and without options, plus rejection of a
// missing closing brace, a missing language and a missing body.
#[test]
fn parse() {
    assert_eq!(
        InlineSrc::parse("src_C{int a = 0;}"),
        Some((
            "",
            InlineSrc {
                lang: "C".into(),
                options: None,
                body: "int a = 0;".into()
            },
        ))
    );
    assert_eq!(
        InlineSrc::parse("src_xml[:exports code]{<tag>text</tag>}"),
        Some((
            "",
            InlineSrc {
                lang: "xml".into(),
                options: Some(":exports code".into()),
                body: "<tag>text</tag>".into(),
            },
        ))
    );
    assert!(InlineSrc::parse("src_xml[:exports code]{<tag>text</tag>").is_none());
    assert!(InlineSrc::parse("src_[:exports code]{<tag>text</tag>}").is_none());
    assert!(InlineSrc::parse("src_xml[:exports code]").is_none());
}

View file

@ -1,230 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_till},
character::complete::space0,
combinator::opt,
sequence::delimited,
IResult,
};
use crate::elements::Element;
use crate::parse::combinators::{blank_lines_count, line};
/// Keyword Element
///
/// Written as `#+KEY: VALUE` or `#+KEY[OPTIONAL]: VALUE`.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Keyword<'a> {
    /// Keyword name
    pub key: Cow<'a, str>,
    /// Optional value given in square brackets right after the key
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub optional: Option<Cow<'a, str>>,
    /// Keyword value
    pub value: Cow<'a, str>,
    /// Number of blank lines between the keyword line and the next non-blank
    /// line or the buffer's end
    pub post_blank: usize,
}
impl Keyword<'_> {
    /// Detaches the keyword from the source buffer by turning every borrowed
    /// string into an owned one.
    pub fn into_owned(self) -> Keyword<'static> {
        let Keyword {
            key,
            optional,
            value,
            post_blank,
        } = self;

        Keyword {
            key: Cow::Owned(key.into_owned()),
            optional: optional.map(|opt| Cow::Owned(opt.into_owned())),
            value: Cow::Owned(value.into_owned()),
            post_blank,
        }
    }
}
/// Babel Call Element
///
/// Produced from a keyword line whose key equals `CALL` (ASCII
/// case-insensitively).
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct BabelCall<'a> {
    /// Babel call value
    pub value: Cow<'a, str>,
    /// Number of blank lines between the babel call line and the next
    /// non-blank line or the buffer's end
    pub post_blank: usize,
}
impl BabelCall<'_> {
pub fn into_owned(self) -> BabelCall<'static> {
BabelCall {
value: self.value.into_owned().into(),
post_blank: self.post_blank,
}
}
}
/// Borrowed result of parsing a `#+KEY[OPTIONAL]: VALUE` line, before it is
/// turned into either a `Keyword` or a `BabelCall` element.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct RawKeyword<'a> {
    /// Text between `#+` and the first whitespace, `:` or `[`
    pub key: &'a str,
    /// Rest of the line after the `:`, trimmed
    pub value: &'a str,
    /// Text inside the optional square brackets following the key
    pub optional: Option<&'a str>,
    /// Number of blank lines following the keyword line
    pub post_blank: usize,
}
impl<'a> RawKeyword<'a> {
pub fn parse(input: &str) -> Option<(&str, RawKeyword)> {
parse_internal(input).ok()
}
pub fn into_element(self) -> Element<'a> {
let RawKeyword {
key,
value,
optional,
post_blank,
} = self;
if (&*key).eq_ignore_ascii_case("CALL") {
BabelCall {
value: value.into(),
post_blank,
}
.into()
} else {
Keyword {
key: key.into(),
optional: optional.map(Into::into),
value: value.into(),
post_blank,
}
.into()
}
}
}
/// nom-style parser behind `RawKeyword::parse`.
///
/// Grammar: optional leading spaces + `#+` + key + optional `[optional]` +
/// `:` + rest of the line (trimmed to form the value), followed by a count of
/// blank lines.
fn parse_internal(input: &str) -> IResult<&str, RawKeyword, ()> {
    /// The key ends at the first whitespace, `:` or `[`.
    fn ends_key(c: char) -> bool {
        c.is_ascii_whitespace() || c == ':' || c == '['
    }

    let (rest, _) = space0(input)?;
    let (rest, _) = tag("#+")(rest)?;
    let (rest, key) = take_till(ends_key)(rest)?;
    let (rest, optional) = opt(delimited(
        tag("["),
        take_till(|c| c == ']' || c == '\n'),
        tag("]"),
    ))(rest)?;
    let (rest, _) = tag(":")(rest)?;
    let (rest, raw_value) = line(rest)?;
    let (rest, post_blank) = blank_lines_count(rest)?;

    let keyword = RawKeyword {
        key,
        optional,
        value: raw_value.trim(),
        post_blank,
    };

    Ok((rest, keyword))
}
// Exercises `RawKeyword::parse`: empty and non-empty values, keys containing
// underscores, a value glued to the colon, a bracketed optional part, and the
// rejection of whitespace inside or before the key.
#[test]
fn parse() {
    assert_eq!(
        RawKeyword::parse("#+KEY:"),
        Some((
            "",
            RawKeyword {
                key: "KEY",
                optional: None,
                value: "",
                post_blank: 0
            }
        ))
    );
    assert_eq!(
        RawKeyword::parse("#+KEY: VALUE"),
        Some((
            "",
            RawKeyword {
                key: "KEY",
                optional: None,
                value: "VALUE",
                post_blank: 0
            }
        ))
    );
    assert_eq!(
        RawKeyword::parse("#+K_E_Y: VALUE"),
        Some((
            "",
            RawKeyword {
                key: "K_E_Y",
                optional: None,
                value: "VALUE",
                post_blank: 0
            }
        ))
    );
    assert_eq!(
        RawKeyword::parse("#+KEY:VALUE\n"),
        Some((
            "",
            RawKeyword {
                key: "KEY",
                optional: None,
                value: "VALUE",
                post_blank: 0
            }
        ))
    );
    // whitespace terminates the key, so the mandatory `:` is not found
    assert!(RawKeyword::parse("#+KE Y: VALUE").is_none());
    assert!(RawKeyword::parse("#+ KEY: VALUE").is_none());
    assert_eq!(
        RawKeyword::parse("#+RESULTS:"),
        Some((
            "",
            RawKeyword {
                key: "RESULTS",
                optional: None,
                value: "",
                post_blank: 0
            }
        ))
    );
    assert_eq!(
        RawKeyword::parse("#+ATTR_LATEX: :width 5cm\n"),
        Some((
            "",
            RawKeyword {
                key: "ATTR_LATEX",
                optional: None,
                value: ":width 5cm",
                post_blank: 0
            }
        ))
    );
    assert_eq!(
        RawKeyword::parse("#+CALL: double(n=4)"),
        Some((
            "",
            RawKeyword {
                key: "CALL",
                optional: None,
                value: "double(n=4)",
                post_blank: 0
            }
        ))
    );
    assert_eq!(
        RawKeyword::parse("#+CAPTION[Short caption]: Longer caption."),
        Some((
            "",
            RawKeyword {
                key: "CAPTION",
                optional: Some("Short caption"),
                value: "Longer caption.",
                post_blank: 0
            }
        ))
    );
}

View file

@ -1,80 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_while},
combinator::opt,
sequence::delimited,
IResult,
};
/// Link Object
///
/// Written as `[[PATH]]` or `[[PATH][DESC]]`.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Link<'a> {
    /// Link destination
    pub path: Cow<'a, str>,
    /// Optional description, the text of the second bracket pair
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub desc: Option<Cow<'a, str>>,
}
impl Link<'_> {
    /// Parses a link from the start of `input`, returning the unconsumed rest
    /// and the link, or `None` when the parser fails.
    #[inline]
    pub(crate) fn parse(input: &str) -> Option<(&str, Link)> {
        let (rest, link) = parse_internal(input).ok()?;
        Some((rest, link))
    }

    /// Detaches the link from the source buffer by turning every borrowed
    /// string into an owned one.
    pub fn into_owned(self) -> Link<'static> {
        let Link { path, desc } = self;
        Link {
            path: Cow::Owned(path.into_owned()),
            desc: desc.map(|text| Cow::Owned(text.into_owned())),
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, Link, ()> {
let (input, path) = delimited(
tag("[["),
take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']'),
tag("]"),
)(input)?;
let (input, desc) = opt(delimited(
tag("["),
take_while(|c: char| c != '[' && c != ']'),
tag("]"),
))(input)?;
let (input, _) = tag("]")(input)?;
Ok((
input,
Link {
path: path.into(),
desc: desc.map(Into::into),
},
))
}
// Exercises `Link::parse`: a bare link, a link with description, and a link
// that lacks its final closing bracket.
#[test]
fn parse() {
    assert_eq!(
        Link::parse("[[#id]]"),
        Some((
            "",
            Link {
                path: "#id".into(),
                desc: None
            }
        ))
    );
    assert_eq!(
        Link::parse("[[#id][desc]]"),
        Some((
            "",
            Link {
                path: "#id".into(),
                desc: Some("desc".into())
            }
        ))
    );
    assert!(Link::parse("[[#id][desc]").is_none());
}

View file

@ -1,316 +0,0 @@
use std::borrow::Cow;
use std::iter::once;
use memchr::{memchr, memchr_iter};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1, space0},
combinator::{map, recognize},
sequence::terminated,
IResult,
};
/// Plain List Element
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct List {
    /// List indent, number of whitespaces
    pub indent: usize,
    /// List's type, determined by the first item of this list
    pub ordered: bool,
    /// Number of blank lines between the last list's line and the next
    /// non-blank line or the buffer's end
    pub post_blank: usize,
}
/// List Item Element
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct ListItem<'a> {
    /// List item bullet as written in the source, trailing space included
    /// (`+ `, `* `, `- ` or digits followed by `. `)
    pub bullet: Cow<'a, str>,
    /// List item indent, number of whitespaces
    pub indent: usize,
    /// List item type: `true` when the bullet starts with an ASCII digit
    pub ordered: bool,
    // TODO checkbox
    // TODO counter
    // TODO tag
}
impl ListItem<'_> {
#[inline]
pub(crate) fn parse(input: &str) -> Option<(&str, (ListItem, &str))> {
list_item(input).ok()
}
pub fn into_owned(self) -> ListItem<'static> {
ListItem {
bullet: self.bullet.into_owned().into(),
indent: self.indent,
ordered: self.ordered,
}
}
}
/// nom-style parser for a single list item.
///
/// Reads the indentation, then a bullet (`+ `, `* `, `- ` or digits followed
/// by `. `), and finally delegates to `list_item_contents` to find where the
/// item's contents end.
fn list_item(input: &str) -> IResult<&str, (ListItem, &str), ()> {
    let (after_indent, indent) = map(space0, |spaces: &str| spaces.len())(input)?;
    let (after_bullet, bullet) = recognize(alt((
        tag("+ "),
        tag("* "),
        tag("- "),
        terminated(digit1, tag(". ")),
    )))(after_indent)?;

    // only numbered bullets start with a digit
    let ordered = bullet.starts_with(|c: char| c.is_ascii_digit());
    let (rest, contents) = list_item_contents(after_bullet, indent);

    let item = ListItem {
        bullet: bullet.into(),
        indent,
        ordered,
    };

    Ok((rest, (item, contents)))
}
/// Splits `input` (the text right after a list item's bullet) into
/// `(rest, contents)`, where `contents` is the part belonging to the item.
///
/// The first line always belongs to the item. Every following line is
/// inspected in turn; the item ends
/// * after two consecutive whitespace-only lines (both are kept in
///   `contents`), or
/// * right before a line that has a non-whitespace byte among its first
///   `indent + 1` bytes, i.e. a line indented no deeper than the bullet.
///
/// When neither happens the whole input is the item's contents and the rest
/// is empty.
fn list_item_contents(input: &str, indent: usize) -> (&str, &str) {
    // byte offset just past the first line
    let mut last_end = memchr(b'\n', input.as_bytes())
        .map(|i| i + 1)
        .unwrap_or_else(|| input.len());
    // `i` walks over the end offsets of all lines after the first one; the
    // chained `input.len()` covers a final line without trailing newline
    for i in memchr_iter(b'\n', input.as_bytes())
        .map(|i| i + 1)
        .chain(once(input.len()))
        .skip(1)
    {
        // current line (`last_end..i`) is blank: look at the line after it
        if input[last_end..i]
            .as_bytes()
            .iter()
            .all(u8::is_ascii_whitespace)
        {
            // `x` is the end offset of the line following the current one
            let x = memchr(b'\n', &input[i..].as_bytes())
                .map(|ii| i + ii + 1)
                .unwrap_or_else(|| input.len());
            // two consecutive empty lines
            if input[i..x].as_bytes().iter().all(u8::is_ascii_whitespace) {
                return (&input[x..], &input[0..x]);
            }
        }
        // line less or equally indented than the starting line
        if input[last_end..i]
            .as_bytes()
            .iter()
            .take(indent + 1)
            .any(|c| !c.is_ascii_whitespace())
        {
            return (&input[last_end..], &input[0..last_end]);
        }
        last_end = i;
    }
    ("", input)
}
// Exercises `list_item` with sibling items, nested items, indented lists and
// ordered bullets.
// NOTE(review): leading whitespace and blank lines inside the raw string
// literals appear to have been collapsed in this view (e.g. the inputs that
// expect `indent: 2`, and nested items that expect to stay inside the
// contents) — confirm the literals against the original file before relying
// on them.
#[test]
fn parse() {
    assert_eq!(
        list_item(
            r#"+ item1
+ item2"#
        ),
        Ok((
            "+ item2",
            (
                ListItem {
                    bullet: "+ ".into(),
                    indent: 0,
                    ordered: false,
                },
                r#"item1
"#
            )
        ))
    );
    assert_eq!(
        list_item(
            r#"* item1
* item2"#
        ),
        Ok((
            "* item2",
            (
                ListItem {
                    bullet: "* ".into(),
                    indent: 0,
                    ordered: false,
                },
                r#"item1
"#
            )
        ))
    );
    assert_eq!(
        list_item(
            r#"* item1
* item2"#
        ),
        Ok((
            "* item2",
            (
                ListItem {
                    bullet: "* ".into(),
                    indent: 0,
                    ordered: false,
                },
                r#"item1
"#
            )
        ))
    );
    assert_eq!(
        list_item(
            r#"* item1
"#
        ),
        Ok((
            "",
            (
                ListItem {
                    bullet: "* ".into(),
                    indent: 0,
                    ordered: false,
                },
                r#"item1
"#
            )
        ))
    );
    assert_eq!(
        list_item(
            r#"+ item1
+ item2
"#
        ),
        Ok((
            "",
            (
                ListItem {
                    bullet: "+ ".into(),
                    indent: 0,
                    ordered: false,
                },
                r#"item1
+ item2
"#
            )
        ))
    );
    assert_eq!(
        list_item(
            r#"+ item1
+ item2
+ item 3"#
        ),
        Ok((
            "+ item 3",
            (
                ListItem {
                    bullet: "+ ".into(),
                    indent: 0,
                    ordered: false,
                },
                r#"item1
+ item2
"#
            )
        ))
    );
    assert_eq!(
        list_item(
            r#" + item1
+ item2"#
        ),
        Ok((
            " + item2",
            (
                ListItem {
                    bullet: "+ ".into(),
                    indent: 2,
                    ordered: false,
                },
                r#"item1
"#
            )
        ))
    );
    assert_eq!(
        list_item(
            r#" 1. item1
2. item2
3. item3"#
        ),
        Ok((
            r#"2. item2
3. item3"#,
            (
                ListItem {
                    bullet: "1. ".into(),
                    indent: 2,
                    ordered: true,
                },
                r#"item1
"#
            )
        ))
    );
    assert_eq!(
        list_item(
            r#"+ 1
- 2
- 3
+ 4"#
        ),
        Ok((
            "+ 4",
            (
                ListItem {
                    bullet: "+ ".into(),
                    indent: 0,
                    ordered: false,
                },
                r#"1
- 2
- 3
"#
            )
        ))
    );
}

View file

@ -1,91 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take, take_until, take_while1},
combinator::{opt, verify},
sequence::delimited,
IResult,
};
/// Macro Object
///
/// Written as `{{{NAME}}}` or `{{{NAME(ARGUMENTS)}}}`.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Macros<'a> {
    /// Macro name (starts with an ASCII letter; ASCII alphanumerics, `-` and
    /// `_` afterwards)
    pub name: Cow<'a, str>,
    /// Arguments passed to the macro (the raw text between the parentheses)
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub arguments: Option<Cow<'a, str>>,
}
impl Macros<'_> {
    /// Parses a macro call from the start of `input`, returning the unconsumed
    /// rest and the macro, or `None` when the parser fails.
    pub(crate) fn parse(input: &str) -> Option<(&str, Macros)> {
        let (rest, macros) = parse_internal(input).ok()?;
        Some((rest, macros))
    }

    /// Detaches the macro from the source buffer by turning every borrowed
    /// string into an owned one.
    pub fn into_owned(self) -> Macros<'static> {
        let Macros { name, arguments } = self;
        Macros {
            name: Cow::Owned(name.into_owned()),
            arguments: arguments.map(|args| Cow::Owned(args.into_owned())),
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, Macros, ()> {
let (input, _) = tag("{{{")(input)?;
let (input, name) = verify(
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
|s: &str| s.starts_with(|c: char| c.is_ascii_alphabetic()),
)(input)?;
let (input, arguments) = opt(delimited(tag("("), take_until(")}}}"), take(1usize)))(input)?;
let (input, _) = tag("}}}")(input)?;
Ok((
input,
Macros {
name: name.into(),
arguments: arguments.map(Into::into),
},
))
}
// Exercises `Macros::parse`: with arguments, with a `)` inside the arguments,
// without arguments, and several malformed inputs.
#[test]
fn test() {
    assert_eq!(
        Macros::parse("{{{poem(red,blue)}}}"),
        Some((
            "",
            Macros {
                name: "poem".into(),
                arguments: Some("red,blue".into())
            }
        ))
    );
    // the arguments extend to the `)` directly in front of `}}}`
    assert_eq!(
        Macros::parse("{{{poem())}}}"),
        Some((
            "",
            Macros {
                name: "poem".into(),
                arguments: Some(")".into())
            }
        ))
    );
    assert_eq!(
        Macros::parse("{{{author}}}"),
        Some((
            "",
            Macros {
                name: "author".into(),
                arguments: None
            }
        ))
    );
    assert!(Macros::parse("{{{0uthor}}}").is_none());
    assert!(Macros::parse("{{{author}}").is_none());
    assert!(Macros::parse("{{{poem(}}}").is_none());
    assert!(Macros::parse("{{{poem)}}}").is_none());
}

View file

@ -1,245 +0,0 @@
//! Org-mode elements
pub(crate) mod block;
pub(crate) mod clock;
pub(crate) mod comment;
pub(crate) mod cookie;
pub(crate) mod drawer;
pub(crate) mod dyn_block;
pub(crate) mod emphasis;
pub(crate) mod fixed_width;
pub(crate) mod fn_def;
pub(crate) mod fn_ref;
pub(crate) mod inline_call;
pub(crate) mod inline_src;
pub(crate) mod keyword;
pub(crate) mod link;
pub(crate) mod list;
pub(crate) mod macros;
pub(crate) mod planning;
pub(crate) mod radio_target;
pub(crate) mod rule;
pub(crate) mod snippet;
pub(crate) mod table;
pub(crate) mod target;
pub(crate) mod timestamp;
pub(crate) mod title;
pub use self::{
block::{
CenterBlock, CommentBlock, ExampleBlock, ExportBlock, QuoteBlock, SourceBlock,
SpecialBlock, VerseBlock,
},
clock::Clock,
comment::Comment,
cookie::Cookie,
drawer::Drawer,
dyn_block::DynBlock,
fixed_width::FixedWidth,
fn_def::FnDef,
fn_ref::FnRef,
inline_call::InlineCall,
inline_src::InlineSrc,
keyword::{BabelCall, Keyword},
link::Link,
list::{List, ListItem},
macros::Macros,
planning::Planning,
rule::Rule,
snippet::Snippet,
table::{Table, TableCell, TableRow},
target::Target,
timestamp::{Datetime, Timestamp},
title::{PropertiesMap, Title},
};
use std::borrow::Cow;
/// Element Enum
///
/// One variant per node kind. A variant either wraps the dedicated struct
/// re-exported from a sibling module, carries its data inline, or is a unit
/// variant with no data of its own.
///
/// With the `ser` feature enabled the enum is serialized with a `type` tag
/// whose value is the kebab-case variant name.
#[derive(Debug)]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "type", rename_all = "kebab-case"))]
pub enum Element<'a> {
    SpecialBlock(SpecialBlock<'a>),
    QuoteBlock(QuoteBlock<'a>),
    CenterBlock(CenterBlock<'a>),
    VerseBlock(VerseBlock<'a>),
    CommentBlock(CommentBlock<'a>),
    ExampleBlock(ExampleBlock<'a>),
    ExportBlock(ExportBlock<'a>),
    SourceBlock(SourceBlock<'a>),
    BabelCall(BabelCall<'a>),
    Section,
    Clock(Clock<'a>),
    Cookie(Cookie<'a>),
    RadioTarget,
    Drawer(Drawer<'a>),
    Document { pre_blank: usize },
    DynBlock(DynBlock<'a>),
    FnDef(FnDef<'a>),
    FnRef(FnRef<'a>),
    Headline { level: usize },
    InlineCall(InlineCall<'a>),
    InlineSrc(InlineSrc<'a>),
    Keyword(Keyword<'a>),
    Link(Link<'a>),
    List(List),
    ListItem(ListItem<'a>),
    Macros(Macros<'a>),
    Snippet(Snippet<'a>),
    Text { value: Cow<'a, str> },
    Paragraph { post_blank: usize },
    Rule(Rule),
    Timestamp(Timestamp<'a>),
    Target(Target<'a>),
    Bold,
    Strike,
    Italic,
    Underline,
    Verbatim { value: Cow<'a, str> },
    Code { value: Cow<'a, str> },
    Comment(Comment<'a>),
    FixedWidth(FixedWidth<'a>),
    Title(Title<'a>),
    Table(Table<'a>),
    TableRow(TableRow),
    TableCell(TableCell),
}
impl Element<'_> {
pub fn is_container(&self) -> bool {
match self {
Element::SpecialBlock(_)
| Element::QuoteBlock(_)
| Element::CenterBlock(_)
| Element::VerseBlock(_)
| Element::Bold
| Element::Document { .. }
| Element::DynBlock(_)
| Element::Headline { .. }
| Element::Italic
| Element::List(_)
| Element::ListItem(_)
| Element::Paragraph { .. }
| Element::Section
| Element::Strike
| Element::Underline
| Element::Title(_)
| Element::Table(_)
| Element::TableRow(TableRow::Header)
| Element::TableRow(TableRow::Body)
| Element::TableCell(_) => true,
_ => false,
}
}
pub fn into_owned(self) -> Element<'static> {
use Element::*;
match self {
SpecialBlock(e) => SpecialBlock(e.into_owned()),
QuoteBlock(e) => QuoteBlock(e.into_owned()),
CenterBlock(e) => CenterBlock(e.into_owned()),
VerseBlock(e) => VerseBlock(e.into_owned()),
CommentBlock(e) => CommentBlock(e.into_owned()),
ExampleBlock(e) => ExampleBlock(e.into_owned()),
ExportBlock(e) => ExportBlock(e.into_owned()),
SourceBlock(e) => SourceBlock(e.into_owned()),
BabelCall(e) => BabelCall(e.into_owned()),
Section => Section,
Clock(e) => Clock(e.into_onwed()),
Cookie(e) => Cookie(e.into_owned()),
RadioTarget => RadioTarget,
Drawer(e) => Drawer(e.into_owned()),
Document { pre_blank } => Document { pre_blank },
DynBlock(e) => DynBlock(e.into_owned()),
FnDef(e) => FnDef(e.into_owned()),
FnRef(e) => FnRef(e.into_owned()),
Headline { level } => Headline { level },
InlineCall(e) => InlineCall(e.into_owned()),
InlineSrc(e) => InlineSrc(e.into_owned()),
Keyword(e) => Keyword(e.into_owned()),
Link(e) => Link(e.into_owned()),
List(e) => List(e),
ListItem(e) => ListItem(e.into_owned()),
Macros(e) => Macros(e.into_owned()),
Snippet(e) => Snippet(e.into_owned()),
Text { value } => Text {
value: value.into_owned().into(),
},
Paragraph { post_blank } => Paragraph { post_blank },
Rule(e) => Rule(e),
Timestamp(e) => Timestamp(e.into_owned()),
Target(e) => Target(e.into_owned()),
Bold => Bold,
Strike => Strike,
Italic => Italic,
Underline => Underline,
Verbatim { value } => Verbatim {
value: value.into_owned().into(),
},
Code { value } => Code {
value: value.into_owned().into(),
},
Comment(e) => Comment(e.into_owned()),
FixedWidth(e) => FixedWidth(e.into_owned()),
Title(e) => Title(e.into_owned()),
Table(e) => Table(e.into_owned()),
TableRow(e) => TableRow(e),
TableCell(e) => TableCell(e),
}
}
}
// Generates `From<T> for Element` impls that wrap a value in the variant of
// the same name.
//
// The invocation takes two comma-separated lists divided by `;`:
// * before the `;` — types carrying a lifetime parameter (`T<'a>`),
// * after the `;`  — types without a lifetime parameter.
macro_rules! impl_from {
    ($($ele0:ident),*; $($ele1:ident),*) => {
        // types with a lifetime parameter
        $(
            impl<'a> From<$ele0<'a>> for Element<'a> {
                fn from(ele: $ele0<'a>) -> Element<'a> {
                    Element::$ele0(ele)
                }
            }
        )*
        // types without a lifetime parameter
        $(
            impl<'a> From<$ele1> for Element<'a> {
                fn from(ele: $ele1) -> Element<'a> {
                    Element::$ele1(ele)
                }
            }
        )*
    };
}
impl_from!(
    BabelCall,
    CenterBlock,
    Clock,
    Comment,
    CommentBlock,
    Cookie,
    Drawer,
    DynBlock,
    ExampleBlock,
    ExportBlock,
    FixedWidth,
    FnDef,
    FnRef,
    InlineCall,
    InlineSrc,
    Keyword,
    Link,
    ListItem,
    Macros,
    QuoteBlock,
    Snippet,
    SourceBlock,
    SpecialBlock,
    Table,
    Target,
    Timestamp,
    Title,
    VerseBlock;
    List,
    Rule,
    TableRow
);

View file

@ -1,98 +0,0 @@
use memchr::memchr;
use crate::elements::Timestamp;
/// Planning element
///
/// A single line made of `DEADLINE:`, `SCHEDULED:` and/or `CLOSED:` keywords,
/// each followed by a timestamp. At least one of the three fields is set on a
/// successfully parsed value.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Planning<'a> {
    /// Timestamp associated to deadline keyword
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub deadline: Option<Timestamp<'a>>,
    /// Timestamp associated to scheduled keyword
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub scheduled: Option<Timestamp<'a>>,
    /// Timestamp associated to closed keyword
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub closed: Option<Timestamp<'a>>,
}
impl Planning<'_> {
    /// Parses a planning line from the start of `text`.
    ///
    /// Only the first line of `text` is inspected. It is read as a sequence of
    /// `KEYWORD: <timestamp>` pairs, where the keyword is `DEADLINE:`,
    /// `SCHEDULED:` or `CLOSED:` and each keyword may appear at most once.
    ///
    /// Returns the text following the first line together with the parsed
    /// planning info, or `None` when an unknown or repeated keyword is met,
    /// when a timestamp cannot be parsed, or when no keyword was found at all.
    #[inline]
    pub(crate) fn parse(text: &str) -> Option<(&str, Planning)> {
        let (mut deadline, mut scheduled, mut closed) = (None, None, None);

        // `tail` is the trimmed first line, `off` the offset of the next line
        let (mut tail, off) = memchr(b'\n', text.as_bytes())
            .map(|i| (text[..i].trim(), i + 1))
            .unwrap_or_else(|| (text.trim(), text.len()));

        while let Some(i) = memchr(b' ', tail.as_bytes()) {
            // text following the keyword, where the timestamp is expected
            let next = tail[i + 1..].trim_start();

            macro_rules! set_timestamp {
                ($timestamp:expr) => {{
                    // try the inactive form only if the active one did not match
                    let (new_tail, timestamp) = Timestamp::parse_active(next)
                        .or_else(|| Timestamp::parse_inactive(next))?;
                    $timestamp = Some(timestamp);
                    tail = new_tail.trim_start();
                }};
            }

            match &tail[..i] {
                "DEADLINE:" if deadline.is_none() => set_timestamp!(deadline),
                "SCHEDULED:" if scheduled.is_none() => set_timestamp!(scheduled),
                "CLOSED:" if closed.is_none() => set_timestamp!(closed),
                _ => return None,
            }
        }

        if deadline.is_none() && scheduled.is_none() && closed.is_none() {
            None
        } else {
            Some((
                &text[off..],
                Planning {
                    deadline,
                    scheduled,
                    closed,
                },
            ))
        }
    }

    /// Detaches the planning info from the source buffer by converting every
    /// contained timestamp into its owned form.
    pub fn into_owned(self) -> Planning<'static> {
        Planning {
            deadline: self.deadline.map(|x| x.into_owned()),
            scheduled: self.scheduled.map(|x| x.into_owned()),
            closed: self.closed.map(|x| x.into_owned()),
        }
    }
}
// Checks that a lone `SCHEDULED:` line yields an active timestamp in
// `scheduled` and leaves the other two fields empty.
// NOTE(review): the function name looks like a typo of `parse`, the name used
// by the tests of the sibling modules — confirm before renaming.
#[test]
fn prase() {
    use crate::elements::Datetime;
    assert_eq!(
        Planning::parse("SCHEDULED: <2019-04-08 Mon>\n"),
        Some((
            "",
            Planning {
                scheduled: Some(Timestamp::Active {
                    start: Datetime {
                        year: 2019,
                        month: 4,
                        day: 8,
                        dayname: "Mon".into(),
                        hour: None,
                        minute: None
                    },
                    repeater: None,
                    delay: None
                }),
                deadline: None,
                closed: None,
            }
        ))
    )
}

View file

@ -1,40 +0,0 @@
use nom::{
bytes::complete::{tag, take_while},
combinator::verify,
sequence::delimited,
IResult,
};
// TODO: text-markup, entities, latex-fragments, subscript and superscript
/// Parses a radio target (`<<<target>>>`) from the start of `input`.
///
/// Returns the unconsumed rest and the text between the angle brackets, or
/// `None` when the input does not start with a valid radio target.
#[inline]
pub fn parse_radio_target(input: &str) -> Option<(&str, &str)> {
    let (rest, contents) = parse_internal(input).ok()?;
    Some((rest, contents))
}
/// nom-style parser behind `parse_radio_target`.
///
/// Grammar: `<<<` + contents + `>>>`, where the contents hold no `<`, `>` or
/// newline and neither start nor end with a space (which also rules out empty
/// contents).
#[inline]
fn parse_internal(input: &str) -> IResult<&str, &str, ()> {
    /// Characters permitted between the angle brackets.
    fn is_target_char(c: char) -> bool {
        !(c == '<' || c == '\n' || c == '>')
    }

    delimited(
        tag("<<<"),
        verify(take_while(is_target_char), |contents: &str| {
            contents.starts_with(|c| c != ' ') && contents.ends_with(|c| c != ' ')
        }),
        tag(">>>"),
    )(input)
}
// Exercises `parse_radio_target`: plain and space-containing targets are
// accepted; leading/trailing spaces, angle brackets or newlines inside the
// target and a short closing delimiter are rejected.
#[test]
fn parse() {
    assert_eq!(parse_radio_target("<<<target>>>"), Some(("", "target")));
    assert_eq!(parse_radio_target("<<<tar get>>>"), Some(("", "tar get")));
    assert!(parse_radio_target("<<<target >>>").is_none());
    assert!(parse_radio_target("<<< target>>>").is_none());
    assert!(parse_radio_target("<<<ta<get>>>").is_none());
    assert!(parse_radio_target("<<<ta>get>>>").is_none());
    assert!(parse_radio_target("<<<ta\nget>>>").is_none());
    assert!(parse_radio_target("<<<target>>").is_none());
}

View file

@ -1,48 +0,0 @@
use nom::{bytes::complete::take_while_m_n, character::complete::space0, IResult};
use crate::parse::combinators::{blank_lines_count, eol};
/// Horizontal rule element: a line consisting of at least five `-`
/// characters, optionally preceded by spaces.
#[derive(Debug, Default, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct Rule {
    /// Number of blank lines between the rule line and the next non-blank
    /// line or the buffer's end
    pub post_blank: usize,
}
impl Rule {
    /// Parses a horizontal rule from the start of `input`, returning the
    /// unconsumed rest and the rule, or `None` when the parser fails.
    pub(crate) fn parse(input: &str) -> Option<(&str, Rule)> {
        let (rest, rule) = parse_internal(input).ok()?;
        Some((rest, rule))
    }
}
/// nom-style parser behind `Rule::parse`.
///
/// Grammar: optional leading whitespace (as accepted by `space0`) + five or
/// more `-` + end of line, followed by a count of blank lines.
fn parse_internal(input: &str) -> IResult<&str, Rule, ()> {
    let (input, _) = space0(input)?;
    // no upper bound on the number of dashes; `usize::MAX` replaces the
    // deprecated `usize::max_value()`
    let (input, _) = take_while_m_n(5, usize::MAX, |c| c == '-')(input)?;
    let (input, _) = eol(input)?;
    let (input, post_blank) = blank_lines_count(input)?;
    Ok((input, Rule { post_blank }))
}
// Exercises `Rule::parse`: five or more dashes (optionally followed by
// trailing whitespace or blank lines) are accepted; fewer dashes or any other
// text on the line is rejected.
// NOTE(review): whitespace inside the string literals may have been collapsed
// in this view (two of the rejection cases below are textually identical) —
// confirm against the original file.
#[test]
fn parse() {
    assert_eq!(Rule::parse("-----"), Some(("", Rule { post_blank: 0 })));
    assert_eq!(Rule::parse("--------"), Some(("", Rule { post_blank: 0 })));
    assert_eq!(
        Rule::parse("-----\n\n\n"),
        Some(("", Rule { post_blank: 2 }))
    );
    assert_eq!(Rule::parse("----- \n"), Some(("", Rule { post_blank: 0 })));
    assert!(Rule::parse("").is_none());
    assert!(Rule::parse("----").is_none());
    assert!(Rule::parse("----").is_none());
    assert!(Rule::parse("None----").is_none());
    assert!(Rule::parse("None ----").is_none());
    assert!(Rule::parse("None------").is_none());
    assert!(Rule::parse("----None----").is_none());
    assert!(Rule::parse("\t\t----").is_none());
    assert!(Rule::parse("------None").is_none());
    assert!(Rule::parse("----- None").is_none());
}

View file

@ -1,100 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take, take_until, take_while1},
sequence::{delimited, separated_pair},
IResult,
};
/// Export Snippet Object
///
/// Written as `@@NAME:VALUE@@`.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Snippet<'a> {
    /// Back-end name (ASCII alphanumerics and `-`)
    pub name: Cow<'a, str>,
    /// Export code (everything between the `:` and the closing `@@`)
    pub value: Cow<'a, str>,
}
impl Snippet<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, Snippet)> {
parse_internal(input).ok()
}
pub fn into_owned(self) -> Snippet<'static> {
Snippet {
name: self.name.into_owned().into(),
value: self.value.into_owned().into(),
}
}
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, Snippet, ()> {
let (input, (name, value)) = delimited(
tag("@@"),
separated_pair(
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-'),
tag(":"),
take_until("@@"),
),
take(2usize),
)(input)?;
Ok((
input,
Snippet {
name: name.into(),
value: value.into(),
},
))
}
// Exercises `Snippet::parse` on well-formed and malformed snippets.
#[test]
fn parse() {
assert_eq!(
Snippet::parse("@@html:<b>@@"),
Some((
"",
Snippet {
name: "html".into(),
value: "<b>".into()
}
))
);
assert_eq!(
Snippet::parse("@@latex:any arbitrary LaTeX code@@"),
Some((
"",
Snippet {
name: "latex".into(),
value: "any arbitrary LaTeX code".into(),
}
))
);
// An empty value is allowed.
assert_eq!(
Snippet::parse("@@html:@@"),
Some((
"",
Snippet {
name: "html".into(),
value: "".into(),
}
))
);
// A single `@` inside the value does not terminate the snippet.
assert_eq!(
Snippet::parse("@@html:<p>@</p>@@"),
Some((
"",
Snippet {
name: "html".into(),
value: "<p>@</p>".into(),
}
))
);
// Rejected: missing closing "@@", missing ':' separator, empty name.
assert!(Snippet::parse("@@html:<b>@").is_none());
assert!(Snippet::parse("@@html<b>@@").is_none());
assert!(Snippet::parse("@@:<b>@@").is_none());
}

View file

@ -1,169 +0,0 @@
use std::borrow::Cow;
use nom::{
error::{make_error, ErrorKind},
Err, IResult,
};
use crate::parse::combinators::{blank_lines_count, line, lines_while};
/// Table Element
///
/// With the `ser` feature the variant is serialized under the `table_type`
/// tag as either `"org"` or `"table.el"`.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "table_type"))]
pub enum Table<'a> {
/// "org" type table
#[cfg_attr(feature = "ser", serde(rename = "org"))]
Org {
/// NOTE(review): presumably the table formula (`#+TBLFM:`) attached to
/// the table; not established by this file, confirm against the parser.
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
tblfm: Option<Cow<'a, str>>,
/// Number of blank lines between the table's last line and the next
/// non-blank line or the buffer's end
post_blank: usize,
/// NOTE(review): presumably whether the table has header rows (see
/// `TableRow::Header`); set by code outside this file, confirm.
has_header: bool,
},
/// "table.el" type table
#[cfg_attr(feature = "ser", serde(rename = "table.el"))]
TableEl {
/// Raw text of the table lines, as collected by `parse_table_el`
value: Cow<'a, str>,
/// Number of blank lines between the table's last line and the next
/// non-blank line or the buffer's end
post_blank: usize,
},
}
impl Table<'_> {
    /// Tries to parse a "table.el" table at the start of `input`.
    ///
    /// Returns the unconsumed remainder and a [`Table::TableEl`], or `None`
    /// when the first line is not a table.el border line.
    pub fn parse_table_el(input: &str) -> Option<(&str, Table)> {
        Self::parse_table_el_internal(input).ok()
    }

    /// `IResult`-returning implementation behind [`Table::parse_table_el`].
    fn parse_table_el_internal(input: &str) -> IResult<&str, Table, ()> {
        // Only peek at the first line; the table body is consumed below.
        let (_, first_line) = line(input)?;
        let border = first_line.trim();

        // Table.el tables start at lines beginning with "+-" string and followed by plus or minus signs
        let is_border =
            border.starts_with("+-") && border.bytes().all(|b| b == b'+' || b == b'-');
        if !is_border {
            // TODO: better error kind
            return Err(Err::Error(make_error(input, ErrorKind::Many0)));
        }

        // Table.el tables end at the first line not starting with either a vertical line or a plus sign.
        let (rest, value) = lines_while(|row| {
            let row = row.trim_start();
            row.starts_with('|') || row.starts_with('+')
        })(input)?;
        let (rest, post_blank) = blank_lines_count(rest)?;

        let table = Table::TableEl {
            value: Cow::Borrowed(value),
            post_blank,
        };
        Ok((rest, table))
    }

    /// Converts every borrowed string into an owned one so the table no
    /// longer depends on the lifetime of the parsed text.
    pub fn into_owned(self) -> Table<'static> {
        match self {
            Table::Org {
                tblfm,
                post_blank,
                has_header,
            } => Table::Org {
                tblfm: tblfm.map(|formula| Cow::Owned(formula.into_owned())),
                post_blank,
                has_header,
            },
            Table::TableEl { value, post_blank } => Table::TableEl {
                value: Cow::Owned(value.into_owned()),
                post_blank,
            },
        }
    }
}
/// Table Row Element
///
/// Classifies a row of an "org" table as header, body, or one of the rule
/// lines separating them.
///
/// # Syntax
///
/// ```text
/// | 0 | 1 | 2 | <- TableRow::Body
/// | 0 | 1 | 2 | <- TableRow::Body
/// ```
///
/// ```text
/// |-----+-----+-----| <- ignored
/// | 0 | 1 | 2 | <- TableRow::Header
/// | 0 | 1 | 2 | <- TableRow::Header
/// |-----+-----+-----| <- TableRow::HeaderRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- TableRow::BodyRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- TableRow::BodyRule
/// |-----+-----+-----| <- TableRow::BodyRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- ignored
/// ```
///
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "table_row_type"))]
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
pub enum TableRow {
/// This row is part of the table header
Header,
/// This row is part of the table body
Body,
/// This rule row separates the table header from the body
HeaderRule,
/// This rule row separates one part of the table body from the next
BodyRule,
}
/// Table Cell Element
///
/// With the `ser` feature the variant is serialized under the
/// `table_cell_type` tag in kebab-case.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "table_cell_type"))]
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
pub enum TableCell {
/// Header cell
Header,
/// Body cell, or standard cell
Body,
}
// Exercises `Table::parse_table_el`. The raw-string fixtures are
// whitespace-sensitive and must not be reformatted.
#[test]
fn parse_table_el_() {
assert_eq!(
Table::parse_table_el(
r#" +---+
| |
+---+
"#
),
Some((
"",
Table::TableEl {
value: r#" +---+
| |
+---+
"#
.into(),
post_blank: 1
}
))
);
// Rejected: empty input, and a first line containing a character other
// than '+' or '-'.
assert!(Table::parse_table_el("").is_none());
assert!(Table::parse_table_el("+----|---").is_none());
}

View file

@ -1,78 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_while},
combinator::verify,
sequence::delimited,
IResult,
};
/// Target Object
///
/// Written as `<<TARGET>>` in the source text.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Target<'a> {
    /// Target ID
    pub target: Cow<'a, str>,
}

impl Target<'_> {
    /// Tries to parse a target at the start of `input`, returning the
    /// unconsumed remainder and the target, or `None` if it does not match.
    #[inline]
    pub(crate) fn parse(input: &str) -> Option<(&str, Target)> {
        parse_internal(input).ok()
    }

    /// Converts the borrowed target ID into an owned one so the value no
    /// longer depends on the lifetime of the parsed text.
    pub fn into_owned(self) -> Target<'static> {
        let Target { target } = self;
        Target {
            target: Cow::Owned(target.into_owned()),
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, Target, ()> {
let (input, target) = delimited(
tag("<<"),
verify(
take_while(|c: char| c != '<' && c != '\n' && c != '>'),
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '),
),
tag(">>"),
)(input)?;
Ok((
input,
Target {
target: target.into(),
},
))
}
// Exercises `Target::parse` on well-formed and malformed targets.
#[test]
fn parse() {
assert_eq!(
Target::parse("<<target>>"),
Some((
"",
Target {
target: "target".into()
}
))
);
// Inner spaces are allowed.
assert_eq!(
Target::parse("<<tar get>>"),
Some((
"",
Target {
target: "tar get".into()
}
))
);
// Rejected: leading/trailing space, forbidden characters, unterminated.
assert!(Target::parse("<<target >>").is_none());
assert!(Target::parse("<< target>>").is_none());
assert!(Target::parse("<<ta<get>>").is_none());
assert!(Target::parse("<<ta>get>>").is_none());
assert!(Target::parse("<<ta\nget>>").is_none());
assert!(Target::parse("<<target>").is_none());
}

View file

@ -1,482 +0,0 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take, take_till, take_while, take_while_m_n},
character::complete::{space0, space1},
combinator::{map, map_res, opt},
sequence::preceded,
IResult,
};
/// Datetime Struct
///
/// A calendar date with an optional time of day, as written inside a
/// timestamp.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Datetime<'a> {
    pub year: u16,
    pub month: u8,
    pub day: u8,
    /// Day-name text that follows the date
    pub dayname: Cow<'a, str>,
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub hour: Option<u8>,
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub minute: Option<u8>,
}

impl Datetime<'_> {
    /// Converts the borrowed day name into an owned string so the value no
    /// longer depends on the lifetime of the parsed text.
    pub fn into_owned(self) -> Datetime<'static> {
        let Datetime {
            year,
            month,
            day,
            dayname,
            hour,
            minute,
        } = self;

        Datetime {
            year,
            month,
            day,
            dayname: Cow::Owned(dayname.into_owned()),
            hour,
            minute,
        }
    }
}
/// Conversions from [`Datetime`] into `chrono` date/time types.
///
/// The conversions are implemented as `From`, which also provides the
/// matching `Into` through the standard blanket impl, so both
/// `NaiveDate::from(&datetime)` and `datetime.into()` work.
///
/// A missing hour or minute is treated as `0`; seconds are always `0`.
///
/// NOTE(review): `NaiveDate::from_ymd` / `NaiveTime::from_hms` are expected
/// to panic on out-of-range values, and the parser in this file does not
/// range-check month, day, hour or minute — confirm against the chrono
/// version in use.
#[cfg(feature = "chrono")]
mod chrono {
    use super::Datetime;
    use chrono::*;

    impl From<&Datetime<'_>> for NaiveDate {
        fn from(datetime: &Datetime<'_>) -> Self {
            NaiveDate::from_ymd(
                datetime.year.into(),
                datetime.month.into(),
                datetime.day.into(),
            )
        }
    }

    impl From<&Datetime<'_>> for NaiveTime {
        fn from(datetime: &Datetime<'_>) -> Self {
            NaiveTime::from_hms(
                datetime.hour.unwrap_or_default().into(),
                datetime.minute.unwrap_or_default().into(),
                0,
            )
        }
    }

    impl From<&Datetime<'_>> for NaiveDateTime {
        fn from(datetime: &Datetime<'_>) -> Self {
            NaiveDateTime::new(NaiveDate::from(datetime), NaiveTime::from(datetime))
        }
    }

    impl From<&Datetime<'_>> for DateTime<Utc> {
        fn from(datetime: &Datetime<'_>) -> Self {
            DateTime::from_utc(NaiveDateTime::from(datetime), Utc)
        }
    }

    // The by-value conversions simply delegate to the by-reference ones.

    impl From<Datetime<'_>> for NaiveDate {
        fn from(datetime: Datetime<'_>) -> Self {
            Self::from(&datetime)
        }
    }

    impl From<Datetime<'_>> for NaiveTime {
        fn from(datetime: Datetime<'_>) -> Self {
            Self::from(&datetime)
        }
    }

    impl From<Datetime<'_>> for NaiveDateTime {
        fn from(datetime: Datetime<'_>) -> Self {
            Self::from(&datetime)
        }
    }

    impl From<Datetime<'_>> for DateTime<Utc> {
        fn from(datetime: Datetime<'_>) -> Self {
            Self::from(&datetime)
        }
    }
}
/// Timestamp Object
///
/// The parsers in this file currently always leave `repeater` and `delay`
/// as `None` (delay-or-repeater parsing is still a TODO).
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
#[cfg_attr(feature = "ser", serde(tag = "timestamp_type"))]
#[derive(Debug, Clone)]
pub enum Timestamp<'a> {
/// Single timestamp delimited by `<` and `>`
Active {
start: Datetime<'a>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
repeater: Option<Cow<'a, str>>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
delay: Option<Cow<'a, str>>,
},
/// Single timestamp delimited by `[` and `]`
Inactive {
start: Datetime<'a>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
repeater: Option<Cow<'a, str>>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
delay: Option<Cow<'a, str>>,
},
/// Range written either as `<DATE TIME-TIME>` or as `<DATE>--<DATE>`
ActiveRange {
start: Datetime<'a>,
end: Datetime<'a>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
repeater: Option<Cow<'a, str>>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
delay: Option<Cow<'a, str>>,
},
/// Range written either as `[DATE TIME-TIME]` or as `[DATE]--[DATE]`
InactiveRange {
start: Datetime<'a>,
end: Datetime<'a>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
repeater: Option<Cow<'a, str>>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
delay: Option<Cow<'a, str>>,
},
/// Diary timestamp written as `<%%(VALUE)>`
Diary {
/// Text between `<%%(` and `)>`
value: Cow<'a, str>,
},
}
impl Timestamp<'_> {
    /// Tries to parse a `<...>` timestamp (single or range) at the start of `input`.
    pub(crate) fn parse_active(input: &str) -> Option<(&str, Timestamp)> {
        parse_active(input).ok()
    }

    /// Tries to parse a `[...]` timestamp (single or range) at the start of `input`.
    pub(crate) fn parse_inactive(input: &str) -> Option<(&str, Timestamp)> {
        parse_inactive(input).ok()
    }

    /// Tries to parse a `<%%(...)>` diary timestamp at the start of `input`.
    pub(crate) fn parse_diary(input: &str) -> Option<(&str, Timestamp)> {
        parse_diary(input).ok()
    }

    /// Converts every borrowed string into an owned one so the timestamp no
    /// longer depends on the lifetime of the parsed text.
    pub fn into_owned(self) -> Timestamp<'static> {
        /// Detaches an optional borrowed string from its source buffer.
        fn detach(text: Option<Cow<'_, str>>) -> Option<Cow<'static, str>> {
            text.map(|s| Cow::Owned(s.into_owned()))
        }

        match self {
            Timestamp::Active {
                start,
                repeater,
                delay,
            } => Timestamp::Active {
                start: start.into_owned(),
                repeater: detach(repeater),
                delay: detach(delay),
            },
            Timestamp::Inactive {
                start,
                repeater,
                delay,
            } => Timestamp::Inactive {
                start: start.into_owned(),
                repeater: detach(repeater),
                delay: detach(delay),
            },
            Timestamp::ActiveRange {
                start,
                end,
                repeater,
                delay,
            } => Timestamp::ActiveRange {
                start: start.into_owned(),
                end: end.into_owned(),
                repeater: detach(repeater),
                delay: detach(delay),
            },
            Timestamp::InactiveRange {
                start,
                end,
                repeater,
                delay,
            } => Timestamp::InactiveRange {
                start: start.into_owned(),
                end: end.into_owned(),
                repeater: detach(repeater),
                delay: detach(delay),
            },
            Timestamp::Diary { value } => Timestamp::Diary {
                value: Cow::Owned(value.into_owned()),
            },
        }
    }
}
pub fn parse_active(input: &str) -> IResult<&str, Timestamp, ()> {
let (input, _) = tag("<")(input)?;
let (input, start) = parse_datetime(input)?;
if input.starts_with('-') {
let (input, (hour, minute)) = parse_time(&input[1..])?;
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag(">")(input)?;
let mut end = start.clone();
end.hour = Some(hour);
end.minute = Some(minute);
return Ok((
input,
Timestamp::ActiveRange {
start,
end,
repeater: None,
delay: None,
},
));
}
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag(">")(input)?;
if input.starts_with("--<") {
let (input, end) = parse_datetime(&input["--<".len()..])?;
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag(">")(input)?;
Ok((
input,
Timestamp::ActiveRange {
start,
end,
repeater: None,
delay: None,
},
))
} else {
Ok((
input,
Timestamp::Active {
start,
repeater: None,
delay: None,
},
))
}
}
pub fn parse_inactive(input: &str) -> IResult<&str, Timestamp, ()> {
let (input, _) = tag("[")(input)?;
let (input, start) = parse_datetime(input)?;
if input.starts_with('-') {
let (input, (hour, minute)) = parse_time(&input[1..])?;
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag("]")(input)?;
let mut end = start.clone();
end.hour = Some(hour);
end.minute = Some(minute);
return Ok((
input,
Timestamp::InactiveRange {
start,
end,
repeater: None,
delay: None,
},
));
}
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag("]")(input)?;
if input.starts_with("--[") {
let (input, end) = parse_datetime(&input["--[".len()..])?;
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag("]")(input)?;
Ok((
input,
Timestamp::InactiveRange {
start,
end,
repeater: None,
delay: None,
},
))
} else {
Ok((
input,
Timestamp::Inactive {
start,
repeater: None,
delay: None,
},
))
}
}
pub fn parse_diary(input: &str) -> IResult<&str, Timestamp, ()> {
let (input, _) = tag("<%%(")(input)?;
let (input, value) = take_till(|c| c == ')' || c == '>' || c == '\n')(input)?;
let (input, _) = tag(")>")(input)?;
Ok((
input,
Timestamp::Diary {
value: value.into(),
},
))
}
/// Parses a time of day written as `H:MM` or `HH:MM`.
///
/// Returns the remaining input and the `(hour, minute)` pair. Values are not
/// range-checked here (e.g. `25:61` is accepted).
fn parse_time(input: &str) -> IResult<&str, (u8, u8), ()> {
    let (input, hour) = map_res(take_while_m_n(1, 2, |c: char| c.is_ascii_digit()), |num| {
        u8::from_str_radix(num, 10)
    })(input)?;
    let (input, _) = tag(":")(input)?;
    // Require exactly two ASCII digits. Taking any two characters and handing
    // them to `from_str_radix` would accept a leading sign, so "09:+5" used to
    // parse as minute 5.
    let (input, minute) = map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), |num| {
        u8::from_str_radix(num, 10)
    })(input)?;
    Ok((input, (hour, minute)))
}
/// Parses `YYYY-MM-DD DAYNAME[ H:MM]` from the front of `input`.
///
/// * The date fields must consist of exactly 4, 2 and 2 ASCII digits.
/// * `DAYNAME` is the (possibly empty) run of characters up to the first
///   whitespace, ASCII digit, `+`, `-`, `]` or `>`.
/// * The time of day is optional; when absent `hour` and `minute` are `None`.
///
/// Month and day values are not range-checked here.
fn parse_datetime(input: &str) -> IResult<&str, Datetime, ()> {
    /// Parses `num` as an unsigned number made only of ASCII digits.
    ///
    /// The integer parsers in std accept a leading `+`, so without this check
    /// a field such as "+200" would be read as the year 200.
    fn parse_digits<T: std::str::FromStr>(num: &str) -> Result<T, ()> {
        if num.bytes().all(|b| b.is_ascii_digit()) {
            num.parse().map_err(|_| ())
        } else {
            Err(())
        }
    }

    let (input, year) = map_res(take(4usize), parse_digits::<u16>)(input)?;
    let (input, _) = tag("-")(input)?;
    let (input, month) = map_res(take(2usize), parse_digits::<u8>)(input)?;
    let (input, _) = tag("-")(input)?;
    let (input, day) = map_res(take(2usize), parse_digits::<u8>)(input)?;
    let (input, _) = space1(input)?;
    let (input, dayname) = take_while(|c: char| {
        !c.is_ascii_whitespace()
            && !c.is_ascii_digit()
            && c != '+'
            && c != '-'
            && c != ']'
            && c != '>'
    })(input)?;
    // The time is optional: split the parsed pair into two independent
    // `Option`s so both are `None` when it is missing.
    let (input, (hour, minute)) = map(opt(preceded(space1, parse_time)), |time| {
        (time.map(|t| t.0), time.map(|t| t.1))
    })(input)?;
    Ok((
        input,
        Datetime {
            year,
            month,
            day,
            dayname: dayname.into(),
            hour,
            minute,
        },
    ))
}
// TODO
// #[cfg_attr(test, derive(PartialEq))]
// #[cfg_attr(feature = "ser", derive(serde::Serialize))]
// #[derive(Debug, Copy, Clone)]
// pub enum RepeaterType {
// Cumulate,
// CatchUp,
// Restart,
// }
// #[cfg_attr(test, derive(PartialEq))]
// #[cfg_attr(feature = "ser", derive(serde::Serialize))]
// #[derive(Debug, Copy, Clone)]
// pub enum DelayType {
// All,
// First,
// }
// #[cfg_attr(test, derive(PartialEq))]
// #[cfg_attr(feature = "ser", derive(serde::Serialize))]
// #[derive(Debug, Copy, Clone)]
// pub enum TimeUnit {
// Hour,
// Day,
// Week,
// Month,
// Year,
// }
// #[cfg_attr(test, derive(PartialEq))]
// #[cfg_attr(feature = "ser", derive(serde::Serialize))]
// #[derive(Debug, Copy, Clone)]
// pub struct Repeater {
// pub ty: RepeaterType,
// pub value: usize,
// pub unit: TimeUnit,
// }
// #[cfg_attr(test, derive(PartialEq))]
// #[cfg_attr(feature = "ser", derive(serde::Serialize))]
// #[derive(Debug, Copy, Clone)]
// pub struct Delay {
// pub ty: DelayType,
// pub value: usize,
// pub unit: TimeUnit,
// }
// Exercises `parse_inactive` / `parse_active` on a single timestamp and on
// both range forms.
#[test]
fn parse() {
// Single inactive timestamp without a time of day.
assert_eq!(
parse_inactive("[2003-09-16 Tue]"),
Ok((
"",
Timestamp::Inactive {
start: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: None,
minute: None
},
repeater: None,
delay: None,
},
))
);
// Range written as two timestamps joined by "--".
assert_eq!(
parse_inactive("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]"),
Ok((
"",
Timestamp::InactiveRange {
start: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: Some(9),
minute: Some(39)
},
end: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: Some(10),
minute: Some(39),
},
repeater: None,
delay: None
},
))
);
// Same-day time range inside a single pair of brackets.
assert_eq!(
parse_active("<2003-09-16 Tue 09:39-10:39>"),
Ok((
"",
Timestamp::ActiveRange {
start: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: Some(9),
minute: Some(39),
},
end: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: Some(10),
minute: Some(39),
},
repeater: None,
delay: None
},
))
);
}

View file

@ -1,551 +0,0 @@
//! Headline Title
use std::collections::HashMap;
use std::{borrow::Cow, iter::FromIterator};
use memchr::memrchr2;
use nom::{
branch::alt,
bytes::complete::{tag, take_until, take_while},
character::complete::{anychar, line_ending, space1},
combinator::{map, opt, verify},
error::{make_error, ErrorKind},
multi::fold_many0,
sequence::{delimited, preceded},
Err, IResult,
};
use crate::{
config::ParseConfig,
elements::{drawer::parse_drawer_without_blank, Planning, Timestamp},
parse::combinators::{blank_lines_count, line, one_word},
};
/// Title Element
///
/// The parsed first line of a headline together with the planning line and
/// property drawer that may follow it.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Title<'a> {
/// Headline level, number of stars
pub level: usize,
/// Headline priority cookie (the uppercase letter inside `[#X]`)
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
pub priority: Option<char>,
/// Headline title tags
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Vec::is_empty"))]
pub tags: Vec<Cow<'a, str>>,
/// Headline todo keyword
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
pub keyword: Option<Cow<'a, str>>,
/// Raw headline's text, without the stars and the tags
pub raw: Cow<'a, str>,
/// Planning element associated to this headline
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
pub planning: Option<Box<Planning<'a>>>,
/// Property drawer associated to this headline
#[cfg_attr(
feature = "ser",
serde(skip_serializing_if = "PropertiesMap::is_empty")
)]
pub properties: PropertiesMap<'a>,
/// Number of blank lines between the title's last line and the next
/// non-blank line or the buffer's end
pub post_blank: usize,
}
impl Title<'_> {
    /// Tries to parse a headline title at the start of `input`.
    ///
    /// On success returns the unconsumed remainder plus the `Title` and its
    /// raw text as a slice of `input`.
    pub(crate) fn parse<'a>(
        input: &'a str,
        config: &ParseConfig,
    ) -> Option<(&'a str, (Title<'a>, &'a str))> {
        parse_title(input, config).ok()
    }

    // TODO: fn is_quoted(&self) -> bool { }

    // TODO: fn is_footnote_section(&self) -> bool { }

    /// Returns this headline's closed timestamp, or `None` if not set.
    pub fn closed(&self) -> Option<&Timestamp> {
        let planning = self.planning.as_ref()?;
        planning.closed.as_ref()
    }

    /// Returns this headline's scheduled timestamp, or `None` if not set.
    pub fn scheduled(&self) -> Option<&Timestamp> {
        let planning = self.planning.as_ref()?;
        planning.scheduled.as_ref()
    }

    /// Returns this headline's deadline timestamp, or `None` if not set.
    pub fn deadline(&self) -> Option<&Timestamp> {
        let planning = self.planning.as_ref()?;
        planning.deadline.as_ref()
    }

    /// Returns `true` if this headline is archived, i.e. carries the
    /// `ARCHIVE` tag.
    pub fn is_archived(&self) -> bool {
        self.tags.iter().any(|tag| &**tag == "ARCHIVE")
    }

    /// Returns `true` if this headline is commented, i.e. its raw text is
    /// `COMMENT` alone or `COMMENT` followed by whitespace.
    pub fn is_commented(&self) -> bool {
        const MARKER: &str = "COMMENT";

        if !self.raw.starts_with(MARKER) {
            return false;
        }
        let rest = &self.raw[MARKER.len()..];
        rest.is_empty() || rest.starts_with(char::is_whitespace)
    }

    /// Converts every borrowed string into an owned one so the title no
    /// longer depends on the lifetime of the parsed text.
    pub fn into_owned(self) -> Title<'static> {
        let Title {
            level,
            priority,
            tags,
            keyword,
            raw,
            planning,
            properties,
            post_blank,
        } = self;

        Title {
            level,
            priority,
            tags: tags
                .into_iter()
                .map(|tag| Cow::Owned(tag.into_owned()))
                .collect(),
            keyword: keyword.map(|kw| Cow::Owned(kw.into_owned())),
            raw: Cow::Owned(raw.into_owned()),
            planning: planning.map(|p| Box::new(p.into_owned())),
            properties: properties.into_owned(),
            post_blank,
        }
    }
}
impl Default for Title<'_> {
    /// Builds an empty level-1 title: no keyword, priority, tags, planning
    /// or properties, and an empty raw text.
    fn default() -> Title<'static> {
        Title {
            level: 1,
            keyword: None,
            priority: None,
            raw: Cow::Borrowed(""),
            tags: vec![],
            planning: None,
            properties: PropertiesMap::default(),
            post_blank: 0,
        }
    }
}
/// Properties
///
/// An ordered list of `(name, value)` pairs. Insertion order is preserved
/// and duplicate names are kept.
#[derive(Default, Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct PropertiesMap<'a> {
    pub pairs: Vec<(Cow<'a, str>, Cow<'a, str>)>,
}

impl<'a> PropertiesMap<'a> {
    /// Creates an empty map.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns `true` when the map holds no pairs.
    pub fn is_empty(&self) -> bool {
        self.pairs.len() == 0
    }

    /// Iterates over the pairs by reference, in insertion order.
    pub fn iter(&self) -> impl Iterator<Item = &(Cow<'a, str>, Cow<'a, str>)> {
        (&self.pairs).into_iter()
    }

    /// Iterates over the pairs by mutable reference, in insertion order.
    pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut (Cow<'a, str>, Cow<'a, str>)> {
        (&mut self.pairs).into_iter()
    }

    /// Consumes the map and yields its pairs in insertion order.
    pub fn into_iter(self) -> impl Iterator<Item = (Cow<'a, str>, Cow<'a, str>)> {
        let PropertiesMap { pairs } = self;
        pairs.into_iter()
    }

    /// Consumes the map and collects it into a `HashMap`; for duplicate
    /// names the last value wins and ordering is lost.
    pub fn into_hash_map(self) -> HashMap<Cow<'a, str>, Cow<'a, str>> {
        let mut table = HashMap::new();
        table.extend(self.pairs);
        table
    }

    /// Consumes the map and collects it into an `IndexMap`.
    #[cfg(feature = "indexmap")]
    pub fn into_index_map(self) -> indexmap::IndexMap<Cow<'a, str>, Cow<'a, str>> {
        self.pairs.into_iter().collect()
    }

    /// Converts every borrowed string into an owned one so the map no longer
    /// depends on the lifetime of the parsed text.
    pub fn into_owned(self) -> PropertiesMap<'static> {
        let pairs = self
            .pairs
            .into_iter()
            .map(|(name, value)| {
                (
                    Cow::Owned(name.into_owned()),
                    Cow::Owned(value.into_owned()),
                )
            })
            .collect();
        PropertiesMap { pairs }
    }
}

impl<'a> FromIterator<(Cow<'a, str>, Cow<'a, str>)> for PropertiesMap<'a> {
    /// Collects the pairs in iteration order.
    fn from_iter<T: IntoIterator<Item = (Cow<'a, str>, Cow<'a, str>)>>(iter: T) -> Self {
        PropertiesMap {
            pairs: iter.into_iter().collect(),
        }
    }
}
/// Matches either one or more blanks (`space1`) or a single line ending
/// (`line_ending`), returning the matched text.
///
/// Fails on empty input, since neither alternative accepts it.
fn white_spaces_or_eol(input: &str) -> IResult<&str, &str, ()> {
alt((space1, line_ending))(input)
}
#[inline]
fn parse_title<'a>(
input: &'a str,
config: &ParseConfig,
) -> IResult<&'a str, (Title<'a>, &'a str), ()> {
let (input, level) = map(take_while(|c: char| c == '*'), |s: &str| s.len())(input)?;
debug_assert!(level > 0);
let (input, keyword) = opt(preceded(
space1,
verify(one_word, |s: &str| {
config.todo_keywords.0.iter().any(|x| x == s)
|| config.todo_keywords.1.iter().any(|x| x == s)
}),
))(input)?;
let (input, priority) = opt(delimited(
space1,
delimited(
tag("[#"),
verify(anychar, |c: &char| c.is_ascii_uppercase()),
tag("]"),
),
white_spaces_or_eol,
))(input)?;
let (input, tail) = line(input)?;
let tail = tail.trim();
// tags can be separated by space or \t
let (raw, tags) = memrchr2(b' ', b'\t', tail.as_bytes())
.map(|i| (tail[0..i].trim(), &tail[i + 1..]))
.filter(|(_, x)| is_tag_line(x))
.unwrap_or((tail, ""));
let tags = tags
.split(':')
.filter(|s| !s.is_empty())
.map(Into::into)
.collect();
let (input, planning) = Planning::parse(input)
.map(|(input, planning)| (input, Some(Box::new(planning))))
.unwrap_or((input, None));
let (input, properties) = opt(parse_properties_drawer)(input)?;
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
(
Title {
properties: properties.unwrap_or_default(),
level,
keyword: keyword.map(Into::into),
priority,
tags,
raw: raw.into(),
planning,
post_blank,
},
raw,
),
))
}
/// Returns `true` when `input` looks like a headline tag group such as
/// `:tag1:tag2:`.
///
/// The text must be longer than two bytes, start and end with `:`, and
/// consist only of alphanumeric characters, `_`, `@`, `#`, `%` and `:`.
fn is_tag_line(input: &str) -> bool {
    if input.len() <= 2 || !input.starts_with(':') || !input.ends_with(':') {
        return false;
    }

    input.chars().all(|ch| match ch {
        '_' | '@' | '#' | '%' | ':' => true,
        other => other.is_alphanumeric(),
    })
}
#[inline]
fn parse_properties_drawer(input: &str) -> IResult<&str, PropertiesMap<'_>, ()> {
let (input, (drawer, content)) = parse_drawer_without_blank(input.trim_start())?;
if drawer.name != "PROPERTIES" {
return Err(Err::Error(make_error(input, ErrorKind::Tag)));
}
let (_, map) = fold_many0(
parse_node_property,
PropertiesMap::new,
|mut acc: PropertiesMap, (name, value)| {
acc.pairs.push((name.into(), value.into()));
acc
},
)(content)?;
Ok((input, map))
}
#[inline]
fn parse_node_property(input: &str) -> IResult<&str, (&str, &str), ()> {
let (input, _) = blank_lines_count(input)?;
let input = input.trim_start();
let (input, name) = map(delimited(tag(":"), take_until(":"), tag(":")), |s: &str| {
s.trim_end_matches('+')
})(input)?;
let (input, value) = line(input)?;
Ok((input, (name, value.trim())))
}
// Exercises `parse_title` on keyword, priority and tag edge cases.
#[test]
fn parse_title_() {
use crate::config::DEFAULT_CONFIG;
// Keyword, priority and tags all present.
assert_eq!(
parse_title("**** DONE [#A] COMMENT Title :tag:a2%:", &DEFAULT_CONFIG),
Ok((
"",
(
Title {
level: 4,
keyword: Some("DONE".into()),
priority: Some('A'),
raw: "COMMENT Title".into(),
tags: vec!["tag".into(), "a2%".into()],
planning: None,
properties: PropertiesMap::new(),
post_blank: 0,
},
"COMMENT Title"
)
))
);
// Keywords are matched case-sensitively; without a keyword the priority
// cookie stays part of the raw text.
assert_eq!(
parse_title("**** ToDO [#A] COMMENT Title", &DEFAULT_CONFIG),
Ok((
"",
(
Title {
level: 4,
keyword: None,
priority: None,
raw: "ToDO [#A] COMMENT Title".into(),
tags: vec![],
planning: None,
properties: PropertiesMap::new(),
post_blank: 0,
},
"ToDO [#A] COMMENT Title"
)
))
);
assert_eq!(
parse_title("**** T0DO [#A] COMMENT Title", &DEFAULT_CONFIG),
Ok((
"",
(
Title {
level: 4,
keyword: None,
priority: None,
raw: "T0DO [#A] COMMENT Title".into(),
tags: vec![],
planning: None,
properties: PropertiesMap::new(),
post_blank: 0,
},
"T0DO [#A] COMMENT Title"
)
))
);
// The priority must be an uppercase ASCII letter.
assert_eq!(
parse_title("**** DONE [#1] COMMENT Title", &DEFAULT_CONFIG),
Ok((
"",
(
Title {
level: 4,
keyword: Some("DONE".into()),
priority: None,
raw: "[#1] COMMENT Title".into(),
tags: vec![],
planning: None,
properties: PropertiesMap::new(),
post_blank: 0,
},
"[#1] COMMENT Title"
)
))
);
assert_eq!(
parse_title("**** DONE [#a] COMMENT Title", &DEFAULT_CONFIG),
Ok((
"",
(
Title {
level: 4,
keyword: Some("DONE".into()),
priority: None,
raw: "[#a] COMMENT Title".into(),
tags: vec![],
planning: None,
properties: PropertiesMap::new(),
post_blank: 0,
},
"[#a] COMMENT Title"
)
))
);
// https://github.com/PoiScript/orgize/issues/20
assert_eq!(
parse_title("** DONE [#B]::", &DEFAULT_CONFIG),
Ok((
"",
(
Title {
level: 2,
keyword: Some("DONE".into()),
priority: None,
raw: "[#B]::".into(),
tags: vec![],
planning: None,
properties: PropertiesMap::new(),
post_blank: 0,
},
"[#B]::"
)
))
);
// A tag group must both start and end with ':'.
assert_eq!(
parse_title("**** Title :tag:a2%", &DEFAULT_CONFIG),
Ok((
"",
(
Title {
level: 4,
keyword: None,
priority: None,
raw: "Title :tag:a2%".into(),
tags: vec![],
planning: None,
properties: PropertiesMap::new(),
post_blank: 0,
},
"Title :tag:a2%"
)
))
);
assert_eq!(
parse_title("**** Title tag:a2%:", &DEFAULT_CONFIG),
Ok((
"",
(
Title {
level: 4,
keyword: None,
priority: None,
raw: "Title tag:a2%:".into(),
tags: vec![],
planning: None,
properties: PropertiesMap::new(),
post_blank: 0,
},
"Title tag:a2%:"
)
))
);
// With no configured keywords, "DONE" is ordinary title text.
assert_eq!(
parse_title(
"**** DONE Title",
&ParseConfig {
todo_keywords: (vec![], vec![]),
..Default::default()
}
),
Ok((
"",
(
Title {
level: 4,
keyword: None,
priority: None,
raw: "DONE Title".into(),
tags: vec![],
planning: None,
properties: PropertiesMap::new(),
post_blank: 0,
},
"DONE Title"
)
))
);
// A custom keyword from the configuration is recognised.
assert_eq!(
parse_title(
"**** TASK [#A] Title",
&ParseConfig {
todo_keywords: (vec!["TASK".to_string()], vec![]),
..Default::default()
}
),
Ok((
"",
(
Title {
level: 4,
keyword: Some("TASK".into()),
priority: Some('A'),
raw: "Title".into(),
tags: vec![],
planning: None,
properties: PropertiesMap::new(),
post_blank: 0,
},
"Title"
)
))
);
}
// Checks that an indented PROPERTIES drawer with a single property is parsed
// into the expected one-pair map.
#[test]
fn parse_properties_drawer_() {
assert_eq!(
parse_properties_drawer(" :PROPERTIES:\n :CUSTOM_ID: id\n :END:"),
Ok((
"",
vec![("CUSTOM_ID".into(), "id".into())]
.into_iter()
.collect::<PropertiesMap>()
))
)
}
// Checks that properties come out of `into_index_map` in the same order in
// which they were written in the drawer.
#[test]
#[cfg(feature = "indexmap")]
fn preserve_properties_drawer_order() {
let mut vec = Vec::default();
// Use a large number of properties to reduce false pass rate, since HashMap
// is non-deterministic. There are roughly 10^18 possible derangements of this sequence.
for i in 0..20 {
// Avoid alphabetic or numeric order.
let j = (i + 7) % 20;
vec.push((
Cow::Owned(format!(
"{}{}",
if i % 3 == 0 {
"FOO"
} else if i % 3 == 1 {
"QUX"
} else {
"BAR"
},
j
)),
Cow::Owned(i.to_string()),
));
}
// Render the generated pairs as drawer lines, one property per line.
let mut s = String::default();
for (k, v) in &vec {
s += &format!(" :{}: {}\n", k, v);
}
let drawer = format!(" :PROPERTIES:\n{}:END:\n", &s);
let map = parse_properties_drawer(&drawer).unwrap().1.into_index_map();
// indexmap should be in the same order as vector
for (left, right) in vec.iter().zip(map) {
assert_eq!(left, &right);
}
}

468
src/entities.rs Normal file
View file

@ -0,0 +1,468 @@
// https://git.sr.ht/~bzg/org-mode/tree/bfa4f9d5aa3e5c94974cae7a459cb5e5b4b15f52/item/lisp/org-entities.el#L85
// nil -> false
// t -> true
// \x00A0 -> \\x00A0
#[rustfmt::skip]
pub const ENTITIES: &[(&str, &str, bool, &str, &str, &str, &str)] = &[
// ("* Letters"
// Latin
("Agrave", "\\`{A}", false, "&Agrave;", "A", "À", "À"),
("agrave", "\\`{a}", false, "&agrave;", "a", "à", "à"),
("Aacute", "\\'{A}", false, "&Aacute;", "A", "Á", "Á"),
("aacute", "\\'{a}", false, "&aacute;", "a", "á", "á"),
("Acirc", "\\^{A}", false, "&Acirc;", "A", "Â", "Â"),
("acirc", "\\^{a}", false, "&acirc;", "a", "â", "â"),
("Amacr", "\\={A}", false, "&Amacr;", "A", "Ã", "Ã"),
("amacr", "\\={a}", false, "&amacr;", "a", "ã", "ã"),
("Atilde", "\\~{A}", false, "&Atilde;", "A", "Ã", "Ã"),
("atilde", "\\~{a}", false, "&atilde;", "a", "ã", "ã"),
("Auml", "\\\"{A}", false, "&Auml;", "Ae", "Ä", "Ä"),
("auml", "\\\"{a}", false, "&auml;", "ae", "ä", "ä"),
("Aring", "\\AA{}", false, "&Aring;", "A", "Å", "Å"),
("AA", "\\AA{}", false, "&Aring;", "A", "Å", "Å"),
("aring", "\\aa{}", false, "&aring;", "a", "å", "å"),
("AElig", "\\AE{}", false, "&AElig;", "AE", "Æ", "Æ"),
("aelig", "\\ae{}", false, "&aelig;", "ae", "æ", "æ"),
("Ccedil", "\\c{C}", false, "&Ccedil;", "C", "Ç", "Ç"),
("ccedil", "\\c{c}", false, "&ccedil;", "c", "ç", "ç"),
("Egrave", "\\`{E}", false, "&Egrave;", "E", "È", "È"),
("egrave", "\\`{e}", false, "&egrave;", "e", "è", "è"),
("Eacute", "\\'{E}", false, "&Eacute;", "E", "É", "É"),
("eacute", "\\'{e}", false, "&eacute;", "e", "é", "é"),
("Ecirc", "\\^{E}", false, "&Ecirc;", "E", "Ê", "Ê"),
("ecirc", "\\^{e}", false, "&ecirc;", "e", "ê", "ê"),
("Euml", "\\\"{E}", false, "&Euml;", "E", "Ë", "Ë"),
("euml", "\\\"{e}", false, "&euml;", "e", "ë", "ë"),
("Igrave", "\\`{I}", false, "&Igrave;", "I", "Ì", "Ì"),
("igrave", "\\`{i}", false, "&igrave;", "i", "ì", "ì"),
("Iacute", "\\'{I}", false, "&Iacute;", "I", "Í", "Í"),
("iacute", "\\'{i}", false, "&iacute;", "i", "í", "í"),
("Idot", "\\.{I}", false, "&idot;", "I", "İ", "İ"),
("inodot", "\\i", false, "&inodot;", "i", "ı", "ı"),
("Icirc", "\\^{I}", false, "&Icirc;", "I", "Î", "Î"),
("icirc", "\\^{i}", false, "&icirc;", "i", "î", "î"),
("Iuml", "\\\"{I}", false, "&Iuml;", "I", "Ï", "Ï"),
("iuml", "\\\"{i}", false, "&iuml;", "i", "ï", "ï"),
("Ntilde", "\\~{N}", false, "&Ntilde;", "N", "Ñ", "Ñ"),
("ntilde", "\\~{n}", false, "&ntilde;", "n", "ñ", "ñ"),
("Ograve", "\\`{O}", false, "&Ograve;", "O", "Ò", "Ò"),
("ograve", "\\`{o}", false, "&ograve;", "o", "ò", "ò"),
("Oacute", "\\'{O}", false, "&Oacute;", "O", "Ó", "Ó"),
("oacute", "\\'{o}", false, "&oacute;", "o", "ó", "ó"),
("Ocirc", "\\^{O}", false, "&Ocirc;", "O", "Ô", "Ô"),
("ocirc", "\\^{o}", false, "&ocirc;", "o", "ô", "ô"),
("Otilde", "\\~{O}", false, "&Otilde;", "O", "Õ", "Õ"),
("otilde", "\\~{o}", false, "&otilde;", "o", "õ", "õ"),
("Ouml", "\\\"{O}", false, "&Ouml;", "Oe", "Ö", "Ö"),
("ouml", "\\\"{o}", false, "&ouml;", "oe", "ö", "ö"),
("Oslash", "\\O", false, "&Oslash;", "O", "Ø", "Ø"),
("oslash", "\\o{}", false, "&oslash;", "o", "ø", "ø"),
("OElig", "\\OE{}", false, "&OElig;", "OE", "OE", "Œ"),
("oelig", "\\oe{}", false, "&oelig;", "oe", "oe", "œ"),
("Scaron", "\\v{S}", false, "&Scaron;", "S", "S", "Š"),
("scaron", "\\v{s}", false, "&scaron;", "s", "s", "š"),
("szlig", "\\ss{}", false, "&szlig;", "ss", "ß", "ß"),
("Ugrave", "\\`{U}", false, "&Ugrave;", "U", "Ù", "Ù"),
("ugrave", "\\`{u}", false, "&ugrave;", "u", "ù", "ù"),
("Uacute", "\\'{U}", false, "&Uacute;", "U", "Ú", "Ú"),
("uacute", "\\'{u}", false, "&uacute;", "u", "ú", "ú"),
("Ucirc", "\\^{U}", false, "&Ucirc;", "U", "Û", "Û"),
("ucirc", "\\^{u}", false, "&ucirc;", "u", "û", "û"),
("Uuml", "\\\"{U}", false, "&Uuml;", "Ue", "Ü", "Ü"),
("uuml", "\\\"{u}", false, "&uuml;", "ue", "ü", "ü"),
("Yacute", "\\'{Y}", false, "&Yacute;", "Y", "Ý", "Ý"),
("yacute", "\\'{y}", false, "&yacute;", "y", "ý", "ý"),
("Yuml", "\\\"{Y}", false, "&Yuml;", "Y", "Y", "Ÿ"),
("yuml", "\\\"{y}", false, "&yuml;", "y", "ÿ", "ÿ"),
// Latin (special face)
("fnof", "\\textit{f}", false, "&fnof;", "f", "f", "ƒ"),
("real", "\\Re", true, "&real;", "R", "R", ""),
("image", "\\Im", true, "&image;", "I", "I", ""),
("weierp", "\\wp", true, "&weierp;", "P", "P", ""),
("ell", "\\ell", true, "&ell;", "ell", "ell", ""),
("imath", "\\imath", true, "&imath;", "[dotless i]", "dotless i", "ı"),
("jmath", "\\jmath", true, "&jmath;", "[dotless j]", "dotless j", "ȷ"),
// Greek
("Alpha", "A", false, "&Alpha;", "Alpha", "Alpha", "Α"),
("alpha", "\\alpha", true, "&alpha;", "alpha", "alpha", "α"),
("Beta", "B", false, "&Beta;", "Beta", "Beta", "Β"),
("beta", "\\beta", true, "&beta;", "beta", "beta", "β"),
("Gamma", "\\Gamma", true, "&Gamma;", "Gamma", "Gamma", "Γ"),
("gamma", "\\gamma", true, "&gamma;", "gamma", "gamma", "γ"),
("Delta", "\\Delta", true, "&Delta;", "Delta", "Delta", "Δ"),
("delta", "\\delta", true, "&delta;", "delta", "delta", "δ"),
("Epsilon", "E", false, "&Epsilon;", "Epsilon", "Epsilon", "Ε"),
("epsilon", "\\epsilon", true, "&epsilon;", "epsilon", "epsilon", "ε"),
("varepsilon", "\\varepsilon", true, "&epsilon;", "varepsilon", "varepsilon", "ε"),
("Zeta", "Z", false, "&Zeta;", "Zeta", "Zeta", "Ζ"),
("zeta", "\\zeta", true, "&zeta;", "zeta", "zeta", "ζ"),
("Eta", "H", false, "&Eta;", "Eta", "Eta", "Η"),
("eta", "\\eta", true, "&eta;", "eta", "eta", "η"),
("Theta", "\\Theta", true, "&Theta;", "Theta", "Theta", "Θ"),
("theta", "\\theta", true, "&theta;", "theta", "theta", "θ"),
("thetasym", "\\vartheta", true, "&thetasym;", "theta", "theta", "ϑ"),
("vartheta", "\\vartheta", true, "&thetasym;", "theta", "theta", "ϑ"),
("Iota", "I", false, "&Iota;", "Iota", "Iota", "Ι"),
("iota", "\\iota", true, "&iota;", "iota", "iota", "ι"),
("Kappa", "K", false, "&Kappa;", "Kappa", "Kappa", "Κ"),
("kappa", "\\kappa", true, "&kappa;", "kappa", "kappa", "κ"),
("Lambda", "\\Lambda", true, "&Lambda;", "Lambda", "Lambda", "Λ"),
("lambda", "\\lambda", true, "&lambda;", "lambda", "lambda", "λ"),
("Mu", "M", false, "&Mu;", "Mu", "Mu", "Μ"),
("mu", "\\mu", true, "&mu;", "mu", "mu", "μ"),
("nu", "\\nu", true, "&nu;", "nu", "nu", "ν"),
("Nu", "N", false, "&Nu;", "Nu", "Nu", "Ν"),
("Xi", "\\Xi", true, "&Xi;", "Xi", "Xi", "Ξ"),
("xi", "\\xi", true, "&xi;", "xi", "xi", "ξ"),
("Omicron", "O", false, "&Omicron;", "Omicron", "Omicron", "Ο"),
("omicron", "\\textit{o}", false, "&omicron;", "omicron", "omicron", "ο"),
("Pi", "\\Pi", true, "&Pi;", "Pi", "Pi", "Π"),
("pi", "\\pi", true, "&pi;", "pi", "pi", "π"),
("Rho", "P", false, "&Rho;", "Rho", "Rho", "Ρ"),
("rho", "\\rho", true, "&rho;", "rho", "rho", "ρ"),
("Sigma", "\\Sigma", true, "&Sigma;", "Sigma", "Sigma", "Σ"),
("sigma", "\\sigma", true, "&sigma;", "sigma", "sigma", "σ"),
("sigmaf", "\\varsigma", true, "&sigmaf;", "sigmaf", "sigmaf", "ς"),
("varsigma", "\\varsigma", true, "&sigmaf;", "varsigma", "varsigma", "ς"),
("Tau", "T", false, "&Tau;", "Tau", "Tau", "Τ"),
("Upsilon", "\\Upsilon", true, "&Upsilon;", "Upsilon", "Upsilon", "Υ"),
("upsih", "\\Upsilon", true, "&upsih;", "upsilon", "upsilon", "ϒ"),
("upsilon", "\\upsilon", true, "&upsilon;", "upsilon", "upsilon", "υ"),
("Phi", "\\Phi", true, "&Phi;", "Phi", "Phi", "Φ"),
("phi", "\\phi", true, "&phi;", "phi", "phi", "ɸ"),
("varphi", "\\varphi", true, "&varphi;", "varphi", "varphi", "φ"),
("Chi", "X", false, "&Chi;", "Chi", "Chi", "Χ"),
("chi", "\\chi", true, "&chi;", "chi", "chi", "χ"),
("acutex", "\\acute x", true, "&acute;x", "'x", "'x", "𝑥́"),
("Psi", "\\Psi", true, "&Psi;", "Psi", "Psi", "Ψ"),
("psi", "\\psi", true, "&psi;", "psi", "psi", "ψ"),
("tau", "\\tau", true, "&tau;", "tau", "tau", "τ"),
("Omega", "\\Omega", true, "&Omega;", "Omega", "Omega", "Ω"),
("omega", "\\omega", true, "&omega;", "omega", "omega", "ω"),
("piv", "\\varpi", true, "&piv;", "omega-pi", "omega-pi", "ϖ"),
("varpi", "\\varpi", true, "&piv;", "omega-pi", "omega-pi", "ϖ"),
("partial", "\\partial", true, "&part;", "[partial differential]", "[partial differential]", ""),
// Hebrew
("alefsym", "\\aleph", true, "&alefsym;", "aleph", "aleph", ""),
("aleph", "\\aleph", true, "&aleph;", "aleph", "aleph", ""),
("gimel", "\\gimel", true, "&gimel;", "gimel", "gimel", ""),
("beth", "\\beth", true, "&beth;", "beth", "beth", "ב"),
("dalet", "\\daleth", true, "&daleth;", "dalet", "dalet", "ד"),
// Icelandic
("ETH", "\\DH{}", false, "&ETH;", "D", "Ð", "Ð"),
("eth", "\\dh{}", false, "&eth;", "dh", "ð", "ð"),
("THORN", "\\TH{}", false, "&THORN;", "TH", "Þ", "Þ"),
("thorn", "\\th{}", false, "&thorn;", "th", "þ", "þ"),
//, "* Punctuation",
// Dots and Marks
("dots", "\\dots{}", false, "&hellip;", "...", "...", ""),
("cdots", "\\cdots{}", true, "&ctdot;", "...", "...", ""),
("hellip", "\\dots{}", false, "&hellip;", "...", "...", ""),
("middot", "\\textperiodcentered{}", false, "&middot;", ".", "·", "·"),
("iexcl", "!`", false, "&iexcl;", "!", "¡", "¡"),
("iquest", "?`", false, "&iquest;", "?", "¿", "¿"),
// Dash-like
("shy", "\\-", false, "&shy;", "", "", ""),
("ndash", "--", false, "&ndash;", "-", "-", ""),
("mdash", "---", false, "&mdash;", "--", "--", ""),
// Quotations
("quot", "\\textquotedbl{}", false, "&quot;", "\"", "\"", "\""),
("acute", "\\textasciiacute{}", false, "&acute;", "'", "´", "´"),
("ldquo", "\\textquotedblleft{}", false, "&ldquo;", "\"", "\"", ""),
("rdquo", "\\textquotedblright{}", false, "&rdquo;", "\"", "\"", ""),
("bdquo", "\\quotedblbase{}", false, "&bdquo;", "\"", "\"", ""),
("lsquo", "\\textquoteleft{}", false, "&lsquo;", "`", "`", ""),
("rsquo", "\\textquoteright{}", false, "&rsquo;", "'", "'", ""),
("sbquo", "\\quotesinglbase{}", false, "&sbquo;", ", ", ", ", ""),
("laquo", "\\guillemotleft{}", false, "&laquo;", "<<", "«", "«"),
("raquo", "\\guillemotright{}", false, "&raquo;", ">>", "»", "»"),
("lsaquo", "\\guilsinglleft{}", false, "&lsaquo;", "<", "<", ""),
("rsaquo", "\\guilsinglright{}", false, "&rsaquo;", ">", ">", ""),
//, "* Other",
// Misc. (often used)
("circ", "\\^{}", false, "&circ;", "^", "^", ""),
("vert", "\\vert{}", true, "&vert;", "|", "|", "|"),
("vbar", "|", false, "|", "|", "|", "|"),
("brvbar", "\\textbrokenbar{}", false, "&brvbar;", "|", "¦", "¦"),
("S", "\\S", false, "&sect;", "section", "§", "§"),
("sect", "\\S", false, "&sect;", "section", "§", "§"),
("P", "\\P{}", false, "&para;", "paragraph", "", ""),
("para", "\\P{}", false, "&para;", "paragraph", "", ""),
("amp", "\\&", false, "&amp;", "&", "&", "&"),
("lt", "\\textless{}", false, "&lt;", "<", "<", "<"),
("gt", "\\textgreater{}", false, "&gt;", ">", ">", ">"),
("tilde", "\\textasciitilde{}", false, "~", "~", "~", "~"),
("slash", "/", false, "/", "/", "/", "/"),
("plus", "+", false, "+", "+", "+", "+"),
("under", "\\_", false, "_", "_", "_", "_"),
("equal", "=", false, "=", "=", "=", "="),
("asciicirc", "\\textasciicircum{}", false, "^", "^", "^", "^"),
("dagger", "\\textdagger{}", false, "&dagger;", "[dagger]", "[dagger]", ""),
("dag", "\\dag{}", false, "&dagger;", "[dagger]", "[dagger]", ""),
("Dagger", "\\textdaggerdbl{}", false, "&Dagger;", "[doubledagger]", "[doubledagger]", ""),
("ddag", "\\ddag{}", false, "&Dagger;", "[doubledagger]", "[doubledagger]", ""),
// Whitespace
("nbsp", "~", false, "&nbsp;", ", ", "\\x00A0", "\\x00A0"),
("ensp", "\\hspace*{.5em}", false, "&ensp;", ", ", ", ", ""),
("emsp", "\\hspace*{1em}", false, "&emsp;", ", ", ", ", ""),
("thinsp", "\\hspace*{.2em}", false, "&thinsp;", ", ", ", ", ""),
// Currency
("curren", "\\textcurrency{}", false, "&curren;", "curr.", "¤", "¤"),
("cent", "\\textcent{}", false, "&cent;", "cent", "¢", "¢"),
("pound", "\\pounds{}", false, "&pound;", "pound", "£", "£"),
("yen", "\\textyen{}", false, "&yen;", "yen", "¥", "¥"),
("euro", "\\texteuro{}", false, "&euro;", "EUR", "EUR", ""),
("EUR", "\\texteuro{}", false, "&euro;", "EUR", "EUR", ""),
("dollar", "\\$", false, "$", "$", "$", "$"),
("USD", "\\$", false, "$", "$", "$", "$"),
// Property Marks
("copy", "\\textcopyright{}", false, "&copy;", "(c)", "©", "©"),
("reg", "\\textregistered{}", false, "&reg;", "(r)", "®", "®"),
("trade", "\\texttrademark{}", false, "&trade;", "TM", "TM", ""),
// Science et al.
("minus", "-", true, "&minus;", "-", "-", ""),
("pm", "\\textpm{}", false, "&plusmn;", "+-", "±", "±"),
("plusmn", "\\textpm{}", false, "&plusmn;", "+-", "±", "±"),
("times", "\\texttimes{}", false, "&times;", "*", "×", "×"),
("frasl", "/", false, "&frasl;", "/", "/", ""),
("colon", "\\colon", true, ":", ":", ":", ":"),
("div", "\\textdiv{}", false, "&divide;", "/", "÷", "÷"),
("frac12", "\\textonehalf{}", false, "&frac12;", "1/2", "½", "½"),
("frac14", "\\textonequarter{}", false, "&frac14;", "1/4", "¼", "¼"),
("frac34", "\\textthreequarters{}", false, "&frac34;", "3/4", "¾", "¾"),
("permil", "\\textperthousand{}", false, "&permil;", "per thousand", "per thousand", ""),
("sup1", "\\textonesuperior{}", false, "&sup1;", "^1", "¹", "¹"),
("sup2", "\\texttwosuperior{}", false, "&sup2;", "^2", "²", "²"),
("sup3", "\\textthreesuperior{}", false, "&sup3;", "^3", "³", "³"),
("radic", "\\sqrt{\\,}", true, "&radic;", "[square root]", "[square root]", ""),
("sum", "\\sum", true, "&sum;", "[sum]", "[sum]", ""),
("prod", "\\prod", true, "&prod;", "[product]", "[n-ary product]", ""),
("micro", "\\textmu{}", false, "&micro;", "micro", "µ", "µ"),
("macr", "\\textasciimacron{}", false, "&macr;", "[macron]", "¯", "¯"),
("deg", "\\textdegree{}", false, "&deg;", "degree", "°", "°"),
("prime", "\\prime", true, "&prime;", "'", "'", ""),
("Prime", "\\prime{}\\prime", true, "&Prime;", "''", "''", ""),
("infin", "\\infty", true, "&infin;", "[infinity]", "[infinity]", ""),
("infty", "\\infty", true, "&infin;", "[infinity]", "[infinity]", ""),
("prop", "\\propto", true, "&prop;", "[proportional to]", "[proportional to]", ""),
("propto", "\\propto", true, "&prop;", "[proportional to]", "[proportional to]", ""),
("not", "\\textlnot{}", false, "&not;", "[angled dash]", "¬", "¬"),
("neg", "\\neg{}", true, "&not;", "[angled dash]", "¬", "¬"),
("land", "\\land", true, "&and;", "[logical and]", "[logical and]", ""),
("wedge", "\\wedge", true, "&and;", "[logical and]", "[logical and]", ""),
("lor", "\\lor", true, "&or;", "[logical or]", "[logical or]", ""),
("vee", "\\vee", true, "&or;", "[logical or]", "[logical or]", ""),
("cap", "\\cap", true, "&cap;", "[intersection]", "[intersection]", ""),
("cup", "\\cup", true, "&cup;", "[union]", "[union]", ""),
("smile", "\\smile", true, "&smile;", "[cup product]", "[cup product]", ""),
("frown", "\\frown", true, "&frown;", "[Cap product]", "[cap product]", ""),
("int", "\\int", true, "&int;", "[integral]", "[integral]", ""),
("therefore", "\\therefore", true, "&there4;", "[therefore]", "[therefore]", ""),
("there4", "\\therefore", true, "&there4;", "[therefore]", "[therefore]", ""),
("because", "\\because", true, "&because;", "[because]", "[because]", ""),
("sim", "\\sim", true, "&sim;", "~", "~", ""),
("cong", "\\cong", true, "&cong;", "[approx. equal to]", "[approx. equal to]", ""),
("simeq", "\\simeq", true, "&cong;", "[approx. equal to]", "[approx. equal to]", ""),
("asymp", "\\asymp", true, "&asymp;", "[, almostrueequal to]", "[, almostrueequal to]", ""),
("approx", "\\approx", true, "&asymp;", "[, almostrueequal to]", "[, almostrueequal to]", ""),
("ne", "\\ne", true, "&ne;", "[, notrueequal to]", "[, notrueequal to]", ""),
("neq", "\\neq", true, "&ne;", "[, notrueequal to]", "[, notrueequal to]", ""),
("equiv", "\\equiv", true, "&equiv;", "[identical to]", "[identical to]", ""),
("triangleq", "\\triangleq", true, "&triangleq;", "[defined to]", "[defined to]", ""),
("le", "\\le", true, "&le;", "<=", "<=", ""),
("leq", "\\le", true, "&le;", "<=", "<=", ""),
("ge", "\\ge", true, "&ge;", ">=", ">=", ""),
("geq", "\\ge", true, "&ge;", ">=", ">=", ""),
("lessgtr", "\\lessgtr", true, "&lessgtr;", "[less than or greater than]", "[less than or greater than]", ""),
("lesseqgtr", "\\lesseqgtr", true, "&lesseqgtr;", "[less than or equal or greater than or equal]", "[less than or equal or greater than or equal]", ""),
("ll", "\\ll", true, "&Lt;", "<<", "<<", ""),
("Ll", "\\lll", true, "&Ll;", "<<<", "<<<", ""),
("lll", "\\lll", true, "&Ll;", "<<<", "<<<", ""),
("gg", "\\gg", true, "&Gt;", ">>", ">>", ""),
("Gg", "\\ggg", true, "&Gg;", ">>>", ">>>", ""),
("ggg", "\\ggg", true, "&Gg;", ">>>", ">>>", ""),
("prec", "\\prec", true, "&pr;", "[precedes]", "[precedes]", ""),
("preceq", "\\preceq", true, "&prcue;", "[precedes or equal]", "[precedes or equal]", ""),
("preccurlyeq", "\\preccurlyeq", true, "&prcue;", "[precedes or equal]", "[precedes or equal]", ""),
("succ", "\\succ", true, "&sc;", "[succeeds]", "[succeeds]", ""),
("succeq", "\\succeq", true, "&sccue;", "[succeeds or equal]", "[succeeds or equal]", ""),
("succcurlyeq", "\\succcurlyeq", true, "&sccue;", "[succeeds or equal]", "[succeeds or equal]", ""),
("sub", "\\subset", true, "&sub;", "[, subsetrueof]", "[, subsetrueof]", ""),
("subset", "\\subset", true, "&sub;", "[, subsetrueof]", "[, subsetrueof]", ""),
("sup", "\\supset", true, "&sup;", "[, supersetrueof]", "[, supersetrueof]", ""),
("supset", "\\supset", true, "&sup;", "[, supersetrueof]", "[, supersetrueof]", ""),
("nsub", "\\not\\subset", true, "&nsub;", "[, notruea, subsetrueof]", "[, notruea, subsetrueof", ""),
("sube", "\\subseteq", true, "&sube;", "[, subsetrueof or equal to]", "[, subsetrueof or equal to]", ""),
("nsup", "\\not\\supset", true, "&nsup;", "[, notruea, supersetrueof]", "[, notruea, supersetrueof]", ""),
("supe", "\\supseteq", true, "&supe;", "[, supersetrueof or equal to]", "[, supersetrueof or equal to]", ""),
("setminus", "\\setminus", true, "&setminus;", "\\", "\\", ""),
("forall", "\\forall", true, "&forall;", "[for all]", "[for all]", ""),
("exist", "\\exists", true, "&exist;", "[there exists]", "[there exists]", ""),
("exists", "\\exists", true, "&exist;", "[there exists]", "[there exists]", ""),
("nexist", "\\nexists", true, "&exist;", "[there does, notrueexists]", "[there does, notrue exists]", ""),
("nexists", "\\nexists", true, "&exist;", "[there does, notrueexists]", "[there does, notrue exists]", ""),
("empty", "\\emptyset", true, "&empty;", "[empty set]", "[empty set]", ""),
("emptyset", "\\emptyset", true, "&empty;", "[empty set]", "[empty set]", ""),
("isin", "\\in", true, "&isin;", "[, elementrueof]", "[, elementrueof]", ""),
("in", "\\in", true, "&isin;", "[, elementrueof]", "[, elementrueof]", ""),
("notin", "\\notin", true, "&notin;", "[, notruean, elementrueof]", "[, notruean, elementrueof]", ""),
("ni", "\\ni", true, "&ni;", "[contains as member]", "[contains as member]", ""),
("nabla", "\\nabla", true, "&nabla;", "[nabla]", "[nabla]", ""),
("ang", "\\angle", true, "&ang;", "[angle]", "[angle]", ""),
("angle", "\\angle", true, "&ang;", "[angle]", "[angle]", ""),
("perp", "\\perp", true, "&perp;", "[up tack]", "[up tack]", ""),
("parallel", "\\parallel", true, "&parallel;", "||", "||", ""),
("sdot", "\\cdot", true, "&sdot;", "[dot]", "[dot]", ""),
("cdot", "\\cdot", true, "&sdot;", "[dot]", "[dot]", ""),
("lceil", "\\lceil", true, "&lceil;", "[, leftrueceiling]", "[, leftrueceiling]", ""),
("rceil", "\\rceil", true, "&rceil;", "[, rightrueceiling]", "[, rightrueceiling]", ""),
("lfloor", "\\lfloor", true, "&lfloor;", "[, leftruefloor]", "[, leftruefloor]", ""),
("rfloor", "\\rfloor", true, "&rfloor;", "[, rightruefloor]", "[, rightruefloor]", ""),
("lang", "\\langle", true, "&lang;", "<", "<", ""),
("rang", "\\rangle", true, "&rang;", ">", ">", ""),
("langle", "\\langle", true, "&lang;", "<", "<", ""),
("rangle", "\\rangle", true, "&rang;", ">", ">", ""),
("hbar", "\\hbar", true, "&hbar;", "hbar", "hbar", ""),
("mho", "\\mho", true, "&mho;", "mho", "mho", ""),
// Arrows
("larr", "\\leftarrow", true, "&larr;", "<-", "<-", ""),
("leftarrow", "\\leftarrow", true, "&larr;", "<-", "<-", ""),
("gets", "\\gets", true, "&larr;", "<-", "<-", ""),
("lArr", "\\Leftarrow", true, "&lArr;", "<=", "<=", ""),
("Leftarrow", "\\Leftarrow", true, "&lArr;", "<=", "<=", ""),
("uarr", "\\uparrow", true, "&uarr;", "[uparrow]", "[uparrow]", ""),
("uparrow", "\\uparrow", true, "&uarr;", "[uparrow]", "[uparrow]", ""),
("uArr", "\\Uparrow", true, "&uArr;", "[dbluparrow]", "[dbluparrow]", ""),
("Uparrow", "\\Uparrow", true, "&uArr;", "[dbluparrow]", "[dbluparrow]", ""),
("rarr", "\\rightarrow", true, "&rarr;", "->", "->", ""),
("to", "\\to", true, "&rarr;", "->", "->", ""),
("rightarrow", "\\rightarrow", true, "&rarr;", "->", "->", ""),
("rArr", "\\Rightarrow", true, "&rArr;", "=>", "=>", ""),
("Rightarrow", "\\Rightarrow", true, "&rArr;", "=>", "=>", ""),
("darr", "\\downarrow", true, "&darr;", "[downarrow]", "[downarrow]", ""),
("downarrow", "\\downarrow", true, "&darr;", "[downarrow]", "[downarrow]", ""),
("dArr", "\\Downarrow", true, "&dArr;", "[dbldownarrow]", "[dbldownarrow]", ""),
("Downarrow", "\\Downarrow", true, "&dArr;", "[dbldownarrow]", "[dbldownarrow]", ""),
("harr", "\\leftrightarrow", true, "&harr;", "<->", "<->", ""),
("leftrightarrow", "\\leftrightarrow", true, "&harr;", "<->", "<->", ""),
("hArr", "\\Leftrightarrow", true, "&hArr;", "<=>", "<=>", ""),
("Leftrightarrow", "\\Leftrightarrow", true, "&hArr;", "<=>", "<=>", ""),
("crarr", "\\hookleftarrow", true, "&crarr;", "<-'", "<-'", ""),
("hookleftarrow", "\\hookleftarrow", true, "&crarr;", "<-'", "<-'", ""),
// Function names
("arccos", "\\arccos", true, "arccos", "arccos", "arccos", "arccos"),
("arcsin", "\\arcsin", true, "arcsin", "arcsin", "arcsin", "arcsin"),
("arctan", "\\arctan", true, "arctan", "arctan", "arctan", "arctan"),
("arg", "\\arg", true, "arg", "arg", "arg", "arg"),
("cos", "\\cos", true, "cos", "cos", "cos", "cos"),
("cosh", "\\cosh", true, "cosh", "cosh", "cosh", "cosh"),
("cot", "\\cot", true, "cot", "cot", "cot", "cot"),
("coth", "\\coth", true, "coth", "coth", "coth", "coth"),
("csc", "\\csc", true, "csc", "csc", "csc", "csc"),
("deg", "\\deg", true, "&deg;", "deg", "deg", "deg"),
("det", "\\det", true, "det", "det", "det", "det"),
("dim", "\\dim", true, "dim", "dim", "dim", "dim"),
("exp", "\\exp", true, "exp", "exp", "exp", "exp"),
("gcd", "\\gcd", true, "gcd", "gcd", "gcd", "gcd"),
("hom", "\\hom", true, "hom", "hom", "hom", "hom"),
("inf", "\\inf", true, "inf", "inf", "inf", "inf"),
("ker", "\\ker", true, "ker", "ker", "ker", "ker"),
("lg", "\\lg", true, "lg", "lg", "lg", "lg"),
("lim", "\\lim", true, "lim", "lim", "lim", "lim"),
("liminf", "\\liminf", true, "liminf", "liminf", "liminf", "liminf"),
("limsup", "\\limsup", true, "limsup", "limsup", "limsup", "limsup"),
("ln", "\\ln", true, "ln", "ln", "ln", "ln"),
("log", "\\log", true, "log", "log", "log", "log"),
("max", "\\max", true, "max", "max", "max", "max"),
("min", "\\min", true, "min", "min", "min", "min"),
("Pr", "\\Pr", true, "Pr", "Pr", "Pr", "Pr"),
("sec", "\\sec", true, "sec", "sec", "sec", "sec"),
("sin", "\\sin", true, "sin", "sin", "sin", "sin"),
("sinh", "\\sinh", true, "sinh", "sinh", "sinh", "sinh"),
("sup", "\\sup", true, "&sup;", "sup", "sup", "sup"),
("tan", "\\tan", true, "tan", "tan", "tan", "tan"),
("tanh", "\\tanh", true, "tanh", "tanh", "tanh", "tanh"),
// Signs & Symbols
("bull", "\\textbullet{}", false, "&bull;", "*", "*", ""),
("bullet", "\\textbullet{}", false, "&bull;", "*", "*", ""),
("star", "\\star", true, "*", "*", "*", ""),
("lowast", "\\ast", true, "&lowast;", "*", "*", ""),
("ast", "\\ast", true, "&lowast;", "*", "*", "*"),
("odot", "\\odot", true, "o", "[circled dot]", "[circled dot]", "ʘ"),
("oplus", "\\oplus", true, "&oplus;", "[circled plus]", "[circled plus]", ""),
("otimes", "\\otimes", true, "&otimes;", "[circled times]", "[circled times]", ""),
("check", "\\checkmark", true, "&checkmark;", "[checkmark]", "[checkmark]", ""),
("checkmark", "\\checkmark", true, "&check;", "[checkmark]", "[checkmark]", ""),
// Miscellaneous (seldom used)
("ordf", "\\textordfeminine{}", false, "&ordf;", "_a_", "ª", "ª"),
("ordm", "\\textordmasculine{}", false, "&ordm;", "_o_", "º", "º"),
("cedil", "\\c{}", false, "&cedil;", "[cedilla]", "¸", "¸"),
("oline", "\\overline{~}", true, "&oline;", "[overline]", "¯", ""),
("uml", "\\textasciidieresis{}", false, "&uml;", "[diaeresis]", "¨", "¨"),
("zwnj", "\\/{}", false, "&zwnj;", "", "", ""),
("zwj", "", false, "&zwj;", "", "", ""),
("lrm", "", false, "&lrm;", "", "", "\u{200E}"),
("rlm", "", false, "&rlm;", "", "", "\u{200F}"),
// Smilies
("smiley", "\\ddot\\smile", true, "&#9786;", ":-)", ":-)", ""),
("blacksmile", "\\ddot\\smile", true, "&#9787;", ":-)", ":-)", ""),
("sad", "\\ddot\\frown", true, "&#9785;", ":-(", ":-(", ""),
("frowny", "\\ddot\\frown", true, "&#9785;", ":-(", ":-(", ""),
// Suits
("clubs", "\\clubsuit", true, "&clubs;", "[clubs]", "[clubs]", ""),
("clubsuit", "\\clubsuit", true, "&clubs;", "[clubs]", "[clubs]", ""),
("spades", "\\spadesuit", true, "&spades;", "[spades]", "[spades]", ""),
("spadesuit", "\\spadesuit", true, "&spades;", "[spades]", "[spades]", ""),
("hearts", "\\heartsuit", true, "&hearts;", "[hearts]", "[hearts]", ""),
("heartsuit", "\\heartsuit", true, "&heartsuit;", "[hearts]", "[hearts]", ""),
("diams", "\\diamondsuit", true, "&diams;", "[diamonds]", "[diamonds]", ""),
("diamondsuit", "\\diamondsuit", true, "&diams;", "[diamonds]", "[diamonds]", ""),
("diamond", "\\diamondsuit", true, "&diamond;", "[diamond]", "[diamond]", ""),
("Diamond", "\\diamondsuit", true, "&diamond;", "[diamond]", "[diamond]", ""),
("loz", "\\lozenge", true, "&loz;", "[lozenge]", "[lozenge]", ""),
// spaces
// fish shell:
// for i in (seq 1 20)
// echo '("'(string repeat -n $i ' ')'", "\\\\hspace*{'(math '0.5*'$i)'em}", true, "'(string repeat -n $i '&ensp;')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i '\\\\x2002')'")'
// end
(" ", "\\hspace*{0.5em}", true, "&ensp;", " ", " ", "\\x2002"),
(" ", "\\hspace*{1em}", true, "&ensp;&ensp;", " ", " ", "\\x2002\\x2002"),
(" ", "\\hspace*{1.5em}", true, "&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{2em}", true, "&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{2.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{3em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{3.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{4em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{4.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{5.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{6em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{6.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{7em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{7.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{8em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{8.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{9em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{9.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{10em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
];

73
src/export/event.rs Normal file
View file

@ -0,0 +1,73 @@
use crate::ast::*;
/// Node kinds that are reported as an enter/leave pair.
///
/// A `Container` is the payload of [`Event::Enter`] and [`Event::Leave`].
/// Every variant wraps the AST type of the same name (in scope through the
/// `crate::ast::*` glob import).
///
/// The enum is `#[non_exhaustive]`: code outside this crate must keep a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum Container {
    // Document skeleton
    Document(Document),
    Section(Section),
    Paragraph(Paragraph),
    Headline(Headline),

    // Tables
    OrgTable(OrgTable),
    OrgTableRow(OrgTableRow),
    OrgTableCell(OrgTableCell),
    TableEl(TableEl),

    // Lists
    List(List),
    ListItem(ListItem),

    // Other elements
    Drawer(Drawer),
    DynBlock(DynBlock),
    FnDef(FnDef),
    Comment(Comment),
    FixedWidth(FixedWidth),

    // Blocks
    SpecialBlock(SpecialBlock),
    QuoteBlock(QuoteBlock),
    CenterBlock(CenterBlock),
    VerseBlock(VerseBlock),
    CommentBlock(CommentBlock),
    ExampleBlock(ExampleBlock),
    ExportBlock(ExportBlock),
    SourceBlock(SourceBlock),

    // Links, targets and footnote references
    Link(Link),
    RadioTarget(RadioTarget),
    FnRef(FnRef),
    Target(Target),

    // Text markup
    Bold(Bold),
    Strike(Strike),
    Italic(Italic),
    Underline(Underline),
    Verbatim(Verbatim),
    Code(Code),
    Superscript(Superscript),
    Subscript(Subscript),

    // Calls, property drawers and keywords
    BabelCall(BabelCall),
    PropertyDrawer(PropertyDrawer),
    AffiliatedKeyword(AffiliatedKeyword),
    Keyword(Keyword),
}
/// One item of the event stream consumed by the exporters.
///
/// [`Event::Enter`] and [`Event::Leave`] each carry a [`Container`]; every
/// other variant carries a single node of the same-named AST type and has no
/// matching "leave" counterpart.
///
/// The enum is `#[non_exhaustive]`: code outside this crate must keep a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum Event {
    // Paired events around a container node
    Enter(Container),
    Leave(Container),

    // Stand-alone events
    Text(Token),
    Macros(Macros),
    Cookie(Cookie),
    InlineCall(InlineCall),
    InlineSrc(InlineSrc),
    Clock(Clock),
    LineBreak(LineBreak),
    Snippet(Snippet),
    Rule(Rule),
    Timestamp(Timestamp),
    LatexFragment(LatexFragment),
    LatexEnvironment(LatexEnvironment),
    Entity(Entity),

    /// Only compiled in when the `syntax-org-fc` feature is enabled.
    #[cfg(feature = "syntax-org-fc")]
    Cloze(Cloze),
}

View file

@ -1,10 +1,12 @@
use rowan::NodeOrToken;
use std::cmp::min;
use std::fmt;
use std::io::{Error, Result as IOResult, Write};
use std::fmt::Write as _;
use jetscii::{bytes, BytesConst};
use crate::elements::{Element, Table, TableCell, TableRow, Timestamp};
use crate::export::write_datetime;
use super::event::{Container, Event};
use super::TraversalContext;
use super::Traverser;
use crate::{SyntaxElement, SyntaxKind, SyntaxNode};
/// A wrapper for escaping sensitive characters in html.
///
@ -26,11 +28,7 @@ impl<S: AsRef<str>> fmt::Display for HtmlEscape<S> {
let content = self.0.as_ref();
let bytes = content.as_bytes();
lazy_static::lazy_static! {
static ref ESCAPE_BYTES: BytesConst = bytes!(b'<', b'>', b'&', b'\'', b'"');
}
while let Some(off) = ESCAPE_BYTES.find(&bytes[pos..]) {
while let Some(off) = jetscii::bytes!(b'<', b'>', b'&', b'\'', b'"').find(&bytes[pos..]) {
write!(f, "{}", &content[pos..pos + off])?;
pos += off + 1;
@ -41,7 +39,7 @@ impl<S: AsRef<str>> fmt::Display for HtmlEscape<S> {
b'&' => write!(f, "&amp;")?,
b'\'' => write!(f, "&apos;")?,
b'"' => write!(f, "&quot;")?,
_ => unreachable!(),
_ => {}
}
}
@ -49,349 +47,295 @@ impl<S: AsRef<str>> fmt::Display for HtmlEscape<S> {
}
}
/// Pair of callbacks used to render an [`Element`] as HTML.
///
/// `E` is the handler's error type. It must be constructible from the I/O
/// [`Error`] imported by this module, so failures of the underlying writer can
/// be propagated with `?`. Implementors must also be [`Default`].
pub trait HtmlHandler<E>: Default
where
    E: From<Error>,
{
    /// Called with the output writer `w` and the `element` being opened.
    fn start<W: Write>(&mut self, w: W, element: &Element) -> Result<(), E>;

    /// Called with the output writer `w` and the `element` being closed.
    fn end<W: Write>(&mut self, w: W, element: &Element) -> Result<(), E>;
}
/// Default Html Handler
///
/// A field-less unit struct: it carries no state of its own and is created
/// through its derived [`Default`] implementation.
#[derive(Default)]
pub struct DefaultHtmlHandler;
pub struct HtmlExport {
output: String,
impl HtmlHandler<Error> for DefaultHtmlHandler {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> IOResult<()> {
match element {
// container elements
Element::SpecialBlock(_) => (),
Element::QuoteBlock(_) => write!(w, "<blockquote>")?,
Element::CenterBlock(_) => write!(w, "<div class=\"center\">")?,
Element::VerseBlock(_) => write!(w, "<p class=\"verse\">")?,
Element::Bold => write!(w, "<b>")?,
Element::Document { .. } => write!(w, "<main>")?,
Element::DynBlock(_dyn_block) => (),
Element::Headline { .. } => (),
Element::List(list) => {
if list.ordered {
write!(w, "<ol>")?;
} else {
write!(w, "<ul>")?;
}
}
Element::Italic => write!(w, "<i>")?,
Element::ListItem(_) => write!(w, "<li>")?,
Element::Paragraph { .. } => write!(w, "<p>")?,
Element::Section => write!(w, "<section>")?,
Element::Strike => write!(w, "<s>")?,
Element::Underline => write!(w, "<u>")?,
// non-container elements
Element::CommentBlock(_) => (),
Element::ExampleBlock(block) => write!(
w,
"<pre class=\"example\">{}</pre>",
HtmlEscape(&block.contents)
)?,
Element::ExportBlock(block) => {
if block.data.eq_ignore_ascii_case("HTML") {
write!(w, "{}", block.contents)?
}
}
Element::SourceBlock(block) => {
if block.language.is_empty() {
write!(
w,
"<pre class=\"example\">{}</pre>",
HtmlEscape(&block.contents)
)?;
} else {
write!(
w,
"<div class=\"org-src-container\"><pre class=\"src src-{}\">{}</pre></div>",
block.language,
HtmlEscape(&block.contents)
)?;
}
}
Element::BabelCall(_) => (),
Element::InlineSrc(inline_src) => write!(
w,
"<code class=\"src src-{}\">{}</code>",
inline_src.lang,
HtmlEscape(&inline_src.body)
)?,
Element::Code { value } => write!(w, "<code>{}</code>", HtmlEscape(value))?,
Element::FnRef(_fn_ref) => (),
Element::InlineCall(_) => (),
Element::Link(link) => write!(
w,
"<a href=\"{}\">{}</a>",
HtmlEscape(&link.path),
HtmlEscape(link.desc.as_ref().unwrap_or(&link.path)),
)?,
Element::Macros(_macros) => (),
Element::RadioTarget => (),
Element::Snippet(snippet) => {
if snippet.name.eq_ignore_ascii_case("HTML") {
write!(w, "{}", snippet.value)?;
}
}
Element::Target(_target) => (),
Element::Text { value } => write!(w, "{}", HtmlEscape(value))?,
Element::Timestamp(timestamp) => {
write!(
&mut w,
"<span class=\"timestamp-wrapper\"><span class=\"timestamp\">"
)?;
in_descriptive_list: Vec<bool>,
match timestamp {
Timestamp::Active { start, .. } => {
write_datetime(&mut w, "&lt;", start, "&gt;")?;
}
Timestamp::Inactive { start, .. } => {
write_datetime(&mut w, "[", start, "]")?;
}
Timestamp::ActiveRange { start, end, .. } => {
write_datetime(&mut w, "&lt;", start, "&gt;&#x2013;")?;
write_datetime(&mut w, "&lt;", end, "&gt;")?;
}
Timestamp::InactiveRange { start, end, .. } => {
write_datetime(&mut w, "[", start, "]&#x2013;")?;
write_datetime(&mut w, "[", end, "]")?;
}
Timestamp::Diary { value } => {
write!(&mut w, "&lt;%%({})&gt;", HtmlEscape(value))?
}
}
write!(&mut w, "</span></span>")?;
}
Element::Verbatim { value } => write!(&mut w, "<code>{}</code>", HtmlEscape(value))?,
Element::FnDef(_fn_def) => (),
Element::Clock(_clock) => (),
Element::Comment(_) => (),
Element::FixedWidth(fixed_width) => write!(
w,
"<pre class=\"example\">{}</pre>",
HtmlEscape(&fixed_width.value)
)?,
Element::Keyword(_keyword) => (),
Element::Drawer(_drawer) => (),
Element::Rule(_) => write!(w, "<hr>")?,
Element::Cookie(cookie) => write!(w, "<code>{}</code>", cookie.value)?,
Element::Title(title) => {
write!(w, "<h{}>", if title.level <= 6 { title.level } else { 6 })?;
}
Element::Table(Table::TableEl { .. }) => (),
Element::Table(Table::Org { has_header, .. }) => {
write!(w, "<table>")?;
if *has_header {
write!(w, "<thead>")?;
} else {
write!(w, "<tbody>")?;
}
}
Element::TableRow(row) => match row {
TableRow::Body => write!(w, "<tr>")?,
TableRow::BodyRule => write!(w, "</tbody><tbody>")?,
TableRow::Header => write!(w, "<tr>")?,
TableRow::HeaderRule => write!(w, "</thead><tbody>")?,
},
Element::TableCell(cell) => match cell {
TableCell::Body => write!(w, "<td>")?,
TableCell::Header => write!(w, "<th>")?,
},
}
Ok(())
}
/// Writes the closing HTML tag for `element`.
///
/// Elements that opened no tag produce no output; for non-container
/// elements a debug assertion checks that they really are not containers.
fn end<W: Write>(&mut self, mut w: W, element: &Element) -> IOResult<()> {
    let closing_tag = match element {
        // heading levels deeper than 6 are closed as `</h6>`
        Element::Title(title) => return write!(w, "</h{}>", title.level.min(6)),
        // containers without any closing markup
        Element::SpecialBlock(_)
        | Element::DynBlock(_)
        | Element::Headline { .. }
        | Element::Table(Table::TableEl { .. }) => return Ok(()),
        Element::QuoteBlock(_) => "</blockquote>",
        Element::CenterBlock(_) => "</div>",
        Element::VerseBlock(_) => "</p>",
        Element::Paragraph { .. } => "</p>",
        Element::Bold => "</b>",
        Element::Italic => "</i>",
        Element::Strike => "</s>",
        Element::Underline => "</u>",
        Element::Document { .. } => "</main>",
        Element::Section => "</section>",
        Element::List(list) => {
            if list.ordered {
                "</ol>"
            } else {
                "</ul>"
            }
        }
        Element::ListItem(_) => "</li>",
        Element::Table(Table::Org { .. }) => "</tbody></table>",
        Element::TableRow(TableRow::Body) | Element::TableRow(TableRow::Header) => "</tr>",
        Element::TableCell(TableCell::Body) => "</td>",
        Element::TableCell(TableCell::Header) => "</th>",
        // non-container elements
        _ => {
            debug_assert!(!element.is_container());
            return Ok(());
        }
    };
    write!(w, "{}", closing_tag)
}
table_row: TableRow,
}
#[cfg(feature = "syntect")]
mod syntect_handler {
use super::*;
use std::marker::PhantomData;
/// Tracks which `<thead>`/`<tbody>` group, if any, is open while an org table
/// is being exported to HTML.
#[derive(Default, PartialEq, Eq)]
enum TableRow {
    /// No group is open; the next standard row opens `<thead>`.
    #[default]
    HeaderRule,
    /// A `<thead>` group is open.
    Header,
    /// No group is open; the next standard row opens `<tbody>`.
    BodyRule,
    /// A `<tbody>` group is open.
    Body,
}
use syntect::{
easy::HighlightLines,
highlighting::ThemeSet,
html::{styled_line_to_highlighted_html, IncludeBackground},
parsing::SyntaxSet,
};
impl HtmlExport {
/// Appends `s` verbatim (without escaping) to the output produced so far.
pub fn push_str(&mut self, s: impl AsRef<str>) {
    self.output.push_str(s.as_ref());
}
/// Syntect Html Handler
///
/// Simple Usage:
pub fn finish(self) -> String {
    // Consumes the exporter and hands back everything accumulated in `output`.
    self.output
}
/// Render syntax node to html string
///
/// ```rust
/// use orgize::Org;
/// use orgize::export::{DefaultHtmlHandler, SyntectHtmlHandler};
/// use orgize::{Org, ast::Bold, export::HtmlExport, rowan::ast::AstNode};
///
/// let mut handler = SyntectHtmlHandler::new(DefaultHtmlHandler);
/// let org = Org::parse("src_rust{println!(\"Hello\")}");
///
/// let mut vec = vec![];
///
/// org.write_html_custom(&mut vec, &mut handler).unwrap();
/// let org = Org::parse("* /hello/ *world*");
/// let bold = org.first_node::<Bold>().unwrap();
/// let mut html = HtmlExport::default();
/// html.render(bold.syntax());
/// assert_eq!(html.finish(), "<b>world</b>");
/// ```
///
/// Customize:
///
/// ```rust,no_run
/// // orgize has re-exported the whole syntect crate
/// use orgize::syntect::parsing::SyntaxSet;
/// use orgize::export::{DefaultHtmlHandler, SyntectHtmlHandler};
///
/// let mut handler = SyntectHtmlHandler {
/// syntax_set: {
/// let set = SyntaxSet::load_defaults_newlines();
/// let mut builder = set.into_builder();
/// // add extra language syntax
/// builder.add_from_folder("path/to/syntax/dir", true).unwrap();
/// builder.build()
/// },
/// // specify theme
/// theme: String::from("Solarized (dark)"),
/// inner: DefaultHtmlHandler,
/// ..Default::default()
/// };
///
/// // Make sure to check if theme presents or it will panic at runtime
/// if handler.theme_set.themes.contains_key("dont-exists") {
///
/// }
/// ```
pub struct SyntectHtmlHandler<E: From<Error>, H: HtmlHandler<E>> {
    /// syntax set used to look up a language, default is `SyntaxSet::load_defaults_newlines()`
    pub syntax_set: SyntaxSet,
    /// theme set, default is `ThemeSet::load_defaults()`
    pub theme_set: ThemeSet,
    /// name of the theme used for highlighting, default is `"InspiredGitHub"`
    ///
    /// Highlighting indexes `theme_set.themes` with this name, so it must be
    /// a key that is present in the theme set.
    pub theme: String,
    /// inner html handler, receives every element this handler does not
    /// highlight itself
    pub inner: H,
    /// background color, default is `IncludeBackground::No`
    pub background: IncludeBackground,
    /// handler error type (marker only, carries no data)
    pub error_type: PhantomData<E>,
}
impl<E: From<Error>, H: HtmlHandler<E>> SyntectHtmlHandler<E, H> {
    /// Builds a handler around `inner`, keeping every other field at its default.
    pub fn new(inner: H) -> Self {
        Self {
            inner,
            ..Self::default()
        }
    }

    /// Highlights `content` and returns the generated HTML.
    ///
    /// The syntax is looked up by the `language` token; when no language is
    /// given or no syntax matches, the plain-text syntax is used.
    ///
    /// Indexes `theme_set.themes` with `self.theme`, which panics if the
    /// theme is not part of the theme set.
    fn highlight(&self, language: Option<&str>, content: &str) -> String {
        let syntax = match language.and_then(|token| self.syntax_set.find_syntax_by_token(token)) {
            Some(found) => found,
            None => self.syntax_set.find_syntax_plain_text(),
        };
        let theme = &self.theme_set.themes[&self.theme];

        let mut highlighter = HighlightLines::new(syntax, theme);
        let regions = highlighter.highlight(content, &self.syntax_set);
        styled_line_to_highlighted_html(&regions[..], self.background)
    }
}
impl<E: From<Error>, H: HtmlHandler<E>> Default for SyntectHtmlHandler<E, H> {
    /// Loads syntect's default syntax and theme sets, selects the
    /// `"InspiredGitHub"` theme, disables background colors and wraps a
    /// default-constructed inner handler.
    fn default() -> Self {
        let syntax_set = SyntaxSet::load_defaults_newlines();
        let theme_set = ThemeSet::load_defaults();
        let theme = "InspiredGitHub".to_owned();

        Self {
            syntax_set,
            theme_set,
            theme,
            inner: H::default(),
            background: IncludeBackground::No,
            error_type: PhantomData,
        }
    }
}
impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for SyntectHtmlHandler<E, H> {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), E> {
match element {
Element::InlineSrc(inline_src) => write!(
w,
"<code>{}</code>",
self.highlight(Some(&inline_src.lang), &inline_src.body)
)?,
Element::SourceBlock(block) => {
if block.language.is_empty() {
write!(w, "<pre class=\"example\">{}</pre>", block.contents)?;
} else {
write!(
w,
"<div class=\"org-src-container\"><pre class=\"src src-{}\">{}</pre></div>",
block.language,
self.highlight(Some(&block.language), &block.contents)
)?;
}
}
Element::FixedWidth(fixed_width) => write!(
w,
"<pre class=\"example\">{}</pre>",
self.highlight(None, &fixed_width.value)
)?,
Element::ExampleBlock(block) => write!(
w,
"<pre class=\"example\">{}</pre>",
self.highlight(None, &block.contents)
)?,
_ => self.inner.start(w, element)?,
}
Ok(())
}
/// Forwards the closing of `element` to the inner handler unchanged and
/// returns its result.
fn end<W: Write>(&mut self, w: W, element: &Element) -> Result<(), E> {
    self.inner.end(w, element)
}
pub fn render(&mut self, node: &SyntaxNode) {
    // Traverse a clone of `node` with a fresh, default traversal context.
    let root = SyntaxElement::Node(node.clone());
    self.element(root, &mut TraversalContext::default());
}
}
#[cfg(feature = "syntect")]
pub use syntect_handler::SyntectHtmlHandler;
impl Traverser for HtmlExport {
    /// Appends the HTML for one traversal event to `self.output`.
    ///
    /// Text, link paths, source-block languages and timestamp tokens are
    /// written through `HtmlEscape`; html snippets and LaTeX are written
    /// verbatim. Results of `write!` are discarded. Events without an arm
    /// here produce no output.
    fn event(&mut self, event: Event, ctx: &mut TraversalContext) {
        match event {
            Event::Enter(Container::Document(_)) => self.output += "<main>",
            Event::Leave(Container::Document(_)) => self.output += "</main>",

            Event::Enter(Container::Headline(headline)) => {
                // heading levels deeper than 6 are rendered as <h6>
                let level = min(headline.level(), 6);
                let _ = write!(&mut self.output, "<h{level}>");
                for elem in headline.title() {
                    self.element(elem, ctx);
                }
                let _ = write!(&mut self.output, "</h{level}>");
            }
            Event::Leave(Container::Headline(_)) => {}

            Event::Enter(Container::Paragraph(_)) => self.output += "<p>",
            Event::Leave(Container::Paragraph(_)) => self.output += "</p>",

            Event::Enter(Container::Section(_)) => self.output += "<section>",
            Event::Leave(Container::Section(_)) => self.output += "</section>",

            Event::Enter(Container::Italic(_)) => self.output += "<i>",
            Event::Leave(Container::Italic(_)) => self.output += "</i>",

            Event::Enter(Container::Bold(_)) => self.output += "<b>",
            Event::Leave(Container::Bold(_)) => self.output += "</b>",

            Event::Enter(Container::Strike(_)) => self.output += "<s>",
            Event::Leave(Container::Strike(_)) => self.output += "</s>",

            Event::Enter(Container::Underline(_)) => self.output += "<u>",
            Event::Leave(Container::Underline(_)) => self.output += "</u>",

            Event::Enter(Container::Verbatim(_)) => self.output += "<code>",
            Event::Leave(Container::Verbatim(_)) => self.output += "</code>",

            Event::Enter(Container::Code(_)) => self.output += "<code>",
            Event::Leave(Container::Code(_)) => self.output += "</code>",

            Event::Enter(Container::SourceBlock(block)) => {
                if let Some(language) = block.language() {
                    let _ = write!(
                        &mut self.output,
                        r#"<pre><code class="language-{}">"#,
                        HtmlEscape(&language)
                    );
                } else {
                    self.output += r#"<pre><code>"#
                }
            }
            Event::Leave(Container::SourceBlock(_)) => self.output += "</code></pre>",

            Event::Enter(Container::QuoteBlock(_)) => self.output += "<blockquote>",
            Event::Leave(Container::QuoteBlock(_)) => self.output += "</blockquote>",

            Event::Enter(Container::VerseBlock(_)) => self.output += "<p class=\"verse\">",
            Event::Leave(Container::VerseBlock(_)) => self.output += "</p>",

            Event::Enter(Container::ExampleBlock(_)) => self.output += "<pre class=\"example\">",
            Event::Leave(Container::ExampleBlock(_)) => self.output += "</pre>",

            Event::Enter(Container::CenterBlock(_)) => self.output += "<div class=\"center\">",
            Event::Leave(Container::CenterBlock(_)) => self.output += "</div>",

            Event::Enter(Container::CommentBlock(_)) => self.output += "<!--",
            Event::Leave(Container::CommentBlock(_)) => self.output += "-->",

            Event::Enter(Container::Comment(_)) => self.output += "<!--",
            Event::Leave(Container::Comment(_)) => self.output += "-->",

            Event::Enter(Container::Subscript(_)) => self.output += "<sub>",
            Event::Leave(Container::Subscript(_)) => self.output += "</sub>",

            Event::Enter(Container::Superscript(_)) => self.output += "<sup>",
            Event::Leave(Container::Superscript(_)) => self.output += "</sup>",

            Event::Enter(Container::List(list)) => {
                // one stack entry per open list so nested items know whether
                // they belong to a descriptive list
                self.output += if list.is_ordered() {
                    self.in_descriptive_list.push(false);
                    "<ol>"
                } else if list.is_descriptive() {
                    self.in_descriptive_list.push(true);
                    "<dl>"
                } else {
                    self.in_descriptive_list.push(false);
                    "<ul>"
                };
            }
            Event::Leave(Container::List(list)) => {
                self.output += if list.is_ordered() {
                    "</ol>"
                } else if let Some(true) = self.in_descriptive_list.last() {
                    "</dl>"
                } else {
                    "</ul>"
                };
                self.in_descriptive_list.pop();
            }
            Event::Enter(Container::ListItem(list_item)) => {
                if let Some(&true) = self.in_descriptive_list.last() {
                    self.output += "<dt>";
                    for elem in list_item.tag() {
                        self.element(elem, ctx);
                    }
                    self.output += "</dt><dd>";
                } else {
                    self.output += "<li>";
                }
            }
            Event::Leave(Container::ListItem(_)) => {
                if let Some(&true) = self.in_descriptive_list.last() {
                    self.output += "</dd>";
                } else {
                    self.output += "</li>";
                }
            }

            Event::Enter(Container::OrgTable(table)) => {
                self.output += "<table>";
                self.table_row = if table.has_header() {
                    TableRow::HeaderRule
                } else {
                    TableRow::BodyRule
                }
            }
            Event::Leave(Container::OrgTable(_)) => {
                // close whichever group is still open
                match self.table_row {
                    TableRow::Body => self.output += "</tbody>",
                    TableRow::Header => self.output += "</thead>",
                    _ => {}
                }
                self.output += "</table>";
            }
            Event::Enter(Container::OrgTableRow(row)) => {
                if row.is_rule() {
                    // a rule row only closes the open group and is skipped
                    match self.table_row {
                        TableRow::Body => {
                            self.output += "</tbody>";
                            self.table_row = TableRow::BodyRule;
                        }
                        TableRow::Header => {
                            self.output += "</thead>";
                            self.table_row = TableRow::BodyRule;
                        }
                        _ => {}
                    }
                    ctx.skip();
                } else {
                    // the first standard row after a rule opens the next group
                    match self.table_row {
                        TableRow::HeaderRule => {
                            self.table_row = TableRow::Header;
                            self.output += "<thead>";
                        }
                        TableRow::BodyRule => {
                            self.table_row = TableRow::Body;
                            self.output += "<tbody>";
                        }
                        _ => {}
                    }
                    self.output += "<tr>";
                }
            }
            Event::Leave(Container::OrgTableRow(row)) => {
                if row.is_rule() {
                    match self.table_row {
                        TableRow::Body => {
                            self.output += "</tbody>";
                            self.table_row = TableRow::BodyRule;
                        }
                        TableRow::Header => {
                            self.output += "</thead>";
                            self.table_row = TableRow::BodyRule;
                        }
                        _ => {}
                    }
                    ctx.skip();
                } else {
                    self.output += "</tr>";
                }
            }

            Event::Enter(Container::OrgTableCell(_)) => self.output += "<td>",
            Event::Leave(Container::OrgTableCell(_)) => self.output += "</td>",

            Event::Enter(Container::Link(link)) => {
                let path = link.path();
                let path = path.trim_start_matches("file:");

                if link.is_image() {
                    let _ = write!(&mut self.output, r#"<img src="{}">"#, HtmlEscape(&path));
                    return ctx.skip();
                }

                let _ = write!(&mut self.output, r#"<a href="{}">"#, HtmlEscape(&path));

                // without a description the path doubles as the link text
                if !link.has_description() {
                    let _ = write!(&mut self.output, "{}</a>", HtmlEscape(&path));
                    ctx.skip();
                }
            }
            Event::Leave(Container::Link(_)) => self.output += "</a>",

            Event::Text(text) => {
                let _ = write!(&mut self.output, "{}", HtmlEscape(text));
            }

            Event::LineBreak(_) => self.output += "<br/>",

            Event::Snippet(snippet) => {
                // html snippets are raw markup by design and are not escaped
                if snippet.backend().eq_ignore_ascii_case("html") {
                    self.output += &snippet.value();
                }
            }

            Event::Rule(_) => self.output += "<hr/>",

            Event::Timestamp(timestamp) => {
                self.output += r#"<span class="timestamp-wrapper"><span class="timestamp">"#;
                for e in timestamp.syntax.children_with_tokens() {
                    match e {
                        NodeOrToken::Token(t) if t.kind() == SyntaxKind::MINUS2 => {
                            self.output += "&#x2013;";
                        }
                        NodeOrToken::Token(t) => {
                            // Token text is source text; escape it so the
                            // `<`/`>` delimiters of active timestamps are not
                            // emitted as markup.
                            let _ = write!(&mut self.output, "{}", HtmlEscape(t.text()));
                        }
                        _ => {}
                    }
                }
                self.output += r#"</span></span>"#;
            }

            Event::LatexFragment(latex) => {
                let _ = write!(&mut self.output, "{}", &latex.syntax);
            }
            Event::LatexEnvironment(latex) => {
                let _ = write!(&mut self.output, "{}", &latex.syntax);
            }

            // ignores keyword
            Event::Enter(Container::Keyword(_)) => ctx.skip(),

            Event::Entity(entity) => self.output += entity.html(),

            _ => {}
        }
    }
}

186
src/export/markdown.rs Normal file
View file

@ -0,0 +1,186 @@
use std::cmp::min;
use std::fmt::Write as _;
use crate::{SyntaxElement, SyntaxNode};
use super::event::{Container, Event};
use super::TraversalContext;
use super::Traverser;
/// Exporter that renders an org syntax tree as Markdown text.
#[derive(Default)]
pub struct MarkdownExport {
    /// Markdown accumulated so far; returned by `finish`.
    output: String,
    /// Set while traversing a quote block so that text lines after a line
    /// break are prefixed with `> `.
    inside_blockquote: bool,
}
impl MarkdownExport {
    /// Appends `s` verbatim to the markdown produced so far.
    pub fn push_str(&mut self, s: impl AsRef<str>) {
        self.output.push_str(s.as_ref());
    }

    /// Render syntax node to markdown string
    ///
    /// ```rust
    /// use orgize::{Org, ast::Bold, export::MarkdownExport, rowan::ast::AstNode};
    ///
    /// let org = Org::parse("* /hello/ *world*");
    /// let bold = org.first_node::<Bold>().unwrap();
    /// let mut markdown = MarkdownExport::default();
    /// markdown.render(bold.syntax());
    /// assert_eq!(markdown.finish(), "**world**");
    /// ```
    pub fn render(&mut self, node: &SyntaxNode) {
        let root = SyntaxElement::Node(node.clone());
        self.element(root, &mut TraversalContext::default());
    }

    /// Consumes the exporter and returns the markdown generated so far.
    pub fn finish(self) -> String {
        self.output
    }

    /// Makes sure whatever is written next starts on its own line: appends a
    /// `\n` unless the output is empty or already ends with `\n` or `\r`.
    fn follows_newline(&mut self) {
        let at_line_start = self.output.is_empty() || self.output.ends_with(['\n', '\r']);
        if at_line_start {
            return;
        }
        self.output.push('\n');
    }
}
impl Traverser for MarkdownExport {
    /// Appends the Markdown for one traversal event to `self.output`.
    ///
    /// Block-level constructs (headlines, sections, source blocks, quote
    /// blocks, list items) first make sure they start on a fresh line.
    /// Tables, underline, line breaks, snippets and timestamps add no markup
    /// of their own. Results of `write!` are discarded.
    fn event(&mut self, event: Event, ctx: &mut TraversalContext) {
        match event {
            Event::Enter(Container::Document(_)) => {}
            Event::Leave(Container::Document(_)) => {}

            Event::Enter(Container::Headline(headline)) => {
                self.follows_newline();
                // heading levels deeper than 6 are rendered with six `#`
                let level = min(headline.level(), 6);
                let _ = write!(&mut self.output, "{} ", "#".repeat(level));
                for elem in headline.title() {
                    self.element(elem, ctx);
                }
            }
            Event::Leave(Container::Headline(_)) => {}

            Event::Enter(Container::Paragraph(_)) => {}
            Event::Leave(Container::Paragraph(_)) => self.output += "\n",

            Event::Enter(Container::Section(_)) => self.follows_newline(),
            Event::Leave(Container::Section(_)) => {}

            Event::Enter(Container::Italic(_)) => self.output += "*",
            Event::Leave(Container::Italic(_)) => self.output += "*",

            Event::Enter(Container::Bold(_)) => self.output += "**",
            Event::Leave(Container::Bold(_)) => self.output += "**",

            Event::Enter(Container::Strike(_)) => self.output += "~~",
            Event::Leave(Container::Strike(_)) => self.output += "~~",

            Event::Enter(Container::Underline(_)) => {}
            Event::Leave(Container::Underline(_)) => {}

            Event::Enter(Container::Verbatim(_))
            | Event::Leave(Container::Verbatim(_))
            | Event::Enter(Container::Code(_))
            | Event::Leave(Container::Code(_)) => self.output += "`",

            Event::Enter(Container::SourceBlock(block)) => {
                self.follows_newline();
                self.output += "```";
                if let Some(language) = block.language() {
                    self.output += &language;
                }
            }
            Event::Leave(Container::SourceBlock(_)) => self.output += "```\n",

            Event::Enter(Container::QuoteBlock(_)) => {
                self.inside_blockquote = true;
                self.follows_newline();
                self.output += "> ";
            }
            Event::Leave(Container::QuoteBlock(_)) => self.inside_blockquote = false,

            Event::Enter(Container::CommentBlock(_)) => self.output += "<!--",
            Event::Leave(Container::CommentBlock(_)) => self.output += "-->",

            Event::Enter(Container::Comment(_)) => self.output += "<!--",
            Event::Leave(Container::Comment(_)) => self.output += "-->",

            Event::Enter(Container::Subscript(_)) => self.output += "<sub>",
            Event::Leave(Container::Subscript(_)) => self.output += "</sub>",

            Event::Enter(Container::Superscript(_)) => self.output += "<sup>",
            Event::Leave(Container::Superscript(_)) => self.output += "</sup>",

            Event::Enter(Container::List(_list)) => {}
            Event::Leave(Container::List(_list)) => {}

            Event::Enter(Container::ListItem(list_item)) => {
                self.follows_newline();
                self.output += &" ".repeat(list_item.indent());
                self.output += &list_item.bullet();
            }
            Event::Leave(Container::ListItem(_)) => {}

            Event::Enter(Container::OrgTable(_table)) => {}
            Event::Leave(Container::OrgTable(_)) => {}
            Event::Enter(Container::OrgTableRow(_row)) => {}
            Event::Leave(Container::OrgTableRow(_row)) => {}
            Event::Enter(Container::OrgTableCell(_)) => {}
            Event::Leave(Container::OrgTableCell(_)) => {}

            Event::Enter(Container::Link(link)) => {
                let path = link.path();
                let path = path.trim_start_matches("file:");

                if link.is_image() {
                    let _ = write!(&mut self.output, "![]({path})");
                    return ctx.skip();
                }

                // without a description the path doubles as the link text
                if !link.has_description() {
                    let _ = write!(&mut self.output, r#"[{}]({})"#, &path, &path);
                    return ctx.skip();
                }

                self.output += "[";
            }
            Event::Leave(Container::Link(link)) => {
                // Only reached for links with a description. Strip the
                // `file:` prefix exactly like the entering branch does, so
                // both link forms produce the same target.
                let path = link.path();
                let path = path.trim_start_matches("file:");
                let _ = write!(&mut self.output, "]({path})");
            }

            Event::Text(text) => {
                if self.inside_blockquote {
                    // every continuation line of a quote needs its own `> `
                    for (idx, line) in text.split('\n').enumerate() {
                        if idx != 0 {
                            self.output += "\n> ";
                        }
                        self.output += line;
                    }
                } else {
                    self.output += &*text;
                }
            }

            Event::LineBreak(_) => {}

            Event::Snippet(_snippet) => {}

            Event::Rule(_) => self.output += "\n-----\n",

            Event::Timestamp(_timestamp) => {}

            Event::LatexFragment(latex) => {
                let _ = write!(&mut self.output, "{}", &latex.syntax);
            }
            Event::LatexEnvironment(latex) => {
                let _ = write!(&mut self.output, "{}", &latex.syntax);
            }

            Event::Entity(entity) => self.output += entity.utf8(),

            _ => {}
        }
    }
}

View file

@ -1,31 +1,11 @@
//! Export `Org` struct to various formats.
mod event;
mod html;
mod org;
mod markdown;
mod traverse;
#[cfg(feature = "syntect")]
pub use html::SyntectHtmlHandler;
pub use html::{DefaultHtmlHandler, HtmlEscape, HtmlHandler};
pub use org::{DefaultOrgHandler, OrgHandler};
use std::io::{Error, Write};
use crate::elements::Datetime;
/// Writes `datetime` to `w` as `YYYY-MM-DD DAYNAME`, followed by ` HH:MM`
/// when both hour and minute are present, wrapped between `start` and `end`.
///
/// # Errors
///
/// Returns the first I/O error reported by `w`.
pub(crate) fn write_datetime<W: Write>(
    mut w: W,
    start: &str,
    datetime: &Datetime,
    end: &str,
) -> Result<(), Error> {
    write!(w, "{}", start)?;
    write!(
        w,
        "{}-{:02}-{:02} {}",
        datetime.year, datetime.month, datetime.day, datetime.dayname
    )?;

    // the time of day is only printed when it is fully specified
    match (datetime.hour, datetime.minute) {
        (Some(hour), Some(minute)) => write!(w, " {:02}:{:02}", hour, minute)?,
        _ => {}
    }

    write!(w, "{}", end)
}
pub use event::{Container, Event};
pub use html::{HtmlEscape, HtmlExport};
pub use markdown::MarkdownExport;
pub use traverse::{from_fn, from_fn_with_ctx, FromFn, FromFnWithCtx, TraversalContext, Traverser};

View file

@ -1,321 +0,0 @@
use std::io::{Error, Result as IOResult, Write};
use crate::elements::{Clock, Element, Table, Timestamp};
use crate::export::write_datetime;
/// Handler that writes elements out as org-mode text.
///
/// `E` is the handler's error type; it must be constructible from
/// `std::io::Error` so that write failures can be propagated.
pub trait OrgHandler<E: From<Error>>: Default {
    /// Writes the text that opens `element` to `w`
    /// (presumably called when the exporter enters the element; confirm against the caller).
    fn start<W: Write>(&mut self, w: W, element: &Element) -> Result<(), E>;
    /// Writes the text that closes `element` to `w`
    /// (presumably called when the exporter leaves the element; confirm against the caller).
    fn end<W: Write>(&mut self, w: W, element: &Element) -> Result<(), E>;
}
/// Stateless `OrgHandler` implementation that reports plain `std::io::Error`s.
#[derive(Default)]
pub struct DefaultOrgHandler;
impl OrgHandler<Error> for DefaultOrgHandler {
/// Writes the org syntax that opens `element`.
///
/// Container elements get their opening marker (for example `#+BEGIN_QUOTE`
/// or `*`) followed by their `pre_blank` blank lines. Non-container elements
/// are written in full, including their `post_blank` blank lines where the
/// element carries them. Elements mapped to `()` write nothing here.
///
/// # Errors
///
/// Returns the first I/O error reported by `w`.
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> IOResult<()> {
    match element {
        // container elements
        Element::SpecialBlock(block) => {
            writeln!(w, "#+BEGIN_{}", block.name)?;
            write_blank_lines(&mut w, block.pre_blank)?;
        }
        Element::QuoteBlock(block) => {
            writeln!(&mut w, "#+BEGIN_QUOTE")?;
            write_blank_lines(&mut w, block.pre_blank)?;
        }
        Element::CenterBlock(block) => {
            writeln!(&mut w, "#+BEGIN_CENTER")?;
            write_blank_lines(&mut w, block.pre_blank)?;
        }
        Element::VerseBlock(block) => {
            writeln!(&mut w, "#+BEGIN_VERSE")?;
            write_blank_lines(&mut w, block.pre_blank)?;
        }
        Element::Bold => write!(w, "*")?,
        Element::Document { pre_blank } => {
            write_blank_lines(w, *pre_blank)?;
        }
        Element::DynBlock(dyn_block) => {
            write!(&mut w, "#+BEGIN: {}", dyn_block.block_name)?;
            if let Some(parameters) = &dyn_block.arguments {
                write!(&mut w, " {}", parameters)?;
            }
            // the header line above has no newline yet: the extra line
            // written here terminates it
            write_blank_lines(&mut w, dyn_block.pre_blank + 1)?;
        }
        Element::Headline { .. } => (),
        Element::List(_list) => (),
        Element::Italic => write!(w, "/")?,
        Element::ListItem(list_item) => {
            // indentation first, then the bullet
            for _ in 0..list_item.indent {
                write!(&mut w, " ")?;
            }
            write!(&mut w, "{}", list_item.bullet)?;
        }
        Element::Paragraph { .. } => (),
        Element::Section => (),
        Element::Strike => write!(w, "+")?,
        Element::Underline => write!(w, "_")?,
        Element::Drawer(drawer) => {
            writeln!(&mut w, ":{}:", drawer.name)?;
            write_blank_lines(&mut w, drawer.pre_blank)?;
        }
        // non-container elements
        Element::CommentBlock(block) => {
            writeln!(&mut w, "#+BEGIN_COMMENT")?;
            write!(&mut w, "{}", block.contents)?;
            writeln!(&mut w, "#+END_COMMENT")?;
            write_blank_lines(&mut w, block.post_blank)?;
        }
        Element::ExampleBlock(block) => {
            writeln!(&mut w, "#+BEGIN_EXAMPLE")?;
            write!(&mut w, "{}", block.contents)?;
            writeln!(&mut w, "#+END_EXAMPLE")?;
            write_blank_lines(&mut w, block.post_blank)?;
        }
        Element::ExportBlock(block) => {
            writeln!(&mut w, "#+BEGIN_EXPORT {}", block.data)?;
            write!(&mut w, "{}", block.contents)?;
            writeln!(&mut w, "#+END_EXPORT")?;
            write_blank_lines(&mut w, block.post_blank)?;
        }
        Element::SourceBlock(block) => {
            writeln!(&mut w, "#+BEGIN_SRC {}", block.language)?;
            write!(&mut w, "{}", block.contents)?;
            writeln!(&mut w, "#+END_SRC")?;
            write_blank_lines(&mut w, block.post_blank)?;
        }
        Element::BabelCall(call) => {
            writeln!(&mut w, "#+CALL: {}", call.value)?;
            write_blank_lines(w, call.post_blank)?;
        }
        Element::InlineSrc(inline_src) => {
            write!(&mut w, "src_{}", inline_src.lang)?;
            if let Some(options) = &inline_src.options {
                write!(&mut w, "[{}]", options)?;
            }
            write!(&mut w, "{{{}}}", inline_src.body)?;
        }
        Element::Code { value } => write!(w, "~{}~", value)?,
        Element::FnRef(fn_ref) => {
            write!(&mut w, "[fn:{}", fn_ref.label)?;
            if let Some(definition) = &fn_ref.definition {
                write!(&mut w, ":{}", definition)?;
            }
            write!(&mut w, "]")?;
        }
        Element::InlineCall(inline_call) => {
            write!(&mut w, "call_{}", inline_call.name)?;
            if let Some(header) = &inline_call.inside_header {
                write!(&mut w, "[{}]", header)?;
            }
            write!(&mut w, "({})", inline_call.arguments)?;
            if let Some(header) = &inline_call.end_header {
                write!(&mut w, "[{}]", header)?;
            }
        }
        Element::Link(link) => {
            write!(&mut w, "[[{}]", link.path)?;
            if let Some(desc) = &link.desc {
                write!(&mut w, "[{}]", desc)?;
            }
            write!(&mut w, "]")?;
        }
        Element::Macros(_macros) => (),
        Element::RadioTarget => (),
        Element::Snippet(snippet) => write!(w, "@@{}:{}@@", snippet.name, snippet.value)?,
        Element::Target(_target) => (),
        Element::Text { value } => write!(w, "{}", value)?,
        Element::Timestamp(timestamp) => {
            write_timestamp(&mut w, &timestamp)?;
        }
        Element::Verbatim { value } => write!(w, "={}=", value)?,
        Element::FnDef(fn_def) => {
            write_blank_lines(w, fn_def.post_blank)?;
        }
        Element::Clock(clock) => {
            write!(w, "CLOCK: ")?;

            match clock {
                Clock::Closed {
                    start,
                    end,
                    duration,
                    post_blank,
                    ..
                } => {
                    write_datetime(&mut w, "[", &start, "]--")?;
                    write_datetime(&mut w, "[", &end, "]")?;
                    writeln!(&mut w, " => {}", duration)?;
                    write_blank_lines(&mut w, *post_blank)?;
                }
                Clock::Running {
                    start, post_blank, ..
                } => {
                    // the closing delimiter carries the line terminator
                    write_datetime(&mut w, "[", &start, "]\n")?;
                    write_blank_lines(&mut w, *post_blank)?;
                }
            }
        }
        Element::Comment(comment) => {
            write!(w, "{}", comment.value)?;
            write_blank_lines(&mut w, comment.post_blank)?;
        }
        Element::FixedWidth(fixed_width) => {
            write!(&mut w, "{}", fixed_width.value)?;
            write_blank_lines(&mut w, fixed_width.post_blank)?;
        }
        Element::Keyword(keyword) => {
            write!(&mut w, "#+{}", keyword.key)?;
            if let Some(optional) = &keyword.optional {
                write!(&mut w, "[{}]", optional)?;
            }
            writeln!(&mut w, ": {}", keyword.value)?;
            write_blank_lines(&mut w, keyword.post_blank)?;
        }
        Element::Rule(rule) => {
            writeln!(w, "-----")?;
            write_blank_lines(&mut w, rule.post_blank)?;
        }
        Element::Cookie(_cookie) => (),
        Element::Title(title) => {
            // stars, then the optional keyword and priority; the tags and the
            // end of the line are not written in this method
            for _ in 0..title.level {
                write!(&mut w, "*")?;
            }
            if let Some(keyword) = &title.keyword {
                write!(&mut w, " {}", keyword)?;
            }
            if let Some(priority) = title.priority {
                write!(&mut w, " [#{}]", priority)?;
            }
            write!(&mut w, " ")?;
        }
        Element::Table(_) => (),
        Element::TableRow(_) => (),
        Element::TableCell(_) => (),
    }

    Ok(())
}
fn end<W: Write>(&mut self, mut w: W, element: &Element) -> IOResult<()> {
match element {
// container elements
Element::SpecialBlock(block) => {
writeln!(&mut w, "#+END_{}", block.name)?;
write_blank_lines(&mut w, block.post_blank)?;
}
Element::QuoteBlock(block) => {
writeln!(&mut w, "#+END_QUOTE")?;
write_blank_lines(&mut w, block.post_blank)?;
}
Element::CenterBlock(block) => {
writeln!(&mut w, "#+END_CENTER")?;
write_blank_lines(&mut w, block.post_blank)?;
}
Element::VerseBlock(block) => {
writeln!(&mut w, "#+END_VERSE")?;
write_blank_lines(&mut w, block.post_blank)?;
}
Element::Bold => write!(w, "*")?,
Element::Document { .. } => (),
Element::DynBlock(dyn_block) => {
writeln!(w, "#+END:")?;
write_blank_lines(w, dyn_block.post_blank)?;
}
Element::Headline { .. } => (),
Element::List(list) => {
write_blank_lines(w, list.post_blank)?;
}
Element::Italic => write!(w, "/")?,
Element::ListItem(_) => (),
Element::Paragraph { post_blank } => {
write_blank_lines(w, post_blank + 1)?;
}
Element::Section => (),
Element::Strike => write!(w, "+")?,
Element::Underline => write!(w, "_")?,
Element::Drawer(drawer) => {
writeln!(&mut w, ":END:")?;
write_blank_lines(&mut w, drawer.post_blank)?;
}
Element::Title(title) => {
if !title.tags.is_empty() {
write!(&mut w, " :")?;
for tag in &title.tags {
write!(&mut w, "{}:", tag)?;
}
}
writeln!(&mut w)?;
if let Some(planning) = &title.planning {
if let Some(scheduled) = &planning.scheduled {
write!(&mut w, "SCHEDULED: ")?;
write_timestamp(&mut w, &scheduled)?;
}
if let Some(deadline) = &planning.deadline {
if planning.scheduled.is_some() {
write!(&mut w, " ")?;
}
write!(&mut w, "DEADLINE: ")?;
write_timestamp(&mut w, &deadline)?;
}
if let Some(closed) = &planning.closed {
if planning.deadline.is_some() {
write!(&mut w, " ")?;
}
write!(&mut w, "CLOSED: ")?;
write_timestamp(&mut w, &closed)?;
}
writeln!(&mut w)?;
}
if !title.properties.is_empty() {
writeln!(&mut w, ":PROPERTIES:")?;
for (key, value) in title.properties.iter() {
writeln!(&mut w, ":{}: {}", key, value)?;
}
writeln!(&mut w, ":END:")?;
}
write_blank_lines(&mut w, title.post_blank)?;
}
Element::Table(Table::Org { post_blank, .. }) => {
write_blank_lines(w, *post_blank)?;
}
Element::Table(Table::TableEl { post_blank, .. }) => {
write_blank_lines(w, *post_blank)?;
}
Element::TableRow(_) => (),
Element::TableCell(_) => (),
// non-container elements
_ => debug_assert!(!element.is_container()),
}
Ok(())
}
}
/// Writes `count` newline characters to `w`.
///
/// # Errors
///
/// Stops at and returns the first I/O error reported by `w`.
fn write_blank_lines<W: Write>(mut w: W, count: usize) -> Result<(), Error> {
    (0..count).try_for_each(|_| writeln!(w))
}
/// Writes `timestamp` to `w` in org syntax.
///
/// Active timestamps are wrapped in `<...>`, inactive ones in `[...]`; the
/// two ends of a range are joined with `--`, and a diary timestamp is
/// written as `<%%(...)>`.
///
/// # Errors
///
/// Returns the first I/O error reported by `w`.
fn write_timestamp<W: Write>(mut w: W, timestamp: &Timestamp) -> Result<(), Error> {
    match timestamp {
        Timestamp::Active { start, .. } => write_datetime(&mut w, "<", start, ">"),
        Timestamp::Inactive { start, .. } => write_datetime(&mut w, "[", start, "]"),
        Timestamp::ActiveRange { start, end, .. } => {
            write_datetime(&mut w, "<", start, ">--")?;
            write_datetime(&mut w, "<", end, ">")
        }
        Timestamp::InactiveRange { start, end, .. } => {
            write_datetime(&mut w, "[", start, "]--")?;
            write_datetime(&mut w, "[", end, "]")
        }
        Timestamp::Diary { value } => write!(w, "<%%({})>", value),
    }
}

282
src/export/traverse.rs Normal file
View file

@ -0,0 +1,282 @@
use crate::ast::*;
use crate::syntax::{SyntaxElement, SyntaxKind};
use rowan::ast::AstNode;
use SyntaxKind::*;
use super::event::{Container, Event};
/// What the traversal should do once the current event handler returns.
#[derive(Default, Debug, PartialEq, Eq, Clone, Copy)]
enum TraversalControl {
    Up,
    Stop,
    Skip,
    #[default]
    Continue,
}

/// Handle given to every event handler for steering the traversal.
///
/// A fresh context is in the "continue" state.
#[derive(Default)]
pub struct TraversalContext {
    control: TraversalControl,
}

impl TraversalContext {
    /// Records `next` as the pending traversal instruction.
    fn request(&mut self, next: TraversalControl) {
        self.control = next;
    }

    /// Stops traversal completely
    pub fn stop(&mut self) {
        self.request(TraversalControl::Stop);
    }

    /// Skips traversal of the current node's siblings
    pub fn up(&mut self) {
        self.request(TraversalControl::Up);
    }

    /// Skips traversal of the current node's descendants
    pub fn skip(&mut self) {
        self.request(TraversalControl::Skip);
    }

    /// Continues traversal
    pub fn r#continue(&mut self) {
        self.request(TraversalControl::Continue);
    }
}
/// A trait for enumerating org syntax tree
///
/// ### `TraversalContext`
///
/// `TraversalContext` can be used to control the traversal.
///
/// For example, `ctx.skip()` skips the traversal of the current
/// element and its descendants, which improves traversal performance.
///
/// ```rust
/// use orgize::{
/// export::{Container, Event, HtmlExport, TraversalContext, Traverser},
/// Org,
/// };
/// use slugify::slugify;
///
/// #[derive(Default)]
/// struct Toc(HtmlExport);
///
/// impl Traverser for Toc {
/// fn event(&mut self, event: Event, ctx: &mut TraversalContext) {
/// match event {
/// Event::Enter(Container::Headline(headline)) => {
/// let title = headline.title().map(|e| e.to_string()).collect::<String>();
/// self.0.push_str(&format!("<a href='#{}'>", slugify!(&title)));
/// for elem in headline.title() {
/// self.element(elem, ctx);
/// }
/// self.0.push_str("</a>");
/// if headline.headlines().count() > 0 {
/// self.0.push_str("<ul>");
/// }
/// }
/// Event::Leave(Container::Headline(headline)) => {
/// if headline.headlines().count() > 0 {
/// self.0.push_str("</ul>");
/// }
/// }
/// Event::Enter(Container::Section(_)) | Event::Leave(Container::Section(_)) => ctx.skip(),
/// Event::Enter(Container::Document(_)) | Event::Leave(Container::Document(_)) => {}
/// _ => self.0.event(event, ctx),
/// }
/// }
/// }
///
/// let org = Org::parse(r#"
/// * heading 1
/// section 1
/// ** heading 1.1
/// ** heading 1.2
/// * heading 2
/// section 2
/// * heading 3
/// **** heading 3.1"#);
/// let mut toc = Toc::default();
/// org.traverse(&mut toc);
/// assert_eq!(toc.0.finish(), "\
/// <a href='#heading-1'>heading 1</a>\
/// <ul><a href='#heading-1-1'>heading 1.1</a><a href='#heading-1-2'>heading 1.2</a></ul>\
/// <a href='#heading-2'>heading 2</a>\
/// <a href='#heading-3'>heading 3</a>\
/// <ul><a href='#heading-3-1'>heading 3.1</a></ul>");
/// ```
pub trait Traverser {
/// Handles traversal event
///
/// Called by [`Traverser::element`] for every event it produces. The
/// implementation may change `ctx.control` (for example through the helper
/// methods on `TraversalContext`) to influence how the walk proceeds.
fn event(&mut self, event: Event, ctx: &mut TraversalContext);
/// Walks `element` depth-first and reports what it finds through
/// [`Traverser::event`].
///
/// * Node kinds handled by `walk!(Kind)` produce an `Event::Enter`, then
///   their children are visited recursively, then an `Event::Leave`.
/// * Node kinds handled by `walk!(@Kind)` produce a single `Event::Kind`;
///   their children are not visited.
/// * `BLOCK_CONTENT` and `LIST_ITEM_CONTENT` nodes produce no event of their
///   own; only their children are visited.
/// * Any other node kind is ignored together with its subtree.
/// * Tokens produce `Event::Text` when their kind is `TEXT` and are ignored
///   otherwise.
///
/// After every event and every visited child, `ctx.control` is inspected and
/// the walk of the current element is abandoned unless it is `Continue`.
fn element(&mut self, element: SyntaxElement, ctx: &mut TraversalContext) {
// Checks the control flag left behind by the handler (or by a nested
// `element` call) and returns from `element` unless it is `Continue`.
macro_rules! take_control {
() => {
match ctx.control {
// `Stop` is left untouched so that every enclosing `element` call
// sees it as well and returns too.
TraversalControl::Stop => {
ctx.control = TraversalControl::Stop;
return;
}
// `Up` is downgraded to `Skip` before returning: the caller's own
// `take_control!` then sees `Skip` and returns as well.
TraversalControl::Up => {
ctx.control = TraversalControl::Skip;
return;
}
// `Skip` abandons the rest of the current element only; the flag is
// reset so the caller carries on with its next child.
TraversalControl::Skip => {
ctx.control = TraversalControl::Continue;
return;
}
TraversalControl::Continue => {}
}
};
}
match element {
SyntaxElement::Node(node) => {
macro_rules! walk {
// Container form: Enter event, children, Leave event.
($ast:ident) => {{
debug_assert!($ast::can_cast(node.kind()));
let node = $ast { syntax: node };
self.event(Event::Enter(Container::$ast(node.clone())), ctx);
take_control!();
for child in node.syntax.children_with_tokens() {
self.element(child, ctx);
take_control!();
}
self.event(Event::Leave(Container::$ast(node.clone())), ctx);
take_control!();
}};
// Leaf form (`@Kind`): a single event, children are not visited.
(@$ast:ident) => {{
debug_assert!($ast::can_cast(node.kind()));
let node = $ast { syntax: node };
self.event(Event::$ast(node), ctx);
take_control!();
}};
}
match node.kind() {
DOCUMENT => walk!(Document),
HEADLINE => walk!(Headline),
SECTION => walk!(Section),
PARAGRAPH => walk!(Paragraph),
BOLD => walk!(Bold),
ITALIC => walk!(Italic),
STRIKE => walk!(Strike),
UNDERLINE => walk!(Underline),
LIST => walk!(List),
LIST_ITEM => walk!(ListItem),
CODE => walk!(Code),
INLINE_CALL => walk!(@InlineCall),
INLINE_SRC => walk!(@InlineSrc),
RULE => walk!(@Rule),
VERBATIM => walk!(Verbatim),
SPECIAL_BLOCK => walk!(SpecialBlock),
QUOTE_BLOCK => walk!(QuoteBlock),
CENTER_BLOCK => walk!(CenterBlock),
VERSE_BLOCK => walk!(VerseBlock),
COMMENT_BLOCK => walk!(CommentBlock),
EXAMPLE_BLOCK => walk!(ExampleBlock),
EXPORT_BLOCK => walk!(ExportBlock),
SOURCE_BLOCK => walk!(SourceBlock),
BABEL_CALL => walk!(BabelCall),
CLOCK => walk!(@Clock),
COOKIE => walk!(@Cookie),
RADIO_TARGET => walk!(RadioTarget),
DRAWER => walk!(Drawer),
DYN_BLOCK => walk!(DynBlock),
FN_DEF => walk!(FnDef),
FN_REF => walk!(FnRef),
MACROS => walk!(@Macros),
SNIPPET => walk!(@Snippet),
// All three timestamp kinds share the `Timestamp` wrapper.
TIMESTAMP_ACTIVE | TIMESTAMP_INACTIVE | TIMESTAMP_DIARY => walk!(@Timestamp),
TARGET => walk!(Target),
COMMENT => walk!(Comment),
FIXED_WIDTH => walk!(FixedWidth),
ORG_TABLE => walk!(OrgTable),
// Rule rows and standard rows are both reported as `OrgTableRow`.
ORG_TABLE_RULE_ROW | ORG_TABLE_STANDARD_ROW => walk!(OrgTableRow),
ORG_TABLE_CELL => walk!(OrgTableCell),
LINK => walk!(Link),
LATEX_FRAGMENT => walk!(@LatexFragment),
LATEX_ENVIRONMENT => walk!(@LatexEnvironment),
ENTITY => walk!(@Entity),
LINE_BREAK => walk!(@LineBreak),
SUPERSCRIPT => walk!(Superscript),
SUBSCRIPT => walk!(Subscript),
KEYWORD => walk!(Keyword),
PROPERTY_DRAWER => walk!(PropertyDrawer),
#[cfg(feature = "syntax-org-fc")]
CLOZE => walk!(@Cloze),
// Wrapper nodes: no event of their own, only their children are walked.
// NOTE(review): a `Skip` (or an `Up` downgraded to `Skip`) coming out of
// a child is consumed here, at the wrapper node, rather than at the
// enclosing block or list item -- confirm this is intended.
BLOCK_CONTENT | LIST_ITEM_CONTENT => {
for child in node.children_with_tokens() {
self.element(child, ctx);
take_control!();
}
}
// Every other node kind is ignored, including its subtree.
_ => {}
}
}
SyntaxElement::Token(token) => {
// Only `TEXT` tokens are reported; all other tokens are dropped.
if token.kind() == TEXT {
self.event(Event::Text(Token(token)), ctx);
take_control!();
}
}
};
}
}
/// A [`Traverser`] backed by a closure that only receives the [`Event`].
///
/// Created by [`from_fn`]. The traversal context is not handed to the
/// closure, so it cannot alter the course of the traversal.
pub struct FromFn<F>(F)
where
    F: FnMut(Event);

impl<F> Traverser for FromFn<F>
where
    F: FnMut(Event),
{
    /// Passes `event` on to the wrapped closure and ignores the context.
    fn event(&mut self, event: Event, _ctx: &mut TraversalContext) {
        let FromFn(callback) = self;
        callback(event)
    }
}
/// A [`Traverser`] backed by a closure that receives both the [`Event`] and
/// the [`TraversalContext`].
///
/// Created by [`from_fn_with_ctx`].
pub struct FromFnWithCtx<F>(F)
where
    F: FnMut(Event, &mut TraversalContext);

impl<F> Traverser for FromFnWithCtx<F>
where
    F: FnMut(Event, &mut TraversalContext),
{
    /// Passes `event` and `ctx` on to the wrapped closure.
    fn event(&mut self, event: Event, ctx: &mut TraversalContext) {
        let FromFnWithCtx(callback) = self;
        callback(event, ctx)
    }
}
/// Wraps a closure into a [`Traverser`].
///
/// The closure is called with every [`Event`] of the traversal. Use
/// [`from_fn_with_ctx`] when the closure also needs the traversal context.
///
/// ```rust
/// use orgize::{
///     export::{from_fn, Container, Event, Traverser},
///     Org,
/// };
///
/// let mut count = 0;
/// let mut handler = from_fn(|event| {
///     if matches!(event, Event::Enter(Container::Headline(_))) {
///         count += 1;
///     }
/// });
/// Org::parse("* 1\n** 2\n*** 3\n****4").traverse(&mut handler);
/// assert_eq!(count, 3);
/// ```
pub fn from_fn<F>(f: F) -> FromFn<F>
where
    F: FnMut(Event),
{
    FromFn(f)
}
/// Wraps a closure into a [`Traverser`], giving it access to the traversal
/// context.
///
/// The closure is called with every [`Event`] together with the
/// [`TraversalContext`], which it can use to steer the traversal (the example
/// below stops it early).
///
/// ```rust
/// use orgize::{
///     export::{from_fn_with_ctx, Container, Event, Traverser},
///     Org,
/// };
///
/// let mut count = 0;
/// let mut handler = from_fn_with_ctx(|event, ctx| {
///     if let Event::Enter(Container::Headline(hdl)) = event {
///         count += 1;
///         if &hdl.title_raw() == "cow" {
///             ctx.stop();
///         }
///     }
/// });
/// Org::parse("* 1\n* cow\n* 3").traverse(&mut handler);
/// assert_eq!(count, 2);
/// ```
pub fn from_fn_with_ctx<F>(f: F) -> FromFnWithCtx<F>
where
    F: FnMut(Event, &mut TraversalContext),
{
    FromFnWithCtx(f)
}

File diff suppressed because it is too large Load diff

View file

@ -1,245 +1,23 @@
//! A Rust library for parsing orgmode files.
//!
//! [Live demo](https://orgize.herokuapp.com/)
//!
//! # Parse
//!
//! To parse an orgmode string, simply invoke the [`Org::parse`] function:
//!
//! [`Org::parse`]: struct.Org.html#method.parse
//!
//! ```rust
//! use orgize::Org;
//!
//! Org::parse("* DONE Title :tag:");
//! ```
//!
//! or [`Org::parse_custom`]:
//!
//! [`Org::parse_custom`]: struct.Org.html#method.parse_custom
//!
//! ```rust
//! use orgize::{Org, ParseConfig};
//!
//! Org::parse_custom(
//! "* TASK Title 1",
//! &ParseConfig {
//! // custom todo keywords
//! todo_keywords: (vec!["TASK".to_string()], vec![]),
//! ..Default::default()
//! },
//! );
//! ```
//!
//! # Iter
//!
//! The [`Org::iter`] function returns an iterator of [`Event`]s, each of which is
//! a simple wrapper around an [`Element`].
//!
//! [`Org::iter`]: struct.Org.html#method.iter
//! [`Event`]: enum.Event.html
//! [`Element`]: elements/enum.Element.html
//!
//! ```rust
//! use orgize::Org;
//!
//! for event in Org::parse("* DONE Title :tag:").iter() {
//! // handling the event
//! }
//! ```
//!
//! **Note**: whether or not an element is a container, it appears twice in one loop:
//! once as [`Event::Start(element)`] and once as [`Event::End(element)`].
//!
//! [`Event::Start(element)`]: enum.Event.html#variant.Start
//! [`Event::End(element)`]: enum.Event.html#variant.End
//!
//! # Render html
//!
//! You can call the [`Org::write_html`] function to generate html directly, which
//! uses the [`DefaultHtmlHandler`] internally:
//!
//! [`Org::write_html`]: struct.Org.html#method.write_html
//! [`DefaultHtmlHandler`]: export/struct.DefaultHtmlHandler.html
//!
//! ```rust
//! use orgize::Org;
//!
//! let mut writer = Vec::new();
//! Org::parse("* title\n*section*").write_html(&mut writer).unwrap();
//!
//! assert_eq!(
//! String::from_utf8(writer).unwrap(),
//! "<main><h1>title</h1><section><p><b>section</b></p></section></main>"
//! );
//! ```
//!
//! # Render html with custom `HtmlHandler`
//!
//! To customize html rendering, simply implement the [`HtmlHandler`] trait and pass
//! it to the [`Org::write_html_custom`] function.
//!
//! [`HtmlHandler`]: export/trait.HtmlHandler.html
//! [`Org::write_html_custom`]: struct.Org.html#method.write_html_custom
//!
//! The following code demonstrates how to add an id to every headline and return
//! a custom error type while rendering.
//!
//! ```rust
//! use std::convert::From;
//! use std::io::{Error as IOError, Write};
//! use std::string::FromUtf8Error;
//!
//! use orgize::export::{DefaultHtmlHandler, HtmlHandler};
//! use orgize::{Element, Org};
//! use slugify::slugify;
//!
//! #[derive(Debug)]
//! enum MyError {
//! IO(IOError),
//! Heading,
//! Utf8(FromUtf8Error),
//! }
//!
//! // From<std::io::Error> trait is required for custom error type
//! impl From<IOError> for MyError {
//! fn from(err: IOError) -> Self {
//! MyError::IO(err)
//! }
//! }
//!
//! impl From<FromUtf8Error> for MyError {
//! fn from(err: FromUtf8Error) -> Self {
//! MyError::Utf8(err)
//! }
//! }
//!
//! #[derive(Default)]
//! struct MyHtmlHandler(DefaultHtmlHandler);
//!
//! impl HtmlHandler<MyError> for MyHtmlHandler {
//! fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
//! if let Element::Title(title) = element {
//! if title.level > 6 {
//! return Err(MyError::Heading);
//! } else {
//! write!(
//! w,
//! "<h{0}><a id=\"{1}\" href=\"#{1}\">",
//! title.level,
//! slugify!(&title.raw),
//! )?;
//! }
//! } else {
//! // fallthrough to default handler
//! self.0.start(w, element)?;
//! }
//! Ok(())
//! }
//!
//! fn end<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
//! if let Element::Title(title) = element {
//! write!(w, "</a></h{}>", title.level)?;
//! } else {
//! self.0.end(w, element)?;
//! }
//! Ok(())
//! }
//! }
//!
//! fn main() -> Result<(), MyError> {
//! let mut writer = Vec::new();
//! let mut handler = MyHtmlHandler::default();
//! Org::parse("* title\n*section*").write_html_custom(&mut writer, &mut handler)?;
//!
//! assert_eq!(
//! String::from_utf8(writer)?,
//! "<main><h1><a id=\"title\" href=\"#title\">title</a></h1>\
//! <section><p><b>section</b></p></section></main>"
//! );
//!
//! Ok(())
//! }
//! ```
//!
//! **Note**: as mentioned above, each element appears twice while iterating,
//! and the handler silently ignores all end events from non-container elements.
//!
//! So if you want to change how a non-container element renders, just redefine the `start`
//! function and leave the `end` function unchanged.
//!
//! # Serde
//!
//! The `Org` struct already implements serde's `Serialize` trait, which means you can
//! serialize it into any format supported by serde, such as json:
//!
//! ```rust
//! use orgize::Org;
//! use serde_json::{json, to_string};
//!
//! let org = Org::parse("I 'm *bold*.");
//! #[cfg(feature = "ser")]
//! println!("{}", to_string(&org).unwrap());
//!
//! // {
//! // "type": "document",
//! // "children": [{
//! // "type": "section",
//! // "children": [{
//! // "type": "paragraph",
//! // "children":[{
//! // "type": "text",
//! // "value":"I 'm "
//! // }, {
//! // "type": "bold",
//! // "children":[{
//! // "type": "text",
//! // "value": "bold"
//! // }]
//! // }, {
//! // "type":"text",
//! // "value":"."
//! // }]
//! // }]
//! // }]
//! // }
//! ```
//!
//! # Features
//!
//! Currently, orgize provides three features:
//!
//! + `ser`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default.
//!
//! + `chrono`: adds the ability to convert `Datetime` into `chrono` structs, disabled by default.
//!
//! + `syntect`: provides [`SyntectHtmlHandler`] for highlighting code block, disabled by default.
//!
//! [`SyntectHtmlHandler`]: export/struct.SyntectHtmlHandler.html
//!
//! # License
//!
//! MIT
#![doc = include_str!("../README.md")]
mod config;
pub mod elements;
pub mod ast;
pub mod config;
mod entities;
pub mod export;
mod headline;
mod org;
mod parse;
mod parsers;
mod validate;
mod replace;
mod syntax;
#[cfg(test)]
mod tests;
// Re-export of the indextree crate.
pub use indextree;
#[cfg(feature = "syntect")]
pub use syntect;
// Re-export of the rowan crate.
pub use rowan;
pub use config::ParseConfig;
pub use elements::Element;
pub use headline::{Document, Headline};
pub use org::{Event, Org};
pub use validate::ValidationError;
pub use org::Org;
pub use rowan::{TextRange, TextSize};
pub use syntax::{
SyntaxElement, SyntaxElementChildren, SyntaxKind, SyntaxNode, SyntaxNodeChildren, SyntaxToken,
};
#[cfg(feature = "wasm")]
mod wasm;
pub(crate) use syntax::combinator::lossless_parser;

View file

@ -1,193 +1,106 @@
use indextree::{Arena, NodeEdge, NodeId};
use std::io::{Error, Write};
use std::ops::{Index, IndexMut};
use rowan::ast::AstNode;
use rowan::{GreenNode, TextSize};
use crate::{
config::{ParseConfig, DEFAULT_CONFIG},
elements::{Element, Keyword},
export::{DefaultHtmlHandler, DefaultOrgHandler, HtmlHandler, OrgHandler},
parsers::{blank_lines_count, parse_container, Container, OwnedArena},
};
/// A parsed org-mode document stored as a tree of [`Element`]s inside an
/// `indextree` arena.
pub struct Org<'a> {
/// Arena that owns every element of the tree.
pub(crate) arena: Arena<Element<'a>>,
/// Id of the tree's root node (an `Element::Document` when built by the
/// constructors in this file).
pub(crate) root: NodeId,
}
use crate::ast::Document;
use crate::config::ParseConfig;
use crate::export::{HtmlExport, TraversalContext, Traverser};
use crate::syntax::{OrgLanguage, SyntaxNode};
use crate::SyntaxElement;
#[derive(Debug)]
pub enum Event<'a, 'b> {
Start(&'b Element<'a>),
End(&'b Element<'a>),
pub struct Org {
pub(crate) green: GreenNode,
pub(crate) config: ParseConfig,
}
impl<'a> Org<'a> {
/// Creates a new empty `Org` struct.
pub fn new() -> Org<'static> {
let mut arena = Arena::new();
let root = arena.new_node(Element::Document { pre_blank: 0 });
Org { arena, root }
impl Org {
/// Parse input string to Org element tree using default parse config
pub fn parse(input: impl AsRef<str>) -> Org {
ParseConfig::default().parse(input)
}
/// Parses string `text` into `Org` struct.
pub fn parse(text: &'a str) -> Org<'a> {
Org::parse_custom(text, &DEFAULT_CONFIG)
pub fn green(&self) -> &GreenNode {
&self.green
}
/// Likes `parse`, but accepts `String`.
pub fn parse_string(text: String) -> Org<'static> {
Org::parse_string_custom(text, &DEFAULT_CONFIG)
pub fn config(&self) -> &ParseConfig {
&self.config
}
/// Parses string `text` into `Org` struct with custom `ParseConfig`.
pub fn parse_custom(text: &'a str, config: &ParseConfig) -> Org<'a> {
let mut arena = Arena::new();
let (text, pre_blank) = blank_lines_count(text);
let root = arena.new_node(Element::Document { pre_blank });
let mut org = Org { arena, root };
/// Returns the document
pub fn document(&self) -> Document {
Document {
syntax: SyntaxNode::new_root(self.green.clone()),
}
}
parse_container(
&mut org.arena,
Container::Document {
content: text,
node: org.root,
},
config,
/// Returns org-mode string
pub fn to_org(&self) -> String {
self.green.to_string()
}
/// Convert org element tree to html-format using default html handler
pub fn to_html(&self) -> String {
let mut handler = HtmlExport::default();
self.traverse(&mut handler);
handler.finish()
}
/// Walk through org element tree using given traverser
pub fn traverse<T: Traverser>(&self, t: &mut T) {
let mut ctx = TraversalContext::default();
t.element(
SyntaxElement::Node(SyntaxNode::new_root(self.green.clone())),
&mut ctx,
);
org.debug_validate();
org
}
/// Likes `parse_custom`, but accepts `String`.
pub fn parse_string_custom(text: String, config: &ParseConfig) -> Org<'static> {
let mut arena = Arena::new();
let (text, pre_blank) = blank_lines_count(&text);
let root = arena.new_node(Element::Document { pre_blank });
let mut org = Org { arena, root };
parse_container(
&mut OwnedArena::new(&mut org.arena),
Container::Document {
content: text,
node: org.root,
},
config,
);
org.debug_validate();
org
}
/// Returns a reference to the underlay arena.
pub fn arena(&self) -> &Arena<Element<'a>> {
&self.arena
}
/// Returns a mutual reference to the underlay arena.
pub fn arena_mut(&mut self) -> &mut Arena<Element<'a>> {
&mut self.arena
}
/// Returns an iterator of `Event`s.
pub fn iter<'b>(&'b self) -> impl Iterator<Item = Event<'a, 'b>> + 'b {
self.root.traverse(&self.arena).map(move |edge| match edge {
NodeEdge::Start(node) => Event::Start(&self[node]),
NodeEdge::End(node) => Event::End(&self[node]),
})
}
/// Returns an iterator of `Keyword`s.
pub fn keywords(&self) -> impl Iterator<Item = &Keyword<'_>> {
self.root
.descendants(&self.arena)
.skip(1)
.filter_map(move |node| match &self[node] {
Element::Keyword(kw) => Some(kw),
_ => None,
})
}
/// Writes an `Org` struct as html format.
pub fn write_html<W>(&self, writer: W) -> Result<(), Error>
where
W: Write,
{
self.write_html_custom(writer, &mut DefaultHtmlHandler)
}
/// Writes an `Org` struct as html format with custom `HtmlHandler`.
pub fn write_html_custom<W, H, E>(&self, mut writer: W, handler: &mut H) -> Result<(), E>
where
W: Write,
E: From<Error>,
H: HtmlHandler<E>,
{
for event in self.iter() {
match event {
Event::Start(element) => handler.start(&mut writer, element)?,
Event::End(element) => handler.end(&mut writer, element)?,
/// Returns the first node in org element tree in depth first order
pub fn first_node<N: AstNode<Language = OrgLanguage>>(&self) -> Option<N> {
fn find<N: AstNode<Language = OrgLanguage>>(node: SyntaxNode) -> Option<N> {
if N::can_cast(node.kind()) {
N::cast(node)
} else {
node.children().find_map(find)
}
}
Ok(())
find(SyntaxNode::new_root(self.green.clone()))
}
/// Writes an `Org` struct as org format.
pub fn write_org<W>(&self, writer: W) -> Result<(), Error>
where
W: Write,
{
self.write_org_custom(writer, &mut DefaultOrgHandler)
}
/// Writes an `Org` struct as org format with custom `OrgHandler`.
pub fn write_org_custom<W, H, E>(&self, mut writer: W, handler: &mut H) -> Result<(), E>
where
W: Write,
E: From<Error>,
H: OrgHandler<E>,
{
for event in self.iter() {
match event {
Event::Start(element) => handler.start(&mut writer, element)?,
Event::End(element) => handler.end(&mut writer, element)?,
/// Returns node in given offset
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let org = Org::parse("\n\n* foo\n* bar");
///
/// assert!(org.node_at_offset::<Headline>(0).is_none());
///
/// let hdl = org.node_at_offset::<Headline>(2).unwrap();
/// assert_eq!(hdl.title_raw(), "foo");
///
/// let hdl = org.node_at_offset::<Headline>(9).unwrap();
/// assert_eq!(hdl.title_raw(), "bar");
///
/// assert!(org.node_at_offset::<Headline>(999).is_none());
/// ```
pub fn node_at_offset<N: AstNode<Language = OrgLanguage>>(
&self,
offset: impl Into<TextSize>,
) -> Option<N> {
let offset = offset.into();
fn find<N: AstNode<Language = OrgLanguage>>(
node: SyntaxNode,
offset: TextSize,
) -> Option<N> {
if !node.text_range().contains(offset) {
None
} else if N::can_cast(node.kind()) {
N::cast(node)
} else {
node.children().find_map(|node| find(node, offset))
}
}
Ok(())
}
}
impl Default for Org<'static> {
fn default() -> Self {
Org::new()
}
}
impl<'a> Index<NodeId> for Org<'a> {
    type Output = Element<'a>;

    /// Returns the element stored in the arena node identified by `node_id`.
    fn index(&self, node_id: NodeId) -> &Self::Output {
        let node = &self.arena[node_id];
        node.get()
    }
}
impl<'a> IndexMut<NodeId> for Org<'a> {
    /// Returns the element stored in the arena node identified by `node_id`,
    /// mutably.
    fn index_mut(&mut self, node_id: NodeId) -> &mut Self::Output {
        let node = &mut self.arena[node_id];
        node.get_mut()
    }
}
#[cfg(feature = "ser")]
use serde::{ser::Serializer, Serialize};
#[cfg(feature = "ser")]
impl Serialize for Org<'_> {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
use serde_indextree::Node;
serializer.serialize_newtype_struct("Org", &Node::new(self.root, &self.arena))
find(SyntaxNode::new_root(self.green.clone()), offset)
}
}

View file

@ -1,136 +0,0 @@
//! Parsers combinators
use memchr::memchr;
use nom::{
bytes::complete::take_while1,
combinator::verify,
error::{make_error, ErrorKind},
Err, IResult,
};
/// Splits `input` at its first line ending.
///
/// Returns `(rest, line)`, where `line` is the text before the first `\n`
/// (with one directly preceding `\r` removed as well) and `rest` is everything
/// after that `\n`. When `input` contains no `\n`, the whole input is returned
/// as the line and `rest` is empty. This parser never fails.
pub fn line(input: &str) -> IResult<&str, &str, ()> {
    let bytes = input.as_bytes();
    match memchr(b'\n', bytes) {
        None => Ok(("", input)),
        Some(newline) => {
            // Exclude the `\r` of a `\r\n` terminator from the returned line.
            let line_end = if newline > 0 && bytes[newline - 1] == b'\r' {
                newline - 1
            } else {
                newline
            };
            Ok((&input[newline + 1..], &input[0..line_end]))
        }
    }
}
pub fn lines_till<F>(predicate: F) -> impl Fn(&str) -> IResult<&str, &str, ()>
where
F: Fn(&str) -> bool,
{
move |i| {
let mut input = i;
loop {
// TODO: better error kind
if input.is_empty() {
return Err(Err::Error(make_error(input, ErrorKind::Many0)));
}
let (input_, line_) = line(input)?;
debug_assert_ne!(input, input_);
if predicate(line_) {
let offset = i.len() - input.len();
return Ok((input_, &i[0..offset]));
}
input = input_;
}
}
}
/// Builds a parser that consumes leading lines for as long as `predicate`
/// accepts them.
///
/// The parser returns `(rest, lines)`: `lines` is the run of accepted lines
/// (line endings included) and `rest` starts at the first rejected line.
/// Unlike [`lines_till`], running out of input is not an error; the whole
/// input is then returned as `lines`.
pub fn lines_while<F>(predicate: F) -> impl Fn(&str) -> IResult<&str, &str, ()>
where
    F: Fn(&str) -> bool,
{
    move |source| {
        let mut remaining = source;
        while !remaining.is_empty() {
            let (after_line, current) = line(remaining)?;
            debug_assert_ne!(remaining, after_line);
            if !predicate(current) {
                let consumed = source.len() - remaining.len();
                return Ok((remaining, &source[0..consumed]));
            }
            remaining = after_line;
        }
        Ok(("", source))
    }
}
/// Checks `lines_while` on matching, non-matching and blank-line inputs.
#[test]
fn test_lines_while() {
    let while_foo = lines_while(|line| line == "foo");
    assert_eq!(while_foo("foo"), Ok(("", "foo")));
    assert_eq!(while_foo("bar"), Ok(("bar", "")));
    assert_eq!(while_foo("foo\n\n"), Ok(("\n", "foo\n")));

    let while_blank = lines_while(|line| line.trim().is_empty());
    assert_eq!(while_blank("\n\n\n"), Ok(("", "\n\n\n")));
}
/// Consumes the rest of the current line, succeeding only when that line
/// consists solely of ASCII whitespace (an empty line qualifies).
pub fn eol(input: &str) -> IResult<&str, &str, ()> {
    verify(line, |s: &str| s.bytes().all(|b| b.is_ascii_whitespace()))(input)
}
/// Consumes one or more leading characters that are not ASCII whitespace.
///
/// Fails when `input` does not start with such a character.
pub fn one_word(input: &str) -> IResult<&str, &str, ()> {
    let word = take_while1(|c: char| !c.is_ascii_whitespace());
    word(input)
}
/// Skips leading lines that contain only whitespace and counts them.
///
/// Returns `(rest, count)`, where `rest` starts at the first line holding a
/// non-whitespace character (or is empty when every line was blank).
pub fn blank_lines_count(input: &str) -> IResult<&str, usize, ()> {
    let mut remaining = input;
    let mut blanks = 0;
    while !remaining.is_empty() {
        let (after_line, current) = line(remaining)?;
        debug_assert_ne!(remaining, after_line);
        if current.chars().any(|c| !c.is_whitespace()) {
            return Ok((remaining, blanks));
        }
        blanks += 1;
        remaining = after_line;
    }
    Ok(("", blanks))
}
/// Table-driven check of `blank_lines_count`: `(input, (rest, count))`.
#[test]
fn test_blank_lines_count() {
    let cases = [
        ("foo", ("foo", 0)),
        (" foo", (" foo", 0)),
        (" \t\nfoo\n", ("foo\n", 1)),
        ("\n \r\n\nfoo\n", ("foo\n", 3)),
        ("\r\n \n \r\n foo\n", (" foo\n", 3)),
        ("\r\n \n \r\n \n", ("", 4)),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(blank_lines_count(input), Ok(expected));
    }
}

View file

@ -1 +0,0 @@
pub mod combinators;

View file

@ -1,657 +0,0 @@
use std::iter::once;
use std::marker::PhantomData;
use indextree::{Arena, NodeId};
use jetscii::{bytes, BytesConst};
use memchr::{memchr, memchr_iter};
use nom::bytes::complete::take_while1;
use crate::config::ParseConfig;
use crate::elements::{
block::RawBlock, emphasis::Emphasis, keyword::RawKeyword, radio_target::parse_radio_target,
Clock, Comment, Cookie, Drawer, DynBlock, Element, FixedWidth, FnDef, FnRef, InlineCall,
InlineSrc, Link, List, ListItem, Macros, Rule, Snippet, Table, TableCell, TableRow, Target,
Timestamp, Title,
};
use crate::parse::combinators::lines_while;
/// Tree storage the parsers write their elements into.
pub trait ElementArena<'a> {
    /// Creates a node for `element`, appends it under `parent` and returns
    /// the id of the new node.
    fn append<T: Into<Element<'a>>>(&mut self, element: T, parent: NodeId) -> NodeId;

    /// Creates a node for `element` and inserts it in front of `parent`'s
    /// last child, returning the id of the new node.
    fn insert_before_last_child<T: Into<Element<'a>>>(
        &mut self,
        element: T,
        parent: NodeId,
    ) -> NodeId;

    /// Replaces the element stored in `node` with `element`.
    fn set<T: Into<Element<'a>>>(&mut self, node: NodeId, element: T);
}
/// Arena whose elements borrow from the parsed input string.
pub type BorrowedArena<'a> = Arena<Element<'a>>;

impl<'a> ElementArena<'a> for BorrowedArena<'a> {
    /// Allocates a node for `element` and appends it to `parent`.
    fn append<T>(&mut self, element: T, parent: NodeId) -> NodeId
    where
        T: Into<Element<'a>>,
    {
        let created = self.new_node(element.into());
        parent.append(created, self);
        created
    }

    /// Allocates a node for `element` and inserts it in front of `parent`'s
    /// last child; when `parent` has no children the node is appended instead.
    fn insert_before_last_child<T>(&mut self, element: T, parent: NodeId) -> NodeId
    where
        T: Into<Element<'a>>,
    {
        match self[parent].last_child() {
            Some(last) => {
                let created = self.new_node(element.into());
                last.insert_before(created, self);
                created
            }
            None => self.append(element, parent),
        }
    }

    /// Overwrites the element stored in `node`.
    fn set<T>(&mut self, node: NodeId, element: T)
    where
        T: Into<Element<'a>>,
    {
        let slot = self[node].get_mut();
        *slot = element.into();
    }
}
/// Arena adapter that converts every element with `into_owned` before storing
/// it in the wrapped arena.
///
/// `'a` is the lifetime of the elements handed in, `'c` the lifetime of the
/// elements kept by the wrapped arena.
pub struct OwnedArena<'a, 'b, 'c> {
    arena: &'b mut Arena<Element<'c>>,
    phantom: PhantomData<&'a ()>,
}

impl<'a, 'b, 'c> OwnedArena<'a, 'b, 'c> {
    /// Wraps `arena`.
    pub fn new(arena: &'b mut Arena<Element<'c>>) -> OwnedArena<'a, 'b, 'c> {
        let phantom = PhantomData;
        OwnedArena { arena, phantom }
    }
}

impl<'a> ElementArena<'a> for OwnedArena<'a, '_, '_> {
    /// Converts `element` with `into_owned` and appends it to `parent` in the
    /// wrapped arena.
    fn append<T>(&mut self, element: T, parent: NodeId) -> NodeId
    where
        T: Into<Element<'a>>,
    {
        let owned = element.into().into_owned();
        self.arena.append(owned, parent)
    }

    /// Converts `element` with `into_owned` and inserts it in front of
    /// `parent`'s last child in the wrapped arena.
    fn insert_before_last_child<T>(&mut self, element: T, parent: NodeId) -> NodeId
    where
        T: Into<Element<'a>>,
    {
        let owned = element.into().into_owned();
        self.arena.insert_before_last_child(owned, parent)
    }

    /// Converts `element` with `into_owned` and stores it in `node`.
    fn set<T>(&mut self, node: NodeId, element: T)
    where
        T: Into<Element<'a>>,
    {
        let owned = element.into().into_owned();
        self.arena.set(node, owned);
    }
}
/// A piece of not-yet-parsed content together with the tree node its parsed
/// children are attached to. `parse_container` works through a stack of these.
#[derive(Debug)]
pub enum Container<'a> {
// Block, List Item
/// Content parsed by `parse_blocks`.
Block { content: &'a str, node: NodeId },
// Paragraph, Inline Markup
/// Content parsed by `parse_inlines`.
Inline { content: &'a str, node: NodeId },
// Headline
/// Content parsed by `parse_headline_content`.
Headline { content: &'a str, node: NodeId },
// Document
/// Content parsed by `parse_section_and_headlines`.
Document { content: &'a str, node: NodeId },
}
/// Parses `container` and, transitively, every container discovered while
/// doing so.
///
/// Pending containers are kept on a stack, so the most recently discovered
/// container is parsed next; no recursion is involved.
pub fn parse_container<'a, T: ElementArena<'a>>(
    arena: &mut T,
    container: Container<'a>,
    config: &ParseConfig,
) {
    let mut pending = vec![container];
    while let Some(next) = pending.pop() {
        match next {
            Container::Document { content, node } => {
                parse_section_and_headlines(arena, content, node, &mut pending)
            }
            Container::Headline { content, node } => {
                parse_headline_content(arena, content, node, &mut pending, config)
            }
            Container::Block { content, node } => parse_blocks(arena, content, node, &mut pending),
            Container::Inline { content, node } => {
                parse_inlines(arena, content, node, &mut pending)
            }
        }
    }
}
/// Parses the content of a headline: its title first, then the section and
/// sub-headlines that follow.
///
/// The title element is appended to `parent` and its text is queued as an
/// inline container.
///
/// # Panics
///
/// Panics when `Title::parse` fails on `content`.
pub fn parse_headline_content<'a, T: ElementArena<'a>>(
    arena: &mut T,
    content: &'a str,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
    config: &ParseConfig,
) {
    let (body, (title, title_text)) = Title::parse(content, config).unwrap();
    let title_node = arena.append(title, parent);
    containers.push(Container::Inline {
        content: title_text,
        node: title_node,
    });
    parse_section_and_headlines(arena, body, parent, containers);
}
/// Splits `content` into an optional leading section and the headlines that
/// follow it, appending a node for each to `parent`.
///
/// Leading blank lines are skipped first. The text in front of the first
/// headline (if any) becomes a `Section` queued as a block container; every
/// headline becomes a `Headline` node queued as a headline container. When no
/// headline is found, the whole content becomes one section.
pub fn parse_section_and_headlines<'a, T: ElementArena<'a>>(
    arena: &mut T,
    content: &'a str,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) {
    let content = blank_lines_count(content).0;
    if content.is_empty() {
        return;
    }

    // Offsets of every line end; the end of input closes the last line.
    let line_ends = memchr_iter(b'\n', content.as_bytes()).chain(once(content.len()));
    let mut line_start = 0;
    for line_end in line_ends {
        if let Some((mut tail, (headline_content, level))) =
            parse_headline(&content[line_start..])
        {
            // Everything in front of the first headline forms the section.
            if line_start != 0 {
                let section = arena.append(Element::Section, parent);
                containers.push(Container::Block {
                    content: &content[0..line_start],
                    node: section,
                });
            }

            let first = arena.append(Element::Headline { level }, parent);
            containers.push(Container::Headline {
                content: headline_content,
                node: first,
            });

            // The remaining text is a sequence of sibling headlines.
            while let Some((new_tail, (content, level))) = parse_headline(tail) {
                debug_assert_ne!(tail, new_tail);
                let node = arena.append(Element::Headline { level }, parent);
                containers.push(Container::Headline { content, node });
                tail = new_tail;
            }
            return;
        }
        line_start = line_end + 1;
    }

    // No headline at all: the whole content is a single section.
    let section = arena.append(Element::Section, parent);
    containers.push(Container::Block {
        content,
        node: section,
    });
}
/// Splits `content` into block-level elements and paragraphs, appending a node
/// for each to `parent`.
///
/// Block elements are recognised by `parse_block`. Lines not claimed by a
/// block accumulate into a paragraph, which is closed by a whitespace-only
/// line, by the start of a block element, or by the end of the content. The
/// text of every paragraph is queued in `containers` as an inline container.
pub fn parse_blocks<'a, T: ElementArena<'a>>(
arena: &mut T,
content: &'a str,
parent: NodeId,
containers: &mut Vec<Container<'a>>,
) {
// `tail` is the input still to be examined, starting after leading blank lines.
let mut tail = blank_lines_count(content).0;
// A block element right at the start is consumed up front, along with the
// blank lines following it.
if let Some(new_tail) = parse_block(content, arena, parent, containers) {
tail = blank_lines_count(new_tail).0;
}
// `text` marks where the pending paragraph begins; `pos` is the number of
// bytes of `text` accumulated into that paragraph so far.
let mut text = tail;
let mut pos = 0;
while !tail.is_empty() {
// Length of the current line, including its `\n` when there is one.
let i = memchr(b'\n', tail.as_bytes())
.map(|i| i + 1)
.unwrap_or_else(|| tail.len());
if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) {
// A whitespace-only line ends the pending paragraph.
let (tail_, blank) = blank_lines_count(&tail[i..]);
debug_assert_ne!(tail, tail_);
tail = tail_;
let node = arena.append(
Element::Paragraph {
// including the current line (&tail[0..i])
post_blank: blank + 1,
},
parent,
);
containers.push(Container::Inline {
content: &text[0..pos].trim_end(),
node,
});
pos = 0;
text = tail;
} else if let Some(new_tail) = parse_block(tail, arena, parent, containers) {
// A block element starts here. Pending paragraph text belongs in front
// of it, so the paragraph is inserted before the last child.
if pos != 0 {
let node =
arena.insert_before_last_child(Element::Paragraph { post_blank: 0 }, parent);
containers.push(Container::Inline {
content: &text[0..pos].trim_end(),
node,
});
pos = 0;
}
debug_assert_ne!(tail, blank_lines_count(new_tail).0);
tail = blank_lines_count(new_tail).0;
text = tail;
} else {
// An ordinary line: it becomes part of the pending paragraph.
debug_assert_ne!(tail, &tail[i..]);
tail = &tail[i..];
pos += i;
}
}
// Flush the paragraph that runs up to the end of the content.
if !text.is_empty() {
let node = arena.append(Element::Paragraph { post_blank: 0 }, parent);
containers.push(Container::Inline {
content: &text[0..pos].trim_end(),
node,
});
}
}
/// Tries to parse one block-level element at the start of `contents`.
///
/// The parser to try is picked from the first byte of `contents` that is not
/// ASCII whitespace. On success the element is appended to `parent`, any
/// nested content is queued in `containers`, and the unparsed remainder is
/// returned. Returns `None` when `contents` is blank or nothing matches.
pub fn parse_block<'a, T: ElementArena<'a>>(
    contents: &'a str,
    arena: &mut T,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) -> Option<&'a str> {
    let first = *contents
        .as_bytes()
        .iter()
        .find(|byte| !byte.is_ascii_whitespace())?;

    let rest = match first {
        b'[' => {
            let (rest, (fn_def, content)) = FnDef::parse(contents)?;
            let node = arena.append(fn_def, parent);
            containers.push(Container::Block { content, node });
            rest
        }
        b'0'..=b'9' | b'*' => parse_list(arena, contents, parent, containers)?,
        b'C' => {
            let (rest, clock) = Clock::parse(contents)?;
            arena.append(clock, parent);
            rest
        }
        // TODO: LaTeX environment
        b'\'' => return None,
        b'-' => {
            // A horizontal rule takes precedence over a `-` list item.
            if let Some((rest, rule)) = Rule::parse(contents) {
                arena.append(rule, parent);
                rest
            } else {
                parse_list(arena, contents, parent, containers)?
            }
        }
        b':' => {
            // A drawer takes precedence over a fixed-width area.
            if let Some((rest, (drawer, content))) = Drawer::parse(contents) {
                let node = arena.append(drawer, parent);
                containers.push(Container::Block { content, node });
                rest
            } else {
                let (rest, fixed_width) = FixedWidth::parse(contents)?;
                arena.append(fixed_width, parent);
                rest
            }
        }
        b'|' => parse_org_table(arena, contents, containers, parent),
        b'+' => {
            // A table.el table takes precedence over a `+` list item.
            if let Some((rest, table)) = Table::parse_table_el(contents) {
                arena.append(table, parent);
                rest
            } else {
                parse_list(arena, contents, parent, containers)?
            }
        }
        b'#' => {
            // Tried in order: block, dynamic block, keyword, comment.
            if let Some((rest, block)) = RawBlock::parse(contents) {
                let (element, content) = block.into_element();
                // Decide before `element` is moved into the arena.
                let holds_blocks = match element {
                    Element::CenterBlock(_)
                    | Element::QuoteBlock(_)
                    | Element::VerseBlock(_)
                    | Element::SpecialBlock(_) => true,
                    _ => false,
                };
                let node = arena.append(element, parent);
                if holds_blocks {
                    containers.push(Container::Block { content, node });
                }
                rest
            } else if let Some((rest, (dyn_block, content))) = DynBlock::parse(contents) {
                let node = arena.append(dyn_block, parent);
                containers.push(Container::Block { content, node });
                rest
            } else if let Some((rest, keyword)) = RawKeyword::parse(contents) {
                arena.append(keyword.into_element(), parent);
                rest
            } else {
                let (rest, comment) = Comment::parse(contents)?;
                arena.append(comment, parent);
                rest
            }
        }
        _ => return None,
    };
    Some(rest)
}
/// Iterator over the byte offsets at which `parse_inlines` attempts to parse
/// an inline object.
struct InlinePositions<'a> {
/// The text being scanned.
bytes: &'a [u8],
/// Offset at which the next search for a marker byte starts.
pos: usize,
/// An offset queued to be yielded before searching any further.
next: Option<usize>,
}
impl InlinePositions<'_> {
/// Creates an iterator over `bytes`; offset 0 is queued as the first item.
fn new(bytes: &[u8]) -> InlinePositions {
InlinePositions {
bytes,
pos: 0,
next: Some(0),
}
}
}
impl Iterator for InlinePositions<'_> {
type Item = usize;
/// Yields the queued offset when there is one; otherwise searches for the
/// next marker byte and yields an offset derived from it, or `None` when no
/// marker byte is left.
fn next(&mut self) -> Option<Self::Item> {
// The set of marker bytes the search looks for.
lazy_static::lazy_static! {
static ref PRE_BYTES: BytesConst =
bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n');
}
self.next.take().or_else(|| {
PRE_BYTES.find(&self.bytes[self.pos..]).map(|i| {
// Move past the byte that was found; it now sits at `self.pos - 1`.
self.pos += i + 1;
match self.bytes[self.pos - 1] {
// `{`: yield the offset of the brace itself now and queue the offset
// right after it for the following call.
b'{' => {
self.next = Some(self.pos);
self.pos - 1
}
// For these bytes the offset right after the byte is yielded.
b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos,
// `@`, `<` and `[`: the offset of the byte itself is yielded.
_ => self.pos - 1,
}
})
})
}
}
/// Splits `content` into inline objects and plain text, appending a node for
/// each to `parent`.
///
/// An inline object is tried at the very start of the content and then at
/// every offset produced by `InlinePositions`. Text in front of a parsed
/// object and text left over at the end become `Element::Text` nodes.
pub fn parse_inlines<'a, T: ElementArena<'a>>(
    arena: &mut T,
    content: &'a str,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) {
    let mut rest = match parse_inline(content, arena, containers, parent) {
        Some(after) => after,
        None => content,
    };

    loop {
        // First candidate offset at which an inline object actually parses.
        let hit = InlinePositions::new(rest.as_bytes()).find_map(|start| {
            parse_inline(&rest[start..], arena, containers, parent).map(|after| (after, start))
        });
        let (after, start) = match hit {
            Some(found) => found,
            None => break,
        };

        // The object has already been appended, so the text preceding it has
        // to go in front of the last child.
        if start != 0 {
            arena.insert_before_last_child(
                Element::Text {
                    value: rest[0..start].into(),
                },
                parent,
            );
        }
        rest = after;
    }

    if !rest.is_empty() {
        arena.append(Element::Text { value: rest.into() }, parent);
    }
}
/// Tries to parse one inline object at the start of `contents`.
///
/// The parser to try is picked from the first byte. On success the object is
/// appended to `parent`, nested inline content is queued in `containers`, and
/// the unparsed remainder is returned. Inputs shorter than three bytes are
/// never parsed.
pub fn parse_inline<'a, T: ElementArena<'a>>(
    contents: &'a str,
    arena: &mut T,
    containers: &mut Vec<Container<'a>>,
    parent: NodeId,
) -> Option<&'a str> {
    if contents.len() < 3 {
        return None;
    }

    let marker = contents.as_bytes()[0];
    let rest = match marker {
        b'@' => {
            let (rest, snippet) = Snippet::parse(contents)?;
            arena.append(snippet, parent);
            rest
        }
        b'{' => {
            let (rest, macros) = Macros::parse(contents)?;
            arena.append(macros, parent);
            rest
        }
        b'<' => {
            // Tried in order: radio target, target, active timestamp, diary
            // timestamp.
            if let Some((rest, _content)) = parse_radio_target(contents) {
                arena.append(Element::RadioTarget, parent);
                rest
            } else if let Some((rest, target)) = Target::parse(contents) {
                arena.append(target, parent);
                rest
            } else if let Some((rest, timestamp)) = Timestamp::parse_active(contents) {
                arena.append(timestamp, parent);
                rest
            } else {
                let (rest, timestamp) = Timestamp::parse_diary(contents)?;
                arena.append(timestamp, parent);
                rest
            }
        }
        b'[' => {
            // Tried in order: footnote reference, link, cookie, inactive
            // timestamp.
            if let Some((rest, fn_ref)) = FnRef::parse(contents) {
                arena.append(fn_ref, parent);
                rest
            } else if let Some((rest, link)) = Link::parse(contents) {
                arena.append(link, parent);
                rest
            } else if let Some((rest, cookie)) = Cookie::parse(contents) {
                arena.append(cookie, parent);
                rest
            } else {
                let (rest, timestamp) = Timestamp::parse_inactive(contents)?;
                arena.append(timestamp, parent);
                rest
            }
        }
        b'*' | b'+' | b'/' | b'_' | b'=' | b'~' => {
            let (rest, emphasis) = Emphasis::parse(contents, marker)?;
            let (element, content) = emphasis.into_element();
            // Decide before `element` is moved into the arena.
            let holds_inlines = match element {
                Element::Bold | Element::Strike | Element::Italic | Element::Underline => true,
                _ => false,
            };
            let node = arena.append(element, parent);
            if holds_inlines {
                containers.push(Container::Inline { content, node });
            }
            rest
        }
        b's' => {
            let (rest, inline_src) = InlineSrc::parse(contents)?;
            arena.append(inline_src, parent);
            rest
        }
        b'c' => {
            let (rest, inline_call) = InlineCall::parse(contents)?;
            arena.append(inline_call, parent);
            rest
        }
        _ => return None,
    };
    Some(rest)
}
/// Parses a list starting at `contents`: the first item plus every directly
/// following item with the same indentation.
///
/// A list node is appended to `parent`, each item is appended to that node,
/// and each item's content is queued in `containers` as a block container.
/// Returns the input left after the list and its trailing blank lines, or
/// `None` when `contents` does not start with a list item.
pub fn parse_list<'a, T: ElementArena<'a>>(
    arena: &mut T,
    contents: &'a str,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) -> Option<&'a str> {
    let (mut rest, (first_item, first_content)) = ListItem::parse(contents)?;
    let indent = first_item.indent;
    let ordered = first_item.ordered;

    // Placeholder: replaced by the real `List` once `post_blank` is known.
    let list = arena.append(Element::Document { pre_blank: 0 }, parent);

    let first_node = arena.append(first_item, list);
    containers.push(Container::Block {
        content: first_content,
        node: first_node,
    });

    while let Some((next_rest, (item, content))) = ListItem::parse(rest) {
        // An item with a different indentation does not belong to this list.
        if item.indent != indent {
            break;
        }
        let node = arena.append(item, list);
        containers.push(Container::Block { content, node });
        debug_assert_ne!(rest, next_rest);
        rest = next_rest;
    }

    let (rest, post_blank) = blank_lines_count(rest);
    arena.set(
        list,
        List {
            indent,
            ordered,
            post_blank,
        },
    );
    Some(rest)
}
/// Parses an org-style table located at the beginning of `contents`.
///
/// All consecutive lines whose first non-whitespace character is `|` are taken
/// as the table. A `Table::Org` node is appended under `parent`, one row node is
/// appended per kept line, and every cell is queued in `containers` as an inline
/// container so that its content gets parsed later.
///
/// Returns the text following the table and the blank lines after it.
pub fn parse_org_table<'a, T: ElementArena<'a>>(
arena: &mut T,
contents: &'a str,
containers: &mut Vec<Container<'a>>,
parent: NodeId,
) -> &'a str {
// split off the table lines; when nothing matches the table text is empty
let (tail, contents) =
lines_while(|line| line.trim_start().starts_with('|'))(contents).unwrap_or((contents, ""));
let (tail, post_blank) = blank_lines_count(tail);
let mut iter = contents.trim_end().lines().peekable();
let mut lines = vec![];
let mut has_header = false;
// TODO: merge contiguous rules
// a rule (`|-`) on the very first line is dropped
if let Some(line) = iter.next() {
let line = line.trim_start();
if !line.starts_with("|-") {
lines.push(line);
}
}
while let Some(line) = iter.next() {
let line = line.trim_start();
// a rule on the very last line is dropped as well; any other rule
// marks the table as having a header
if iter.peek().is_none() && line.starts_with("|-") {
break;
} else if line.starts_with("|-") {
has_header = true;
}
lines.push(line);
}
let parent = arena.append(
Table::Org {
tblfm: None,
post_blank,
has_header,
},
parent,
);
// From here on `has_header` means "still inside the header": it is cleared
// when the first rule is emitted.
for line in lines {
if line.starts_with("|-") {
if has_header {
arena.append(Element::TableRow(TableRow::HeaderRule), parent);
has_header = false;
} else {
arena.append(Element::TableRow(TableRow::BodyRule), parent);
}
} else {
if has_header {
let parent = arena.append(Element::TableRow(TableRow::Header), parent);
// `skip(1)` discards the (empty) text in front of the leading `|`
for content in line.split_terminator('|').skip(1) {
let node = arena.append(Element::TableCell(TableCell::Header), parent);
containers.push(Container::Inline {
content: content.trim(),
node,
});
}
} else {
let parent = arena.append(Element::TableRow(TableRow::Body), parent);
// `skip(1)` discards the (empty) text in front of the leading `|`
for content in line.split_terminator('|').skip(1) {
let node = arena.append(Element::TableCell(TableCell::Body), parent);
containers.push(Container::Inline {
content: content.trim(),
node,
});
}
}
}
}
tail
}
/// Infallible wrapper around `crate::parse::combinators::blank_lines_count`.
///
/// Returns whatever the combinator yields, and falls back to `(input, 0)`
/// (nothing consumed, zero lines) when the combinator does not succeed.
pub fn blank_lines_count(input: &str) -> (&str, usize) {
    let nothing_consumed = (input, 0);
    let counted = crate::parse::combinators::blank_lines_count(input);
    counted.unwrap_or(nothing_consumed)
}
/// Splits off the headline that starts at the beginning of `input`.
///
/// The headline extends over every following line that is not itself a headline
/// of the same or a lower level number (i.e. deeper headlines are included).
///
/// Returns `(tail, (headline_text, level))`, where `headline_text` is a prefix of
/// `input` that begins with the stars, or `None` when `input` does not start with
/// a headline.
pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> {
    let (after_stars, level) = parse_headline_level(input)?;

    let (tail, content) = lines_while(move |line| match parse_headline_level(line) {
        // a deeper headline still belongs to this one
        Some((_, nested_level)) => nested_level > level,
        // any non-headline line belongs to this one
        None => true,
    })(after_stars)
    .unwrap_or((after_stars, ""));

    // the stars are all one-byte characters, so `level` is also their byte length
    let headline_len = level + content.len();
    Some((tail, (&input[..headline_len], level)))
}
/// Recognizes the leading stars of a headline.
///
/// Succeeds when `input` starts with one or more `*` that are followed by a
/// space, a `\n`, or the end of input. Returns the text after the stars together
/// with the number of stars.
pub fn parse_headline_level(input: &str) -> Option<(&str, usize)> {
    let (rest, stars) = take_while1::<_, _, ()>(|c: char| c == '*')(input).ok()?;
    match rest.as_bytes().first() {
        None | Some(b' ') | Some(b'\n') => Some((rest, stars.len())),
        Some(_) => None,
    }
}

326
src/replace.rs Normal file
View file

@ -0,0 +1,326 @@
use rowan::{
ast::{support, AstNode},
SyntaxNode, TextRange, TextSize, TokenAtOffset,
};
use crate::ast::Headline;
use crate::syntax::{
combinator::line_starts_iter, document::document_node, headline::headline_node, OrgLanguage,
};
use crate::Org;
/// Describes how a text range relates to the headlines of a syntax tree.
#[derive(Debug)]
enum RangeShape {
/// The range lies inside `headline`, behind its first `level + 1` bytes.
InsideHeadline { headline: Headline, level: usize },
/// The range is exactly the text range of `headline`.
ExactHeadline { headline: Headline, level: usize },
/// Neither of the above.
Other,
}
impl RangeShape {
/// Classifies `range` by walking down the headline children of `node`.
///
/// At every level the direct `Headline` children are inspected: an exact match
/// ends the search, a containing headline becomes the new search root (so the
/// innermost containing headline wins), and the search stops once no child
/// matches.
pub fn new(mut node: SyntaxNode<OrgLanguage>, range: TextRange) -> Self {
let mut result = RangeShape::Other;
'l: loop {
for headline in support::children::<Headline>(&node) {
let level = headline.level();
let start = headline.syntax.text_range().start();
let end = headline.syntax.text_range().end();
if headline.syntax.text_range() == range {
result = RangeShape::ExactHeadline { headline, level };
break 'l;
}
// the first `level + 1` bytes of the headline are excluded from the
// "inside" area — presumably the stars and the character after them
if TextRange::new(start + TextSize::from(level as u32 + 1), end)
.contains_range(range)
{
// descend: restart the scan with the children of this headline
node = headline.syntax.clone();
result = RangeShape::InsideHeadline { headline, level };
continue 'l;
}
}
// no child matched at this level
break;
}
result
}
}
/// Describes which headlines a replacement text contains.
///
/// A line counts as a headline when it starts with one or more `*` immediately
/// followed by a space.
#[derive(Debug, PartialEq)]
enum ReplaceWithShape {
    /// The text contains headlines, the lowest level number among them being
    /// `level`, but it is not a single top headline starting at offset zero.
    IncludeHeadline { level: usize },
    /// The text starts with a headline of `level`, and every other headline in
    /// it has a strictly greater level number.
    ExactHeadline { level: usize },
    /// The text contains no headline.
    Other,
}

impl ReplaceWithShape {
    /// Classifies `text` by scanning every line start for a headline.
    fn new(text: &str) -> Self {
        let mut shape = ReplaceWithShape::Other;

        for start in line_starts_iter(text) {
            let line = &text[start..];
            let level = line.bytes().take_while(|&c| c == b'*').count();

            // not a headline: no stars, or stars that are not followed by a space
            if level == 0 || line.as_bytes().get(level) != Some(&b' ') {
                continue;
            }

            shape = match shape {
                // first headline seen: "exact" only when it opens the text
                ReplaceWithShape::Other if start == 0 => ReplaceWithShape::ExactHeadline { level },
                ReplaceWithShape::Other => ReplaceWithShape::IncludeHeadline { level },
                // a later headline at the same or a smaller level number ends
                // the "exact" state
                ReplaceWithShape::ExactHeadline { level: l } if level <= l => {
                    ReplaceWithShape::IncludeHeadline { level }
                }
                // keep track of the smallest level number seen
                ReplaceWithShape::IncludeHeadline { level: l } if level < l => {
                    ReplaceWithShape::IncludeHeadline { level }
                }
                unchanged => unchanged,
            };
        }

        shape
    }
}
impl Org {
/// Replace specified range with given text, and reparse the syntax tree with current config
///
/// This method optimizes parsing by analyzing the selected range and given text, and reducing
/// the amount of data processed by parser.
///
/// ```rust
/// use orgize::{Org, ast::Headline, TextRange, TextSize};
///
/// let mut org = Org::parse("** hello");
/// let hdl = org.first_node::<Headline>().unwrap();
/// assert_eq!(hdl.level(), 2);
///
/// // replace '**' with '*****'
/// org.replace_range(TextRange::new(0.into(), 2.into()), "*****");
/// // since the syntax tree is changed, we have to query again
/// let hdl = org.first_node::<Headline>().unwrap();
/// assert_eq!(hdl.level(), 5);
/// ```
pub fn replace_range(&mut self, range: TextRange, replace_with: impl AsRef<str>) {
let replace_with = replace_with.as_ref();
// Only the affected headline is reparsed when the shapes of the range and
// of the new text satisfy one of the guarded arms below; every other
// combination falls back to reparsing the whole document.
match (
RangeShape::new(self.document().syntax, range),
ReplaceWithShape::new(replace_with),
) {
// the new text only brings in headlines with a strictly greater level
// number than the headline being edited
(
RangeShape::ExactHeadline { headline, level },
ReplaceWithShape::IncludeHeadline { level: new_level },
)
| (
RangeShape::InsideHeadline { headline, level },
ReplaceWithShape::IncludeHeadline { level: new_level },
) if level < new_level => self.replace_headline(headline, range, replace_with),
// a whole headline is swapped for exactly one headline
(
RangeShape::ExactHeadline { headline, level },
ReplaceWithShape::ExactHeadline { level: new_level },
) if level <= new_level
// non-last headline must ends with a newline
&& (headline.end() == self.document().end()
|| replace_with.ends_with(&['\n', '\r'])) =>
{
self.replace_headline(headline, range, replace_with)
}
// a headline is inserted inside another one; see `follows_newline` for the
// exact condition checked at the start of the range
(
RangeShape::InsideHeadline { headline, level },
ReplaceWithShape::ExactHeadline { level: new_level },
) if level <= new_level && follows_newline(headline.syntax(), range.start()) => {
self.replace_headline(headline, range, replace_with)
}
_ => self.full_parse(range, replace_with),
}
}
/// Reparses the whole document after replacing `range` with `replace_with`.
///
/// When `range` covers the entire document the new text is parsed directly;
/// otherwise the current text is materialized and patched first.
fn full_parse(&mut self, range: TextRange, replace_with: &str) {
if self.document().syntax().text_range() == range {
let input = (replace_with, &self.config).into();
self.green = document_node(input).unwrap().1.into_node().unwrap();
} else {
let start: usize = range.start().into();
let end: usize = range.end().into();
let mut text = self.green.to_string();
text.replace_range(start..end, replace_with);
let input = (text.as_ref(), &self.config).into();
self.green = document_node(input).unwrap().1.into_node().unwrap();
}
}
/// Reparses only `headline` after replacing `range` with `replace_with`, and
/// installs the tree returned by replacing the headline's syntax node.
///
/// `range` is expressed in document offsets and must lie within `headline`.
fn replace_headline(&mut self, headline: Headline, range: TextRange, replace_with: &str) {
if headline.syntax().text_range() == range {
let input = (replace_with, &self.config).into();
self.green = headline
.syntax
.replace_with(headline_node(input).unwrap().1.into_node().unwrap());
} else {
// translate document offsets into offsets relative to the headline text
let offset: usize = headline.syntax.text_range().start().into();
let start: usize = range.start().into();
let end: usize = range.end().into();
let mut text = headline.syntax.to_string();
text.replace_range((start - offset)..(end - offset), replace_with);
let input = (text.as_ref(), &self.config).into();
self.green = headline
.syntax
.replace_with(headline_node(input).unwrap().1.into_node().unwrap());
}
}
}
/// Checks for a line ending around `offset` inside `syntax`.
///
/// * `offset` sits between two tokens: true when the left token ends with `\n`
///   or `\r`.
/// * `offset` sits inside a single token: true when the part of that token from
///   `offset` to the token's end ends with `\n` or `\r`.
/// * no token at `offset`: false.
fn follows_newline(syntax: &SyntaxNode<OrgLanguage>, offset: TextSize) -> bool {
    let ends_with_eol = |text: &str| text.ends_with('\n') || text.ends_with('\r');

    match syntax.token_at_offset(offset) {
        TokenAtOffset::None => false,
        TokenAtOffset::Single(token) => {
            let skipped: usize = (offset - token.text_range().start()).into();
            ends_with_eol(&token.text()[skipped..])
        }
        TokenAtOffset::Between(left, _) => ends_with_eol(left.text()),
    }
}
// Covers `follows_newline`, `ReplaceWithShape::new`, `RangeShape::new` and, through
// the `t!` macro, `Org::replace_range` against a full reparse of the expected text.
#[test]
fn replace() {
// --- follows_newline ---
assert!(follows_newline(
Org::parse("\n*a*").document().syntax(),
TextSize::new(1)
));
assert!(follows_newline(
Org::parse(" \na").document().syntax(),
TextSize::new(1)
));
assert!(follows_newline(
Org::parse(" \ra").document().syntax(),
TextSize::new(1)
));
assert!(!follows_newline(
Org::parse(" *a*").document().syntax(),
TextSize::new(1)
));
assert!(!follows_newline(
Org::parse(" a").document().syntax(),
TextSize::new(1)
));
// --- ReplaceWithShape::new ---
assert_eq!(ReplaceWithShape::new(""), ReplaceWithShape::Other);
assert_eq!(ReplaceWithShape::new(" ** a"), ReplaceWithShape::Other);
assert_eq!(
ReplaceWithShape::new("\n** a"),
ReplaceWithShape::IncludeHeadline { level: 2 }
);
assert_eq!(
ReplaceWithShape::new("** a"),
ReplaceWithShape::ExactHeadline { level: 2 }
);
assert_eq!(
ReplaceWithShape::new("** a\n* 1"),
ReplaceWithShape::IncludeHeadline { level: 1 }
);
assert_eq!(
ReplaceWithShape::new("* a\n** 1"),
ReplaceWithShape::ExactHeadline { level: 1 }
);
assert_eq!(
ReplaceWithShape::new("** a\n** 1"),
ReplaceWithShape::IncludeHeadline { level: 2 }
);
// --- RangeShape::new ---
assert!(matches!(
RangeShape::new(
Org::parse("** abc\n** b").document().syntax,
TextRange::new(0.into(), 7.into())
),
RangeShape::ExactHeadline { level: 2, .. }
));
assert!(matches!(
RangeShape::new(
Org::parse("** abc\n** b").document().syntax,
TextRange::new(3.into(), 7.into())
),
RangeShape::InsideHeadline { level: 2, .. }
));
assert!(matches!(
RangeShape::new(
Org::parse("** abc\n** b").document().syntax,
TextRange::new(2.into(), 7.into())
),
RangeShape::Other
));
assert!(matches!(
RangeShape::new(
Org::parse("* abc\n** b").document().syntax,
TextRange::new(4.into(), 7.into())
),
RangeShape::InsideHeadline { level: 1, .. }
));
// `t!(input, replace)`: the first and the last `|` in `input` delimit the range
// to replace. The markers are removed, the range is replaced through
// `replace_range`, and the resulting tree must print the same as a fresh parse
// of the expected output.
macro_rules! t {
($input:literal, $replace:literal) => {
let start = $input.find('|').unwrap();
let end = $input.rfind('|').unwrap();
let input = format!(
"{}{}{}",
&$input[0..start],
&$input[start + 1..end],
&$input[end + 1..]
);
let output = format!("{}{}{}", &$input[0..start], $replace, &$input[end + 1..]);
let mut org = Org::parse(input);
// `end - 1` accounts for the removed first marker
org.replace_range(
TextRange::new((start as u32).into(), (end as u32 - 1).into()),
$replace,
);
debug_assert_eq!(
format!("{:#?}", org.document().syntax),
format!("{:#?}", Org::parse(output).document().syntax),
);
};
}
t!("||", "");
t!("||", "** abc");
t!("*** abc |edf|", "fde");
t!("*|** abc edf|", "fde");
t!("* abc \n|** edf|", "** abc");
t!("* ab|c \n*| edf", "** abc");
t!("* abc \n|** edf|", "** abc");
t!("* abc \n|** edf|", "** eee\n** eee");
t!("* abc \n|** edf|", "*** abc");
t!("* abc \n*|* edf|", "*** abc");
t!("* abc \n**| edf|", "*** abc");
t!("* abc \n**| |edf", "*** abc");
t!("* abc \n** |edf|", "*** abc");
t!("* abc \n** |edf|", "\n*** abc");
t!("* abc \n** |edf|", "\n** abc");
t!("* abc \n** |edf|", "\n* abc");
t!("* abc \n** \n|edf|", "* abc");
t!("* abc \n** \n|edf|", "* abc\n* abc");
t!("* abc \n** |edf|", "* abc");
t!("* abc \n** |edf|", "* abc\n* abc");
t!("* abc \n|* edf\n|* gh", "* hg");
t!("* abc \n|* edf\n|* gh", "* hg\n");
t!("* abc \n* edf\n|* gh|", "* hg");
}

305
src/syntax/block.rs Normal file
View file

@ -0,0 +1,305 @@
use nom::{
branch::alt,
bytes::complete::{tag, tag_no_case, take_while, take_while1},
character::complete::{alpha1, space0, space1},
combinator::{cond, opt},
sequence::{separated_pair, tuple},
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, eol_or_eof, line_starts_iter, node, token, trim_line_end, GreenElement,
NodeBuilder,
},
element::element_nodes,
input::Input,
keyword::affiliated_keyword_nodes,
SyntaxKind::*,
};
/// Parses a block: affiliated keywords, a `#+BEGIN_NAME` line, the content, a
/// matching `#+END_NAME` line and the blank lines that follow.
///
/// The node kind is chosen from the block name (compared case-insensitively);
/// unknown names produce a `SPECIAL_BLOCK`. Fails when no end line is found.
fn block_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, affiliated_keywords) = affiliated_keyword_nodes(input)?;
let (input, (block_begin, name)) = block_begin_node(input)?;
let (input, pre_blank) = blank_lines(input)?;
let kind = match name {
s if s.eq_ignore_ascii_case("COMMENT") => COMMENT_BLOCK,
s if s.eq_ignore_ascii_case("EXAMPLE") => EXAMPLE_BLOCK,
s if s.eq_ignore_ascii_case("EXPORT") => EXPORT_BLOCK,
s if s.eq_ignore_ascii_case("SRC") => SOURCE_BLOCK,
s if s.eq_ignore_ascii_case("CENTER") => CENTER_BLOCK,
s if s.eq_ignore_ascii_case("QUOTE") => QUOTE_BLOCK,
s if s.eq_ignore_ascii_case("VERSE") => VERSE_BLOCK,
_ => SPECIAL_BLOCK,
};
// Try every line start as the position of the end line: `contents` is the
// text in front of that line and `input` the text starting at it. The first
// line that parses as the end line closes the block.
for (input, contents) in line_starts_iter(&input).map(|i| input.take_split(i)) {
if let Ok((input, block_end)) = block_end_node(input, name) {
let (input, post_blank) = blank_lines(input)?;
let mut children = vec![];
children.extend(affiliated_keywords);
children.push(block_begin);
children.extend(pre_blank);
// greater elements hold other elements; every other block keeps its
// content as text, with comma quoting split out
if kind.is_greater_element() {
children.push(node(BLOCK_CONTENT, element_nodes(contents)?));
} else {
children.push(node(BLOCK_CONTENT, comma_quoted_text_nodes(contents)));
}
children.push(block_end);
children.extend(post_blank);
return Ok((input, node(kind, children)));
}
}
Err(nom::Err::Error(()))
}
/// Parses a `#+BEGIN_NAME ...` line into a `BLOCK_BEGIN` node.
///
/// Returns the node together with the block name as written in the input.
/// `SRC` blocks additionally get language, switches and parameters tokens,
/// `EXPORT` blocks get an export type token; for every other name the remainder
/// of the line is kept as plain text.
fn block_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
let (input, (ws1, begin, name)) = tuple((space0, tag_no_case("#+BEGIN_"), alpha1))(input)?;
let mut b = NodeBuilder::new();
b.ws(ws1);
b.text(begin);
b.text(name);
if name.eq_ignore_ascii_case("SRC") {
// language: the first run of non-whitespace characters after the name
let (input, language) = opt(tuple((
space1,
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
)))(input)?;
let (input, switches) = opt(tuple((space1, source_block_switches)))(input)?;
let (input, ws1) = space0(input)?;
// whatever is left on the line (minus trailing whitespace) is the parameters
let (input, (parameters, ws2, nl)) = trim_line_end(input)?;
if let Some((ws, language)) = language {
b.ws(ws);
b.token(SRC_BLOCK_LANGUAGE, language);
}
if let Some((ws, switches)) = switches {
b.ws(ws);
b.token(SRC_BLOCK_SWITCHES, switches);
}
b.ws(ws1);
if !parameters.is_empty() {
b.token(SRC_BLOCK_PARAMETERS, parameters);
}
b.ws(ws2);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
} else if name.eq_ignore_ascii_case("EXPORT") {
// export type: the first run of non-whitespace characters after the name
let (input, ty) = opt(tuple((
space1,
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
)))(input)?;
let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?;
let (input, nl) = eol_or_eof(input)?;
if let Some((ws, ty)) = ty {
b.ws(ws);
b.token(EXPORT_BLOCK_TYPE, ty);
}
b.text(data);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
} else {
let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?;
let (input, nl) = eol_or_eof(input)?;
b.text(data);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
}
}
/// Recognizes a run of source block switches at the start of `input`.
///
/// A switch is either `-l` / `-n` followed by whitespace and a run of
/// non-whitespace characters, or `+` / `-` followed by alphabetic characters.
/// Every switch after the first must be preceded by whitespace. Returns the
/// whole matched span as one slice and fails when no switch is found.
fn source_block_switches(input: Input) -> IResult<Input, Input, ()> {
let mut i = input;
while !i.is_empty() {
match tuple::<_, _, (), _>((
// separator, required only once something has already been consumed
cond(i.len() != input.len(), space1),
alt((
separated_pair(
alt((tag("-l"), tag("-n"))),
space1,
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
),
tuple((tag("+"), alpha1)),
tuple((tag("-"), alpha1)),
)),
))(i)
{
Ok((i_, _)) => i = i_,
_ => break,
}
}
// number of bytes consumed by the loop above
let len = input.len() - i.len();
if len == 0 {
Err(nom::Err::Error(()))
} else {
Ok(input.take_split(len))
}
}
/// Parses the `#+END_NAME` line that closes a block named `name` into a
/// `BLOCK_END` node.
///
/// The line may be indented and may carry trailing whitespace; it has to end
/// with a line ending or the end of input. Both the `#+END_` prefix and the
/// block name are matched case-insensitively, so that e.g. `#+BEGIN_SRC` can be
/// closed by `#+end_src`.
fn block_end_node<'a>(input: Input<'a>, name: &str) -> IResult<Input<'a>, GreenElement, ()> {
    let (input, (ws, end, name, ws_, nl)) = tuple((
        space0,
        tag_no_case("#+END_"),
        // the begin name is compared case-insensitively everywhere else,
        // so the end name has to be as well
        tag_no_case(name),
        space0,
        eol_or_eof,
    ))(input)?;

    let mut b = NodeBuilder::new();
    b.ws(ws);
    b.text(end);
    b.text(name);
    b.ws(ws_);
    b.nl(nl);
    Ok((input, b.finish(BLOCK_END)))
}
/// Splits block content into `TEXT` and `COMMA` tokens.
///
/// A comma that opens a line and is directly followed by `*` or `#+` is emitted
/// as its own `COMMA` token; everything else is emitted as `TEXT`. Empty text
/// tokens are never produced.
fn comma_quoted_text_nodes(input: Input) -> Vec<GreenElement> {
    let s = input.as_str();
    let mut nodes = Vec::new();
    // start of the text that has not been emitted yet
    let mut pending = 0;

    for line_start in line_starts_iter(s) {
        let line = &s[line_start..];
        // line must start with either ",*" or ",#+"
        if !line.starts_with(",*") && !line.starts_with(",#+") {
            continue;
        }

        let text = &s[pending..line_start];
        if !text.is_empty() {
            nodes.push(token(TEXT, text));
        }
        nodes.push(token(COMMA, ","));
        // resume right behind the comma
        pending = line_start + 1;
    }

    let trailing = &s[pending..];
    if !trailing.is_empty() {
        nodes.push(token(TEXT, trailing));
    }
    nodes
}
/// Parses a block element.
///
/// Delegates to `block_node_base` through `lossless_parser!`.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn block_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(block_node_base, input)
}
// Snapshot tests for `block_node`: comma quoting in an example block, blank lines
// inside an empty source block, lowercase keywords, and a source block header
// with language, switches and parameters.
#[test]
fn test_parse() {
use crate::ast::{ExampleBlock, SourceBlock};
use crate::tests::to_ast;
let to_src_block = to_ast::<SourceBlock>(block_node);
let to_example_block = to_ast::<ExampleBlock>(block_node);
// comma-quoted lines are split into COMMA + TEXT
insta::assert_debug_snapshot!(
to_example_block(
r#"#+BEGIN_EXAMPLE
,* headline
,#+block
text
#+END_EXAMPLE"#
).syntax,
@r###"
EXAMPLE_BLOCK@0..59
BLOCK_BEGIN@0..16
TEXT@0..8 "#+BEGIN_"
TEXT@8..15 "EXAMPLE"
NEW_LINE@15..16 "\n"
BLOCK_CONTENT@16..42
COMMA@16..17 ","
TEXT@17..28 "* headline\n"
COMMA@28..29 ","
TEXT@29..42 "#+block\ntext\n"
BLOCK_END@42..59
WHITESPACE@42..46 " "
TEXT@46..52 "#+END_"
TEXT@52..59 "EXAMPLE"
"###
);
// blank lines after the begin line are kept outside of the content
insta::assert_debug_snapshot!(
to_src_block(
r#"#+BEGIN_SRC
#+END_SRC"#
).syntax,
@r###"
SOURCE_BLOCK@0..27
BLOCK_BEGIN@0..12
TEXT@0..8 "#+BEGIN_"
TEXT@8..11 "SRC"
NEW_LINE@11..12 "\n"
BLANK_LINE@12..13 "\n"
BLANK_LINE@13..14 "\n"
BLOCK_CONTENT@14..14
BLOCK_END@14..27
WHITESPACE@14..18 " "
TEXT@18..24 "#+END_"
TEXT@24..27 "SRC"
"###
);
// lowercase keywords
insta::assert_debug_snapshot!(
to_src_block(
r#"#+begin_src
#+end_src"#
).syntax,
@r###"
SOURCE_BLOCK@0..25
BLOCK_BEGIN@0..12
TEXT@0..8 "#+begin_"
TEXT@8..11 "src"
NEW_LINE@11..12 "\n"
BLOCK_CONTENT@12..12
BLOCK_END@12..25
WHITESPACE@12..16 " "
TEXT@16..22 "#+end_"
TEXT@22..25 "src"
"###
);
// language, switches and parameters on the begin line
insta::assert_debug_snapshot!(
to_src_block(
r#"#+BEGIN_SRC javascript -n 20 -r :var n=0, l=2 :foo=bar
alert('Hello World!');
#+END_SRC
"#).syntax,
@r###"
SOURCE_BLOCK@0..100
BLOCK_BEGIN@0..58
TEXT@0..8 "#+BEGIN_"
TEXT@8..11 "SRC"
WHITESPACE@11..12 " "
SRC_BLOCK_LANGUAGE@12..22 "javascript"
WHITESPACE@22..24 " "
SRC_BLOCK_SWITCHES@24..32 "-n 20 -r"
WHITESPACE@32..34 " "
SRC_BLOCK_PARAMETERS@34..57 ":var n=0, l=2 :foo=bar"
NEW_LINE@57..58 "\n"
BLOCK_CONTENT@58..81
TEXT@58..81 "alert('Hello World!');\n"
BLOCK_END@81..95
WHITESPACE@81..85 " "
TEXT@85..91 "#+END_"
TEXT@91..94 "SRC"
NEW_LINE@94..95 "\n"
BLANK_LINE@95..96 "\n"
BLANK_LINE@96..100 " "
"###
);
// TODO: more testing
}

134
src/syntax/clock.rs Normal file
View file

@ -0,0 +1,134 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1, space0},
combinator::{map, opt, recognize},
sequence::tuple,
IResult,
};
use super::{
combinator::{
blank_lines, colon_token, double_arrow_token, eol_or_eof, GreenElement, NodeBuilder,
},
input::Input,
timestamp::{timestamp_active_node, timestamp_inactive_node},
SyntaxKind,
};
/// Parses a clock line into a `CLOCK` node.
///
/// Accepted shape: optional indentation, `CLOCK:`, optional whitespace, an
/// inactive or active timestamp, an optional `=> H:MM` duration, optional
/// trailing whitespace, a line ending (or end of input) and any blank lines
/// that follow.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn clock_node(input: Input) -> IResult<Input, GreenElement, ()> {
let mut parser = map(
tuple((
space0,
tag("CLOCK:"),
space0,
alt((timestamp_inactive_node, timestamp_active_node)),
// optional duration: `=>` followed by digits, a colon and digits
opt(tuple((
space0,
double_arrow_token,
space0,
recognize(tuple((digit1, colon_token, digit1))),
))),
space0,
eol_or_eof,
blank_lines,
)),
// rebuild the node from the parsed pieces, in input order
|(ws, clock, ws_, timestamp, duration, ws__, nl, post_blank)| {
let mut b = NodeBuilder::new();
b.ws(ws);
b.text(clock);
b.ws(ws_);
b.push(timestamp);
if let Some((ws, double_arrow, ws_, time)) = duration {
b.ws(ws);
b.push(double_arrow);
b.ws(ws_);
b.text(time);
}
b.ws(ws__);
b.nl(nl);
b.children.extend(post_blank);
b.finish(SyntaxKind::CLOCK)
},
);
crate::lossless_parser!(parser, input)
}
// Snapshot tests for `clock_node`: a clock with a single timestamp, and a clock
// with a timestamp range, a duration and a trailing blank line.
#[test]
fn parse() {
use crate::ast::Clock;
use crate::tests::to_ast;
let to_clock = to_ast::<Clock>(clock_node);
// single timestamp, no duration
insta::assert_debug_snapshot!(
to_clock("CLOCK: [2003-09-16 Tue 09:39]").syntax,
@r###"
CLOCK@0..29
TEXT@0..6 "CLOCK:"
WHITESPACE@6..7 " "
TIMESTAMP_INACTIVE@7..29
L_BRACKET@7..8 "["
TIMESTAMP_YEAR@8..12 "2003"
MINUS@12..13 "-"
TIMESTAMP_MONTH@13..15 "09"
MINUS@15..16 "-"
TIMESTAMP_DAY@16..18 "16"
WHITESPACE@18..19 " "
TIMESTAMP_DAYNAME@19..22 "Tue"
WHITESPACE@22..23 " "
TIMESTAMP_HOUR@23..25 "09"
COLON@25..26 ":"
TIMESTAMP_MINUTE@26..28 "39"
R_BRACKET@28..29 "]"
"###
);
// timestamp range with duration and trailing blank line
insta::assert_debug_snapshot!(
to_clock("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00\n\n").syntax,
@r###"
CLOCK@0..64
TEXT@0..6 "CLOCK:"
WHITESPACE@6..7 " "
TIMESTAMP_INACTIVE@7..53
L_BRACKET@7..8 "["
TIMESTAMP_YEAR@8..12 "2003"
MINUS@12..13 "-"
TIMESTAMP_MONTH@13..15 "09"
MINUS@15..16 "-"
TIMESTAMP_DAY@16..18 "16"
WHITESPACE@18..19 " "
TIMESTAMP_DAYNAME@19..22 "Tue"
WHITESPACE@22..23 " "
TIMESTAMP_HOUR@23..25 "09"
COLON@25..26 ":"
TIMESTAMP_MINUTE@26..28 "39"
R_BRACKET@28..29 "]"
MINUS2@29..31 "--"
L_BRACKET@31..32 "["
TIMESTAMP_YEAR@32..36 "2003"
MINUS@36..37 "-"
TIMESTAMP_MONTH@37..39 "09"
MINUS@39..40 "-"
TIMESTAMP_DAY@40..42 "16"
WHITESPACE@42..43 " "
TIMESTAMP_DAYNAME@43..46 "Tue"
WHITESPACE@46..47 " "
TIMESTAMP_HOUR@47..49 "10"
COLON@49..50 ":"
TIMESTAMP_MINUTE@50..52 "39"
R_BRACKET@52..53 "]"
WHITESPACE@53..54 " "
DOUBLE_ARROW@54..56 "=>"
WHITESPACE@56..58 " "
TEXT@58..62 "1:00"
NEW_LINE@62..63 "\n"
BLANK_LINE@63..64 "\n"
"###
);
}

162
src/syntax/cloze.rs Normal file
View file

@ -0,0 +1,162 @@
use nom::{bytes::complete::take_until, combinator::opt, sequence::tuple, IResult, InputTake};
use crate::syntax::{
combinator::{at_token, l_curly2_token, l_curly_token, r_curly_token},
object::standard_object_nodes,
};
use super::{
combinator::{GreenElement, NodeBuilder},
input::Input,
SyntaxKind,
};
/// Parses a cloze object (`{{text}}`, optionally with a `{hint}` and/or an `@id`).
///
/// Delegates to `cloze_node_base` through `lossless_parser!`.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn cloze_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(cloze_node_base, input)
}
/// Parses `{{text}}`, `{{text}{hint}}`, `{{text}@id}` or `{{text}{hint}@id}`
/// into a `CLOZE` node.
///
/// The text ends at the first `}` that is not enclosed by a pair of `$`; it must
/// not be empty. The text itself is parsed as standard objects, while hint and
/// id are kept as plain text tokens.
fn cloze_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, l_curly2) = l_curly2_token(input)?;

    // Find the `}` closing the text. `$` toggles "inside latex", and a `}` seen
    // while inside latex does not close the text. Zero doubles as "not found".
    let mut inside_latex = false;
    let text_end = input
        .bytes()
        .position(|byte| match byte {
            b'$' => {
                inside_latex = !inside_latex;
                false
            }
            b'}' => !inside_latex,
            _ => false,
        })
        .unwrap_or(0);

    // either no closing brace at all, or an empty text
    if text_end == 0 {
        return Err(nom::Err::Error(()));
    }

    let (input, text) = input.take_split(text_end);
    let (input, text_close) = r_curly_token(input)?;
    let (input, hint) = opt(tuple((l_curly_token, take_until("}"), r_curly_token)))(input)?;
    let (input, id) = opt(tuple((at_token, take_until("}"))))(input)?;
    let (input, cloze_close) = r_curly_token(input)?;

    let mut b = NodeBuilder::new();
    b.push(l_curly2);
    b.children.extend(standard_object_nodes(text));
    b.push(text_close);
    if let Some((hint_open, hint_text, hint_close)) = hint {
        b.push(hint_open);
        b.token(SyntaxKind::TEXT, hint_text);
        b.push(hint_close);
    }
    if let Some((at, id_text)) = id {
        b.push(at);
        b.token(SyntaxKind::TEXT, id_text);
    }
    b.push(cloze_close);

    Ok((input, b.finish(SyntaxKind::CLOZE)))
}
// Snapshot tests for `cloze_node` covering every combination of hint and id, a
// latex fragment containing braces, and a handful of inputs that must be rejected.
#[test]
fn parse() {
use crate::ast::Cloze;
use crate::config::ParseConfig;
use crate::tests::to_ast;
let to_cloze = to_ast::<Cloze>(cloze_node);
// text only
insta::assert_debug_snapshot!(
to_cloze("{{text}}").syntax,
@r###"
CLOZE@0..8
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
R_CURLY@7..8 "}"
"###
);
// text + id
insta::assert_debug_snapshot!(
to_cloze("{{text}@id}").syntax,
@r###"
CLOZE@0..11
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
AT@7..8 "@"
TEXT@8..10 "id"
R_CURLY@10..11 "}"
"###
);
// text + hint
insta::assert_debug_snapshot!(
to_cloze("{{text}{hint}}").syntax,
@r###"
CLOZE@0..14
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
L_CURLY@7..8 "{"
TEXT@8..12 "hint"
R_CURLY@12..13 "}"
R_CURLY@13..14 "}"
"###
);
// text + hint + id
insta::assert_debug_snapshot!(
to_cloze("{{text}{hint}@id}").syntax,
@r###"
CLOZE@0..17
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
L_CURLY@7..8 "{"
TEXT@8..12 "hint"
R_CURLY@12..13 "}"
AT@13..14 "@"
TEXT@14..16 "id"
R_CURLY@16..17 "}"
"###
);
// braces inside `$...$` do not end the text
insta::assert_debug_snapshot!(
to_cloze("{{$\\frac{a}{b}$}{fractions}}").syntax,
@r###"
CLOZE@0..28
L_CURLY2@0..2 "{{"
LATEX_FRAGMENT@2..15
DOLLAR@2..3 "$"
TEXT@3..14 "\\frac{a}{b}"
DOLLAR@14..15 "$"
R_CURLY@15..16 "}"
L_CURLY@16..17 "{"
TEXT@17..26 "fractions"
R_CURLY@26..27 "}"
R_CURLY@27..28 "}"
"###
);
// malformed inputs must be rejected
let config = &ParseConfig::default();
assert!(cloze_node(("{{}}", config).into()).is_err());
assert!(cloze_node(("{{text}", config).into()).is_err());
assert!(cloze_node(("{text}}", config).into()).is_err());
assert!(cloze_node(("{{text}{}", config).into()).is_err());
assert!(cloze_node(("{{text}a}", config).into()).is_err());
}

327
src/syntax/combinator.rs Normal file
View file

@ -0,0 +1,327 @@
use memchr::{memchr2, memchr2_iter, Memchr2};
use nom::{bytes::complete::tag, IResult, InputTake, Slice};
use rowan::{GreenNode, GreenToken, Language, NodeOrToken};
use std::iter::once;
use super::{input::Input, OrgLanguage, SyntaxKind, SyntaxKind::*};
pub type GreenElement = NodeOrToken<GreenNode, GreenToken>;
#[inline]
pub fn token(kind: SyntaxKind, input: &str) -> GreenElement {
GreenElement::Token(GreenToken::new(OrgLanguage::kind_to_raw(kind), input))
}
#[inline]
pub fn node<I>(kind: SyntaxKind, children: I) -> GreenElement
where
I: IntoIterator<Item = GreenElement>,
I::IntoIter: ExactSizeIterator,
{
GreenElement::Node(GreenNode::new(OrgLanguage::kind_to_raw(kind), children))
}
// Generates a public parser function `$name` that matches the literal `$token`
// at the start of the input (via nom's `tag`) and wraps the matched text in a
// green token of kind `$kind`. The three `#[doc]` attributes are concatenated
// by rustdoc into the documentation of the generated function.
macro_rules! token_parser {
($name:ident, $token:literal, $kind:ident) => {
#[doc = "Recognizes `"]
#[doc = $token]
#[doc = "` and returns GreenToken"]
pub fn $name(input: Input) -> IResult<Input, GreenElement, ()> {
let (i, o) = tag($token)(input)?;
Ok((i, token($kind, o.as_str())))
}
};
}
// One parser function per punctuation token, generated by `token_parser!`.
// Each parser only matches its own literal, so callers that must distinguish
// e.g. `[` from `[[` have to try the longer token first.
// The commented-out invocations are tokens without a generated parser.
token_parser!(l_bracket_token, "[", L_BRACKET);
token_parser!(r_bracket_token, "]", R_BRACKET);
token_parser!(l_bracket2_token, "[[", L_BRACKET2);
token_parser!(r_bracket2_token, "]]", R_BRACKET2);
token_parser!(l_parens_token, "(", L_PARENS);
token_parser!(r_parens_token, ")", R_PARENS);
token_parser!(l_angle_token, "<", L_ANGLE);
token_parser!(r_angle_token, ">", R_ANGLE);
token_parser!(l_curly_token, "{", L_CURLY);
// `{{` is only available with the org-fc syntax extension enabled
#[cfg(feature = "syntax-org-fc")]
token_parser!(l_curly2_token, "{{", L_CURLY2);
token_parser!(r_curly_token, "}", R_CURLY);
token_parser!(l_curly3_token, "{{{", L_CURLY3);
token_parser!(r_curly3_token, "}}}", R_CURLY3);
token_parser!(l_angle2_token, "<<", L_ANGLE2);
token_parser!(r_angle2_token, ">>", R_ANGLE2);
token_parser!(l_angle3_token, "<<<", L_ANGLE3);
token_parser!(r_angle3_token, ">>>", R_ANGLE3);
token_parser!(at_token, "@", AT);
token_parser!(at2_token, "@@", AT2);
token_parser!(minus2_token, "--", MINUS2);
// token_parser!(percent_token, "%", PERCENT);
token_parser!(percent2_token, "%%", PERCENT2);
// token_parser!(slash_token, "/", SLASH);
token_parser!(backslash_token, "\\", BACKSLASH);
token_parser!(underscore_token, "_", UNDERSCORE);
// token_parser!(star_token, "*", STAR);
// token_parser!(plus_token, "+", PLUS);
token_parser!(minus_token, "-", MINUS);
token_parser!(colon_token, ":", COLON);
token_parser!(colon2_token, "::", COLON2);
token_parser!(pipe_token, "|", PIPE);
token_parser!(dollar_token, "$", DOLLAR);
token_parser!(dollar2_token, "$$", DOLLAR2);
// token_parser!(equal_token, "=", EQUAL);
// token_parser!(tilde_token, "~", TILDE);
token_parser!(hash_plus_token, "#+", HASH_PLUS);
token_parser!(caret_token, "^", CARET);
token_parser!(hash_token, "#", HASH);
token_parser!(double_arrow_token, "=>", DOUBLE_ARROW);
/// Runs `$parser` on `$input` and checks, in debug builds, that the parser is
/// lossless: the text of the produced element must be identical to the part of
/// the input that was consumed.
///
/// Evaluates to `Ok((remaining_input, output))` and propagates parser errors
/// with `?`, so it has to be used inside a function returning a compatible
/// `Result`. With the `tracing` feature enabled the consumed text is also
/// traced.
///
/// `$input` is evaluated exactly once; the input type has to be `Copy`, as the
/// original input is needed again for the assertion.
macro_rules! lossless_parser {
    ($parser:expr, $input:expr) => {{
        // keep a copy of the original input to measure what got consumed
        let i_ = $input;
        let (i, o) = $parser(i_)?;
        cfg_if::cfg_if! {
            if #[cfg(feature = "tracing")] {
                tracing::trace!(consumed = o.to_string());
            }
        }
        debug_assert_eq!(
            &i_.as_str()[0..(i_.len() - i.len())],
            &o.to_string(),
            "parser must be lossless"
        );
        Ok((i, o))
    }};
}
pub(crate) use lossless_parser;
/// Takes all leading blank lines.
///
/// A blank line is a non-empty line (including its line ending) that consists
/// of ASCII whitespace only. Every blank line becomes one `BLANK_LINE` token;
/// the remaining input starts at the first line that is not blank. Never fails.
pub fn blank_lines(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
    if input.is_empty() {
        return Ok((input, vec![]));
    }

    let text = input.as_str();
    let mut lines = Vec::new();
    // number of bytes covered by the blank lines found so far
    let mut consumed = 0;

    for end in line_ends_iter(text) {
        let line = &text[consumed..end];
        let is_blank = !line.is_empty() && line.bytes().all(|b| b.is_ascii_whitespace());
        if !is_blank {
            break;
        }
        lines.push(token(BLANK_LINE, line));
        consumed = end;
    }

    Ok((input.slice(consumed..), lines))
}
// Checks `blank_lines` on empty input, a single newline, a non-blank line, and
// mixed `\n` / `\r\n` / `\r` line endings with and without trailing content.
#[test]
fn test_blank_lines() {
use crate::config::ParseConfig;
let config = &ParseConfig::default();
// empty input: nothing consumed, nothing produced
let (input, output) = blank_lines(("", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output, vec![]);
let (input, output) = blank_lines(("\n", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.len(), 1);
assert_eq!(output[0].to_string(), "\n");
// a line with content is not blank, even with leading whitespace
let (input, output) = blank_lines((" t", config).into()).unwrap();
assert_eq!(input.as_str(), " t");
assert_eq!(output, vec![]);
// whitespace-only input is consumed entirely, one token per line
let (input, output) = blank_lines((" \r\n\n\t\t\r\n \n ", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.len(), 5);
assert_eq!(output[0].to_string(), " \r\n");
assert_eq!(output[1].to_string(), "\n");
assert_eq!(output[2].to_string(), "\t\t\r\n");
assert_eq!(output[3].to_string(), " \n");
assert_eq!(output[4].to_string(), " ");
// consumption stops at the first line with content
let (input, output) =
blank_lines(("\r\n\n\t\t\r\n \n\r \r t\n ", config).into()).unwrap();
assert_eq!(input.as_str(), " t\n ");
assert_eq!(output.len(), 6);
assert_eq!(output[0].to_string(), "\r\n");
assert_eq!(output[1].to_string(), "\n");
assert_eq!(output[2].to_string(), "\t\t\r\n");
assert_eq!(output[3].to_string(), " \n");
assert_eq!(output[4].to_string(), "\r");
assert_eq!(output[5].to_string(), " \r");
}
/// Splits the first line of `input` into three parts and returns them together
/// with the input following that line:
///
/// 1. the line content without its trailing whitespace,
/// 2. the trailing ASCII whitespace,
/// 3. the line ending (`\n`, `\r\n` or `\r`), empty when the input ends without one.
///
/// Never fails.
pub fn trim_line_end(input: Input) -> IResult<Input, (Input, Input, Input), ()> {
let bytes = input.as_bytes();
// locate the first line ending; `\r\n` is treated as a single ending
let (input, contents, nl) = match memchr2(b'\r', b'\n', bytes) {
Some(i) if bytes[i] == b'\r' && matches!(bytes.get(i + 1), Some(b'\n')) => (
input.slice(i + 2..),
input.slice(0..i),
input.slice(i..i + 2),
),
Some(i) => (
input.slice(i + 1..),
input.slice(0..i),
input.slice(i..i + 1),
),
// no line ending: the whole input is the line
_ => (input.of(""), input, input.of("")),
};
// split the line at its last non-whitespace byte
let (contents, ws) = match contents.bytes().rposition(|u| !u.is_ascii_whitespace()) {
Some(i) => (contents.slice(0..i + 1), contents.slice(i + 1..)),
None => (contents.of(""), contents),
};
Ok((input, (contents, ws, nl)))
}
// Checks `trim_line_end` on empty input, a line without ending, a `\r\n`
// terminated line with trailing whitespace, and a whitespace-only line ending
// in a lone `\r`.
#[test]
fn test_trim_line_end() {
use crate::config::ParseConfig;
let config = &ParseConfig::default();
let (input, output) = trim_line_end(("", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.0.as_str(), "");
assert_eq!(output.1.as_str(), "");
assert_eq!(output.2.as_str(), "");
// no line ending and no trailing whitespace
let (input, output) = trim_line_end(("* hello, world :abc:", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.0.as_str(), "* hello, world :abc:");
assert_eq!(output.1.as_str(), "");
assert_eq!(output.2.as_str(), "");
// only the first line is consumed
let (input, output) =
trim_line_end(("* hello, world :abc: \r\nrest\n", config).into()).unwrap();
assert_eq!(input.as_str(), "rest\n");
assert_eq!(output.0.as_str(), "* hello, world :abc:");
assert_eq!(output.1.as_str(), " ");
assert_eq!(output.2.as_str(), "\r\n");
// whitespace-only line terminated by a lone `\r`
let (input, output) = trim_line_end((" \rr", config).into()).unwrap();
assert_eq!(input.as_str(), "r");
assert_eq!(output.0.as_str(), "");
assert_eq!(output.1.as_str(), " ");
assert_eq!(output.2.as_str(), "\r");
}
/// Recognizes a line ending (`\r`, `\n` or `\r\n`) or the end of input.
///
/// Returns the matched line ending, which is empty at the end of input, and
/// fails when the input starts with any other byte.
pub fn eol_or_eof(input: Input) -> IResult<Input, Input, ()> {
    let len = match input.as_bytes() {
        // end of input
        [] => 0,
        [b'\r', b'\n', ..] => 2,
        [b'\n', ..] | [b'\r', ..] => 1,
        _ => return Err(nom::Err::Error(())),
    };
    Ok(input.take_split(len))
}
/// Iterator over the byte offsets that directly follow a line ending
/// (`\n`, `\r` or `\r\n`) in a string.
struct LineStart<'a> {
    /// The bytes being scanned, kept to tell `\r\n` apart from a lone `\r`.
    bytes: &'a [u8],
    /// Positions of every `\r` and `\n` byte.
    iter: Memchr2<'a>,
}

impl<'a> LineStart<'a> {
    fn new(input: &'a str) -> Self {
        let bytes = input.as_bytes();
        let iter = memchr2_iter(b'\r', b'\n', bytes);
        LineStart { bytes, iter }
    }
}

impl<'a> Iterator for LineStart<'a> {
    type Item = usize;

    fn next(&mut self) -> Option<Self::Item> {
        let at = self.iter.next()?;

        if !self.bytes[at..].starts_with(b"\r\n") {
            return Some(at + 1);
        }

        // `\r\n` is a single line ending: skip the position of the `\n`, which
        // the underlying iterator yields next
        let lf = self.iter.next();
        debug_assert_eq!(at + 1, lf.unwrap());
        Some(at + 2)
    }
}
/// Iterates over the byte offsets at which lines begin in `s`.
///
/// Offset `0` is always produced first, followed by the offset just past every
/// line terminator (`\n`, `\r\n` or `\r`).
pub fn line_starts_iter(s: &str) -> impl Iterator<Item = usize> + '_ {
    let after_terminators = LineStart::new(s);
    once(0).chain(after_terminators)
}
/// Iterates over the byte offsets at which lines end in `s`.
///
/// Produces the offset just past every line terminator (`\n`, `\r\n` or `\r`),
/// and finally `s.len()` for the end of input.
pub fn line_ends_iter(s: &str) -> impl Iterator<Item = usize> + '_ {
    let eof = once(s.len());
    LineStart::new(s).chain(eof)
}
/// Accumulates child elements and turns them into a single green node via `finish`.
pub struct NodeBuilder {
// children collected so far, in insertion order
pub children: Vec<GreenElement>,
}
impl NodeBuilder {
    /// Creates a builder with no children.
    pub fn new() -> NodeBuilder {
        NodeBuilder {
            children: Vec::new(),
        }
    }

    /// Appends `i` as a whitespace token; empty input is ignored.
    pub fn ws(&mut self, i: Input) {
        if i.is_empty() {
            return;
        }
        debug_assert!(i.bytes().all(|c| c.is_ascii_whitespace()));
        self.children.push(i.ws_token());
    }

    /// Appends `i` as a new-line token; empty input is ignored.
    pub fn nl(&mut self, i: Input) {
        if i.is_empty() {
            return;
        }
        debug_assert!(
            matches!(i.s, "\n" | "\r\n" | "\r"),
            "{:?} should be a new line",
            i.s
        );
        self.children.push(i.nl_token());
    }

    /// Appends `i` as a text token; empty input is ignored.
    pub fn text(&mut self, i: Input) {
        if i.is_empty() {
            return;
        }
        self.children.push(i.text_token());
    }

    /// Appends `i` as a token of the given `kind`, even when `i` is empty.
    pub fn token(&mut self, kind: SyntaxKind, i: Input) {
        let element = i.token(kind);
        self.children.push(element);
    }

    /// Appends an already built element.
    pub fn push(&mut self, elem: GreenElement) {
        self.children.push(elem);
    }

    /// Appends `elem` when it is `Some`; does nothing for `None`.
    pub fn push_opt(&mut self, elem: Option<GreenElement>) {
        self.children.extend(elem);
    }

    /// Number of children collected so far.
    pub fn len(&self) -> usize {
        self.children.len()
    }

    /// Consumes the builder and wraps its children in a node of the given `kind`.
    pub fn finish(self, kind: SyntaxKind) -> GreenElement {
        let green = GreenNode::new(kind.into(), self.children);
        GreenElement::Node(green)
    }
}

115
src/syntax/comment.rs Normal file
View file

@ -0,0 +1,115 @@
use nom::{
bytes::complete::{tag, take_while},
character::complete::{space0, space1},
combinator::{iterator, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder},
input::Input,
SyntaxKind,
};
fn comment_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let mut b = NodeBuilder::new();
let mut iter = iterator(
input,
opt(tuple((
space0,
tag("#"),
opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))),
eol_or_eof,
))),
);
for (idx, option) in iter.enumerate() {
match option {
Some((ws, common, content, eol)) => {
b.ws(ws);
b.token(SyntaxKind::HASH, common);
if let Some((ws, text)) = content {
b.ws(ws);
b.text(text);
}
b.text(eol);
}
_ if idx == 0 => return Err(nom::Err::Error(())),
_ => break,
}
}
let (input, _) = iter.finish()?;
let (input, post_blank) = blank_lines(input)?;
b.children.extend(post_blank);
Ok((input, b.finish(SyntaxKind::COMMENT)))
}
/// Public entry point for parsing a comment element.
///
/// Runs `comment_node_base` through the crate's `lossless_parser!` macro. The macro
/// is defined elsewhere; NOTE(review): by its name it presumably checks that the
/// produced node covers exactly the consumed input — confirm against its definition.
/// With the `tracing` feature enabled, the call is instrumented at debug level and
/// records the raw input string.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn comment_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(comment_node_base, input)
}
// Snapshot tests for `comment_node`: a lone `#`, several comment lines followed by a
// blank line, and a comment that is cut short by a non-comment line.
#[test]
fn parse() {
use crate::{
syntax::{comment::comment_node, input::Input, SyntaxNode},
ParseConfig,
};
// parses `input` with the default config and wraps the resulting node as a syntax tree root
let t = |input: &str| {
SyntaxNode::new_root(
comment_node(Input {
s: input,
c: &ParseConfig::default(),
})
.unwrap()
.1
.into_node()
.unwrap(),
)
};
// a single `#` without terminator
insta::assert_debug_snapshot!(
t("#"),
@r###"
COMMENT@0..1
HASH@0..1 "#"
"###
);
// consecutive comment lines, with and without text, then a trailing blank line
insta::assert_debug_snapshot!(
t("#\n # a\n #\n\n"),
@r###"
COMMENT@0..12
HASH@0..1 "#"
TEXT@1..2 "\n"
WHITESPACE@2..4 " "
HASH@4..5 "#"
WHITESPACE@5..6 " "
TEXT@6..7 "a"
TEXT@7..8 "\n"
WHITESPACE@8..9 " "
HASH@9..10 "#"
TEXT@10..11 "\n"
BLANK_LINE@11..12 "\n"
"###
);
// the comment stops at the first line that is not a comment line
insta::assert_debug_snapshot!(
t("#\na\n #\n\n"),
@r###"
COMMENT@0..2
HASH@0..1 "#"
TEXT@1..2 "\n"
"###
);
}

147
src/syntax/cookie.rs Normal file
View file

@ -0,0 +1,147 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::digit0,
combinator::map,
sequence::{pair, separated_pair, tuple},
IResult,
};
use super::{
combinator::{l_bracket_token, node, r_bracket_token, token, GreenElement},
input::Input,
SyntaxKind::*,
};
/// Parses a statistics cookie such as `[1/10]` or `[10%]` into a `COOKIE` node.
///
/// Both numbers of the `N/M` form and the number of the `N%` form may be empty;
/// an empty number is still emitted as an empty `TEXT` token. Nothing other than
/// digits, `/` and `%` is accepted between the brackets.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn cookie_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(
        tuple((
            l_bracket_token,
            alt((
                separated_pair(digit0, tag("/"), digit0),
                pair(digit0, tag("%")),
            )),
            r_bracket_token,
        )),
        |(open, (lhs, rhs), close)| {
            let leading = token(TEXT, lhs.as_str());
            // in the percent form the second half of the pair is the `%` sign itself;
            // in the slash form it is the second number and the `/` was discarded
            let children = if rhs.as_str() == "%" {
                vec![open, leading, token(PERCENT, rhs.as_str()), close]
            } else {
                vec![
                    open,
                    leading,
                    token(SLASH, "/"),
                    token(TEXT, rhs.as_str()),
                    close,
                ]
            };
            node(COOKIE, children)
        },
    );
    crate::lossless_parser!(parser, input)
}
// Snapshot tests for `cookie_node`: the `N/M` and `N%` forms, including empty numbers,
// followed by inputs that must be rejected.
#[test]
fn parse() {
use crate::ast::Cookie;
use crate::tests::to_ast;
use crate::ParseConfig;
let to_cookie = to_ast::<Cookie>(cookie_node);
// `N/M` form
insta::assert_debug_snapshot!(
to_cookie("[1/10]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..2 "1"
SLASH@2..3 "/"
TEXT@3..5 "10"
R_BRACKET@5..6 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[1/1000]").syntax,
@r###"
COOKIE@0..8
L_BRACKET@0..1 "["
TEXT@1..2 "1"
SLASH@2..3 "/"
TEXT@3..7 "1000"
R_BRACKET@7..8 "]"
"###
);
// `N%` form
insta::assert_debug_snapshot!(
to_cookie("[10%]").syntax,
@r###"
COOKIE@0..5
L_BRACKET@0..1 "["
TEXT@1..3 "10"
PERCENT@3..4 "%"
R_BRACKET@4..5 "]"
"###
);
// empty numbers are kept as empty TEXT tokens
insta::assert_debug_snapshot!(
to_cookie("[%]").syntax,
@r###"
COOKIE@0..3
L_BRACKET@0..1 "["
TEXT@1..1 ""
PERCENT@1..2 "%"
R_BRACKET@2..3 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[/]").syntax,
@r###"
COOKIE@0..3
L_BRACKET@0..1 "["
TEXT@1..1 ""
SLASH@1..2 "/"
TEXT@2..2 ""
R_BRACKET@2..3 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[100/]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..4 "100"
SLASH@4..5 "/"
TEXT@5..5 ""
R_BRACKET@5..6 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[/100]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..1 ""
SLASH@1..2 "/"
TEXT@2..5 "100"
R_BRACKET@5..6 "]"
"###
);
// malformed cookies: stray space, doubled or wrong separator, doubled percent sign
let config = &ParseConfig::default();
assert!(cookie_node(("[10% ]", config).into()).is_err());
assert!(cookie_node(("[1//100]", config).into()).is_err());
assert!(cookie_node(("[1\\100]", config).into()).is_err());
assert!(cookie_node(("[10%%]", config).into()).is_err());
}

139
src/syntax/document.rs Normal file
View file

@ -0,0 +1,139 @@
use nom::{combinator::opt, IResult};
use super::{
combinator::{blank_lines, node, GreenElement},
drawer::property_drawer_node,
headline::{headline_node, section_node},
input::Input,
SyntaxKind::*,
};
/// Public entry point for parsing a whole document.
///
/// Runs `document_node_base` through the crate's `lossless_parser!` macro (defined
/// elsewhere; NOTE(review): presumably a losslessness check — confirm against its
/// definition). With the `tracing` feature enabled, the call is instrumented at
/// debug level and records the raw input string.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn document_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(document_node_base, input)
}
/// Builds the `DOCUMENT` node.
///
/// The children are, in order: an optional top-level property drawer, leading
/// blank lines, an optional section, and then headlines until the input is
/// exhausted. A headline parse error is propagated to the caller.
fn document_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    if input.is_empty() {
        return Ok((input, node(DOCUMENT, [])));
    }

    let (input, property_drawer) = opt(property_drawer_node)(input)?;
    let (input, pre_blank) = blank_lines(input)?;
    let mut children: Vec<GreenElement> = property_drawer.into_iter().collect();
    children.extend(pre_blank);
    if input.is_empty() {
        return Ok((input, node(DOCUMENT, children)));
    }

    let (input, section) = opt(section_node)(input)?;
    children.extend(section);

    let mut rest = input;
    while !rest.is_empty() {
        let (next, headline) = headline_node(rest)?;
        // every headline must consume something, otherwise this loop would not terminate
        debug_assert!(rest.len() > next.len(), "{} > {}", rest.len(), next.len(),);
        children.push(headline);
        rest = next;
    }
    Ok((rest, node(DOCUMENT, children)))
}
// Snapshot tests for `document_node`: empty input, blank lines only, a lone section,
// headlines after blank lines, and a section followed by a headline.
#[test]
fn parse() {
use crate::ast::Document;
use crate::tests::to_ast;
let to_document = to_ast::<Document>(document_node);
// empty document
insta::assert_debug_snapshot!(
to_document("").syntax,
@r###"
DOCUMENT@0..0
"###
);
// blank lines only
insta::assert_debug_snapshot!(
to_document("\n \n\n").syntax,
@r###"
DOCUMENT@0..5
BLANK_LINE@0..1 "\n"
BLANK_LINE@1..4 " \n"
BLANK_LINE@4..5 "\n"
"###
);
// text without any headline becomes a section
insta::assert_debug_snapshot!(
to_document("section").syntax,
@r###"
DOCUMENT@0..7
SECTION@0..7
PARAGRAPH@0..7
TEXT@0..7 "section"
"###
);
// blank line followed by a headline
insta::assert_debug_snapshot!(
to_document("\n* section").syntax,
@r###"
DOCUMENT@0..10
BLANK_LINE@0..1 "\n"
HEADLINE@1..10
HEADLINE_STARS@1..2 "*"
WHITESPACE@2..3 " "
HEADLINE_TITLE@3..10
TEXT@3..10 "section"
"###
);
// a level-2 headline followed by a level-1 headline: both are direct children
insta::assert_debug_snapshot!(
to_document("\n** heading 2\n* heading 1").syntax,
@r###"
DOCUMENT@0..25
BLANK_LINE@0..1 "\n"
HEADLINE@1..14
HEADLINE_STARS@1..3 "**"
WHITESPACE@3..4 " "
HEADLINE_TITLE@4..13
TEXT@4..13 "heading 2"
NEW_LINE@13..14 "\n"
HEADLINE@14..25
HEADLINE_STARS@14..15 "*"
WHITESPACE@15..16 " "
HEADLINE_TITLE@16..25
TEXT@16..25 "heading 1"
"###
);
// `*heading 1` has no space after the star, so it is section text, not a headline
insta::assert_debug_snapshot!(
to_document("section\n** heading 2\n*heading 1").syntax,
@r###"
DOCUMENT@0..31
SECTION@0..8
PARAGRAPH@0..8
TEXT@0..8 "section\n"
HEADLINE@8..31
HEADLINE_STARS@8..10 "**"
WHITESPACE@10..11 " "
HEADLINE_TITLE@11..20
TEXT@11..20 "heading 2"
NEW_LINE@20..21 "\n"
SECTION@21..31
PARAGRAPH@21..31
TEXT@21..31 "*heading 1"
"###
);
}

275
src/syntax/drawer.rs Normal file
View file

@ -0,0 +1,275 @@
use nom::{
bytes::complete::{tag_no_case, take_while1},
character::complete::{space0, space1},
combinator::{iterator, map, verify},
sequence::tuple,
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, colon_token, eol_or_eof, line_starts_iter, node, trim_line_end, GreenElement,
NodeBuilder,
},
element::element_nodes,
input::Input,
SyntaxKind::*,
};
/// Parses a drawer opening line (`:NAME:`) into a `DRAWER_BEGIN` node.
///
/// The line may be indented and may carry trailing blanks before its terminator.
/// `NAME` consists of ASCII letters, `-` and `_`. Returns the node together with
/// the drawer name.
fn drawer_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
    let is_name_char = |c: char| c.is_ascii_alphabetic() || c == '-' || c == '_';
    let (input, (indent, open, name, close, trailing, eol)) = tuple((
        space0,
        colon_token,
        take_while1(is_name_char),
        colon_token,
        space0,
        eol_or_eof,
    ))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.push(open);
    builder.text(name);
    builder.push(close);
    builder.ws(trailing);
    builder.nl(eol);
    let begin = builder.finish(DRAWER_BEGIN);
    Ok((input, (begin, name.as_str())))
}
/// Parses a drawer closing line (`:END:`, matched case-insensitively) into a
/// `DRAWER_END` node.
///
/// The line may be indented and may carry trailing blanks before its terminator
/// or the end of input.
fn drawer_end_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let end_line = tuple((
        space0,
        colon_token,
        tag_no_case("END"),
        colon_token,
        space0,
        eol_or_eof,
    ));
    let (input, (indent, open, keyword, close, trailing, eol)) = end_line(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.push(open);
    builder.text(keyword);
    builder.push(close);
    builder.ws(trailing);
    builder.nl(eol);
    Ok((input, builder.finish(DRAWER_END)))
}
/// Parses a generic drawer: opening line, optional blank lines, contents,
/// `:END:` line and trailing blank lines.
///
/// Every line start after the opening is tried as the position of the `:END:`
/// line; the first one that matches closes the drawer. Fails with a recoverable
/// error when no `:END:` line is found.
fn drawer_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, (begin, _)) = drawer_begin_node(input)?;
    let (input, pre_blank) = blank_lines(input)?;

    for offset in line_starts_iter(input.as_str()) {
        let (tail, contents) = input.take_split(offset);
        let (rest, end) = match drawer_end_node(tail) {
            Ok(found) => found,
            Err(_) => continue,
        };
        let (rest, post_blank) = blank_lines(rest)?;

        // the content node is always present, even when the drawer is empty
        let content = if contents.is_empty() {
            node(DRAWER_CONTENT, [])
        } else {
            node(DRAWER_CONTENT, element_nodes(contents)?)
        };

        let mut children = vec![begin];
        children.extend(pre_blank);
        children.push(content);
        children.push(end);
        children.extend(post_blank);
        return Ok((rest, node(DRAWER, children)));
    }

    Err(nom::Err::Error(()))
}
/// Parses a `:PROPERTIES:` drawer into a `PROPERTY_DRAWER` node.
///
/// The drawer name is compared case-insensitively. The opening line must be
/// followed directly by zero or more node properties and then the `:END:` line;
/// trailing blank lines are attached to the drawer.
fn property_drawer_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (rest, (begin, name)) = drawer_begin_node(input)?;
    if !name.eq_ignore_ascii_case("properties") {
        return Err(nom::Err::Error(()));
    }

    // collect properties until a line no longer parses as one
    let mut properties = iterator(rest, node_property_node);
    let mut children: Vec<GreenElement> = std::iter::once(begin).chain(&mut properties).collect();
    let (rest, _) = properties.finish()?;

    let (rest, end) = drawer_end_node(rest)?;
    let (rest, post_blank) = blank_lines(rest)?;
    children.push(end);
    children.extend(post_blank);
    Ok((rest, node(PROPERTY_DRAWER, children)))
}
/// Parses one property line (`:NAME: VALUE` or `:NAME+: VALUE`) into a
/// `NODE_PROPERTY` node.
///
/// The name runs up to the first blank or line break and must end with `:`; it
/// may itself contain colons. At least one blank must separate it from the value.
/// A trailing `+` on the name is emitted as a separate `PLUS` token.
fn node_property_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, indent) = space0(input)?;
    let (input, leading_colon) = colon_token(input)?;
    // take the whole `NAME:` word, then split its final colon off again
    let (input, (closing_colon, name)) = map(
        verify(
            take_while1(|c| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
            |word: &Input| word.ends_with(':'),
        ),
        |word: Input| word.take_split(word.len() - 1),
    )(input)?;
    let (input, gap) = space1(input)?;
    let (input, (value, trailing, eol)) = trim_line_end(input)?;

    let (name, plus) = if name.ends_with('+') {
        let (plus, name) = name.take_split(name.len() - 1);
        (name, Some(plus))
    } else {
        (name, None)
    };

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.push(leading_colon);
    builder.text(name);
    if let Some(plus) = plus {
        builder.token(PLUS, plus);
    }
    builder.token(COLON, closing_colon);
    builder.ws(gap);
    builder.text(value);
    builder.ws(trailing);
    builder.nl(eol);
    Ok((input, builder.finish(NODE_PROPERTY)))
}
/// Public entry point for parsing a property drawer.
///
/// The input must not be empty (checked in debug builds only). Runs
/// `property_drawer_node_base` through the crate's `lossless_parser!` macro (defined
/// elsewhere; NOTE(review): presumably a losslessness check — confirm against its
/// definition). With the `tracing` feature enabled, the call is instrumented at
/// debug level and records the raw input string.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn property_drawer_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert!(!input.is_empty());
crate::lossless_parser!(property_drawer_node_base, input)
}
/// Public entry point for parsing a generic drawer.
///
/// Runs `drawer_node_base` through the crate's `lossless_parser!` macro (defined
/// elsewhere; NOTE(review): presumably a losslessness check — confirm against its
/// definition). With the `tracing` feature enabled, the call is instrumented at
/// debug level and records the raw input string.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn drawer_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(drawer_node_base, input)
}
// Snapshot tests for `drawer_node` and `property_drawer_node`, followed by inputs that
// must be rejected.
#[test]
fn parse() {
use crate::{
ast::{Drawer, PropertyDrawer},
tests::to_ast,
ParseConfig,
};
let to_drawer = to_ast::<Drawer>(drawer_node);
let to_property_drawer = to_ast::<PropertyDrawer>(property_drawer_node);
// generic drawer: the contents are parsed as ordinary elements, not as properties
insta::assert_debug_snapshot!(
to_drawer(
r#":DRAWER:
:CUSTOM_ID: id
:END:"#
).syntax,
@r###"
DRAWER@0..33
DRAWER_BEGIN@0..9
COLON@0..1 ":"
TEXT@1..7 "DRAWER"
COLON@7..8 ":"
NEW_LINE@8..9 "\n"
DRAWER_CONTENT@9..26
PARAGRAPH@9..26
TEXT@9..18 " :CUSTOM"
SUBSCRIPT@18..21
UNDERSCORE@18..19 "_"
TEXT@19..21 "ID"
TEXT@21..26 ": id\n"
DRAWER_END@26..33
WHITESPACE@26..28 " "
COLON@28..29 ":"
TEXT@29..32 "END"
COLON@32..33 ":"
"###
);
// drawer without contents: an empty DRAWER_CONTENT node is still emitted
insta::assert_debug_snapshot!(
to_drawer(
r#":DRAWER:
:END:
"#
).syntax,
@r###"
DRAWER@0..19
DRAWER_BEGIN@0..9
COLON@0..1 ":"
TEXT@1..7 "DRAWER"
COLON@7..8 ":"
NEW_LINE@8..9 "\n"
BLANK_LINE@9..10 "\n"
DRAWER_CONTENT@10..10
DRAWER_END@10..18
WHITESPACE@10..12 " "
COLON@12..13 ":"
TEXT@13..16 "END"
COLON@16..17 ":"
NEW_LINE@17..18 "\n"
BLANK_LINE@18..19 "\n"
"###
);
// https://github.com/PoiScript/orgize/issues/70#issuecomment-2099671563
// property names may contain colons and may end with `+`
insta::assert_debug_snapshot!(
to_property_drawer(r#":PROPERTIES:
:header-args:clojure: :session *clojure-1*
:NAME: VALUE
:NAME+: VALUE
:END:"#).syntax,
@r###"
PROPERTY_DRAWER@0..91
DRAWER_BEGIN@0..13
COLON@0..1 ":"
TEXT@1..11 "PROPERTIES"
COLON@11..12 ":"
NEW_LINE@12..13 "\n"
NODE_PROPERTY@13..59
COLON@13..14 ":"
TEXT@14..33 "header-args:clojure"
COLON@33..34 ":"
WHITESPACE@34..38 " "
TEXT@38..58 ":session *clojure-1*"
NEW_LINE@58..59 "\n"
NODE_PROPERTY@59..72
COLON@59..60 ":"
TEXT@60..64 "NAME"
COLON@64..65 ":"
WHITESPACE@65..66 " "
TEXT@66..71 "VALUE"
NEW_LINE@71..72 "\n"
NODE_PROPERTY@72..86
COLON@72..73 ":"
TEXT@73..77 "NAME"
PLUS@77..78 "+"
COLON@78..79 ":"
WHITESPACE@79..80 " "
TEXT@80..85 "VALUE"
NEW_LINE@85..86 "\n"
DRAWER_END@86..91
COLON@86..87 ":"
TEXT@87..90 "END"
COLON@90..91 ":"
"###
);
let config = &ParseConfig::default();
// https://github.com/PoiScript/orgize/issues/9
// a drawer without `:END:` is rejected
assert!(drawer_node((":SPAGHETTI:\n", config).into()).is_err());
// a property without a blank between name and value is rejected
assert!(property_drawer_node((":PROPERTIES:\n:NAME:VALUE\n:END:", config).into()).is_err());
}

107
src/syntax/dyn_block.rs Normal file
View file

@ -0,0 +1,107 @@
use nom::{
bytes::complete::tag_no_case,
character::complete::{alpha1, space0, space1},
sequence::tuple,
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, eol_or_eof, line_starts_iter, node, trim_line_end, GreenElement, NodeBuilder,
},
input::Input,
SyntaxKind::*,
};
/// Parses a dynamic block: `#+BEGIN:` line, optional blank lines, raw contents,
/// `#+END:` line and trailing blank lines.
///
/// Every line start after the opening is tried as the position of the `#+END:`
/// line; the first match closes the block. The contents are stored as a single
/// text token. Fails with a recoverable error when no `#+END:` line is found.
fn dyn_block_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, begin) = dyn_block_begin_node(input)?;
    let (input, pre_blank) = blank_lines(input)?;

    for offset in line_starts_iter(input.as_str()) {
        let (tail, contents) = input.take_split(offset);
        let (rest, end) = match dyn_block_end_node(tail) {
            Ok(found) => found,
            Err(_) => continue,
        };
        let (rest, post_blank) = blank_lines(rest)?;

        let mut children = vec![begin];
        children.extend(pre_blank);
        children.push(contents.text_token());
        children.push(end);
        children.extend(post_blank);
        return Ok((rest, node(DYN_BLOCK, children)));
    }

    Err(nom::Err::Error(()))
}
/// Parses the opening line of a dynamic block (`#+BEGIN: NAME ARGS`) into a
/// `DYN_BLOCK_BEGIN` node.
///
/// The keyword is matched case-insensitively, `NAME` is alphabetic, and the rest
/// of the line (minus trailing whitespace) is kept as the arguments text.
fn dyn_block_begin_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let begin_line = tuple((
        space0,
        tag_no_case("#+BEGIN:"),
        space1,
        alpha1,
        trim_line_end,
    ));
    let (input, (indent, keyword, gap, name, (args, trailing, eol))) = begin_line(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.text(keyword);
    builder.ws(gap);
    builder.text(name);
    builder.text(args);
    builder.ws(trailing);
    builder.nl(eol);
    Ok((input, builder.finish(DYN_BLOCK_BEGIN)))
}
/// Parses the closing line of a dynamic block (`#+END:`, matched
/// case-insensitively) into a `DYN_BLOCK_END` node.
fn dyn_block_end_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let end_line = tuple((space0, tag_no_case("#+END:"), space0, eol_or_eof));
    let (input, (indent, keyword, trailing, eol)) = end_line(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.text(keyword);
    builder.ws(trailing);
    builder.nl(eol);
    Ok((input, builder.finish(DYN_BLOCK_END)))
}
/// Public entry point for parsing a dynamic block.
///
/// Runs `dyn_block_node_base` through the crate's `lossless_parser!` macro (defined
/// elsewhere; NOTE(review): presumably a losslessness check — confirm against its
/// definition). With the `tracing` feature enabled, the call is instrumented at
/// debug level and records the raw input string.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn dyn_block_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(dyn_block_node_base, input)
}
// Snapshot test for `dyn_block_node`: a clocktable block with arguments, a blank line
// before the contents and trailing blank space after `#+END:`.
#[test]
fn parse() {
use crate::{ast::DynBlock, tests::to_ast};
let to_dyn_block = to_ast::<DynBlock>(dyn_block_node);
insta::assert_debug_snapshot!(
to_dyn_block(
r#"#+BEGIN: clocktable :scope file
CONTENTS
#+END:
"#).syntax,
@r###"
DYN_BLOCK@0..53
DYN_BLOCK_BEGIN@0..32
TEXT@0..8 "#+BEGIN:"
WHITESPACE@8..9 " "
TEXT@9..19 "clocktable"
TEXT@19..31 " :scope file"
NEW_LINE@31..32 "\n"
BLANK_LINE@32..33 "\n"
TEXT@33..42 "CONTENTS\n"
DYN_BLOCK_END@42..49
TEXT@42..48 "#+END:"
NEW_LINE@48..49 "\n"
BLANK_LINE@49..53 " "
"###
);
}

339
src/syntax/element.rs Normal file
View file

@ -0,0 +1,339 @@
use std::iter::once;
use memchr::memchr2_iter;
use nom::{IResult, InputTake};
use super::{
block::block_node,
clock::clock_node,
combinator::GreenElement,
comment::comment_node,
drawer::drawer_node,
dyn_block::dyn_block_node,
fixed_width::fixed_width_node,
fn_def::fn_def_node,
input::Input,
keyword::{affiliated_keyword_nodes, keyword_node},
latex_environment::latex_environment_node,
list::list_node,
paragraph::{paragraph_node, paragraph_nodes},
rule::rule_node,
table::{org_table_node, table_el_node},
};
/// Parses a sequence of org-mode elements.
///
/// The input must be non-empty and must not begin with blank lines. Text that
/// precedes a recognized element, and any text left when no further element is
/// found, is parsed as paragraphs.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn element_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
    debug_assert!(!input.is_empty());
    // TODO:
    // debug_assert!(
    //     blank_lines(input).unwrap().1.is_empty(),
    //     "input must not starts with blank lines: {:?}",
    //     input.s
    // );
    let mut rest = input;
    let mut nodes = vec![];
    while !rest.is_empty() {
        // first candidate line at which a non-paragraph element actually parses
        let hit = ElementPositions::new(rest).find_map(|(candidate, head)| {
            element_node(candidate)
                .ok()
                .map(|(after, element)| (head, after, element))
        });
        match hit {
            Some((head, after, element)) => {
                // whatever precedes the element is paragraph text
                if !head.is_empty() {
                    nodes.extend(paragraph_nodes(head)?);
                }
                nodes.push(element);
                debug_assert!(after.len() < rest.len(), "{} < {}", after.len(), rest.len());
                rest = after;
            }
            None => {
                // no further element: the remainder consists of paragraphs only
                nodes.extend(paragraph_nodes(rest)?);
                break;
            }
        }
    }
    debug_assert_eq!(
        input.as_str(),
        nodes.iter().map(|n| n.to_string()).collect::<String>(),
        "parser must be lossless"
    );
    Ok(nodes)
}
/// Parses a single org-mode element other than a plain paragraph.
///
/// The candidate parsers are chosen from the first non-blank byte that follows
/// any affiliated keywords. When affiliated keywords are present and no special
/// element matches, the input is parsed as a paragraph instead.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn element_node(input: Input) -> IResult<Input, GreenElement, ()> {
    // look past the affiliated keywords; the element parsers still receive the full input
    let (after_keywords, keywords) = affiliated_keyword_nodes(input)?;
    let has_affiliated_keyword = !keywords.is_empty();

    // first byte that is neither a space nor a tab
    let first = after_keywords
        .bytes()
        .find(|b| !matches!(b, b' ' | b'\t'));
    debug_assert!(
        !has_affiliated_keyword || !matches!(first, None | Some(b'\n') | Some(b'\r')),
        "affiliated_keyword must not followed by blank lines: {:?}",
        input.s
    );

    let parsed = match first {
        Some(b'[') => fn_def_node(input),
        Some(b'0'..=b'9') | Some(b'*') => list_node(input),
        // clock doesn't have affiliated keywords
        Some(b'C') if !has_affiliated_keyword => clock_node(input),
        Some(b'-') => rule_node(input).or_else(|_| list_node(input)),
        Some(b':') => drawer_node(input).or_else(|_| fixed_width_node(input)),
        Some(b'|') => org_table_node(input),
        Some(b'+') => table_el_node(input).or_else(|_| list_node(input)),
        Some(b'#') => block_node(input)
            .or_else(|_| keyword_node(input))
            .or_else(|_| dyn_block_node(input))
            .or_else(|_| comment_node(input)),
        Some(b'\\') => latex_environment_node(input),
        _ => Err(nom::Err::Error(())),
    };

    match parsed {
        // affiliated keywords with no special element after them belong to a paragraph
        Err(_) if has_affiliated_keyword => paragraph_node(input),
        other => other,
    }
}
/// Iterator over the lines of `input` whose first non-blank byte could start a
/// non-paragraph element.
struct ElementPositions<'a> {
// the text being scanned
input: Input<'a>,
// byte offset into `input` from which the next scan starts
pos: usize,
}
impl<'a> ElementPositions<'a> {
fn new(input: Input<'a>) -> Self {
ElementPositions { input, pos: 0 }
}
}
impl<'a> Iterator for ElementPositions<'a> {
// Each item is `(input from the candidate line onwards, text before that line)`.
type Item = (Input<'a>, Input<'a>);
// Scans forward from `self.pos` for the next line whose first non-blank byte is one of
// the bytes that can begin a non-paragraph element, and splits the input at that line.
fn next(&mut self) -> Option<Self::Item> {
// nothing left to scan
if self.pos >= self.input.s.len() {
return None;
}
let bytes = &self.input.as_bytes()[self.pos..];
// line starts relative to `self.pos`: offset 0, then the offset just past every `\r` or `\n`
let mut iter = once(0).chain(memchr2_iter(b'\r', b'\n', bytes).map(|i| i + 1));
// `while let` instead of `for`: the body pulls the following line start from `iter` itself
while let Some(i) = iter.next() {
// first byte at or after the line start that is not a space or tab;
// when only spaces and tabs remain, the iteration ends here
let b = *bytes[i..].iter().find(|&&b| b != b' ' && b != b'\t')?;
// same set of leading bytes that `element_node` dispatches on
if matches!(
b,
b'[' | b'0'..=b'9' | b'*' | b'C' | b'-' | b':' | b'|' | b'+' | b'#' | b'\\'
) {
let previous = self.pos;
// the next call resumes at the following line start, or at the end of input
self.pos = iter
.next()
.map_or_else(|| self.input.s.len(), |i| i + self.pos);
debug_assert!(
previous < self.pos && self.pos <= self.input.s.len(),
"{} < {} < {}",
previous,
self.pos,
self.input.s.len()
);
// `i` is relative to `previous`, so the absolute split offset is `i + previous`
let (input, head) = self.input.take_split(i + previous);
return Some((input, head));
}
}
None
}
}
// Checks which lines `ElementPositions` reports as candidates: lines starting (after
// optional spaces/tabs) with `+`, `C`, `-`, `[` and `:`, while blank lines are skipped.
#[test]
fn positions() {
let config = crate::ParseConfig::default();
let s = "+\n\n C\n \r\n-\n\t\t[\n: \r\n";
let vec = ElementPositions::new((s, &config).into()).collect::<Vec<_>>();
assert_eq!(vec.len(), 5);
// `.0` of each item is the input from the candidate line to the end
assert_eq!(vec[0].0.s, "+\n\n C\n \r\n-\n\t\t[\n: \r\n");
assert_eq!(vec[1].0.s, " C\n \r\n-\n\t\t[\n: \r\n");
assert_eq!(vec[2].0.s, "-\n\t\t[\n: \r\n");
assert_eq!(vec[3].0.s, "\t\t[\n: \r\n");
assert_eq!(vec[4].0.s, ": \r\n");
}
// Snapshot tests for `element_nodes`: paragraphs separated by a blank line, and a
// paragraph directly followed by a table.
#[test]
fn parse() {
use crate::syntax::{SyntaxKind, SyntaxNode};
use crate::{syntax::combinator::node, ParseConfig};
// parses `input` with the default config and wraps the elements in a SECTION root
let t = |input: &str| {
let config = &ParseConfig::default();
let children = element_nodes((input, config).into()).unwrap();
SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap())
};
// paragraph stops at blank lines
insta::assert_debug_snapshot!(
t(r#"a
b"#),
@r###"
SECTION@0..4
PARAGRAPH@0..3
TEXT@0..2 "a\n"
BLANK_LINE@2..3 "\n"
PARAGRAPH@3..4
TEXT@3..4 "b"
"###
);
// paragraph followed by special element
insta::assert_debug_snapshot!(
t("Table:\n|cell"),
@r###"
SECTION@0..12
PARAGRAPH@0..7
TEXT@0..7 "Table:\n"
ORG_TABLE@7..12
ORG_TABLE_STANDARD_ROW@7..12
PIPE@7..8 "|"
ORG_TABLE_CELL@8..12
TEXT@8..12 "cell"
"###
);
}
// Snapshot tests for how `element_nodes` attaches affiliated keywords (`#+ATTR_HTML`,
// `#+CAPTION`) to the element that follows them, and what happens when nothing follows.
#[test]
fn affiliated_keywords() {
use crate::syntax::{SyntaxKind, SyntaxNode};
use crate::{syntax::combinator::node, ParseConfig};
// parses `input` with the default config and wraps the elements in a SECTION root
let t = |input: &str| {
let config = &ParseConfig::default();
let children = element_nodes((input, config).into()).unwrap();
SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap())
};
// affiliated keywords + paragraph
insta::assert_debug_snapshot!(
t("#+ATTR_HTML: :width 300px\n[[./img/a.jpg]]"),
@r###"
SECTION@0..41
PARAGRAPH@0..41
AFFILIATED_KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..11 "ATTR_HTML"
COLON@11..12 ":"
TEXT@12..25 " :width 300px"
NEW_LINE@25..26 "\n"
LINK@26..41
L_BRACKET2@26..28 "[["
LINK_PATH@28..39 "./img/a.jpg"
R_BRACKET2@39..41 "]]"
"###
);
// affiliated keywords + blank lines, fallback to normal keyword
insta::assert_debug_snapshot!(
t("#+ATTR_HTML: :width 300px\n#+CAPTION: abc\n\n[[./img/a.jpg]]"),
@r###"
SECTION@0..57
KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..11 "ATTR_HTML"
COLON@11..12 ":"
TEXT@12..25 " :width 300px"
NEW_LINE@25..26 "\n"
KEYWORD@26..42
HASH_PLUS@26..28 "#+"
TEXT@28..35 "CAPTION"
COLON@35..36 ":"
TEXT@36..40 " abc"
NEW_LINE@40..41 "\n"
BLANK_LINE@41..42 "\n"
PARAGRAPH@42..57
LINK@42..57
L_BRACKET2@42..44 "[["
LINK_PATH@44..55 "./img/a.jpg"
R_BRACKET2@55..57 "]]"
"###
);
// affiliated keywords + special element
insta::assert_debug_snapshot!(
t("#+CAPTION: a footnote def\n[fn:WORD] https://orgmode.org"),
@r###"
SECTION@0..55
FN_DEF@0..55
AFFILIATED_KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
COLON@9..10 ":"
TEXT@10..25 " a footnote def"
NEW_LINE@25..26 "\n"
L_BRACKET@26..27 "["
TEXT@27..29 "fn"
COLON@29..30 ":"
TEXT@30..34 "WORD"
R_BRACKET@34..35 "]"
TEXT@35..55 " https://orgmode.org"
"###
);
// affiliated keywords + clock
// (a clock cannot carry affiliated keywords, so the whole input becomes a paragraph)
insta::assert_debug_snapshot!(
t("#+CAPTION: a footnote def\nCLOCK: [2003-09-16 Tue 09:39]"),
@r###"
SECTION@0..55
PARAGRAPH@0..55
AFFILIATED_KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
COLON@9..10 ":"
TEXT@10..25 " a footnote def"
NEW_LINE@25..26 "\n"
TEXT@26..33 "CLOCK: "
TIMESTAMP_INACTIVE@33..55
L_BRACKET@33..34 "["
TIMESTAMP_YEAR@34..38 "2003"
MINUS@38..39 "-"
TIMESTAMP_MONTH@39..41 "09"
MINUS@41..42 "-"
TIMESTAMP_DAY@42..44 "16"
WHITESPACE@44..45 " "
TIMESTAMP_DAYNAME@45..48 "Tue"
WHITESPACE@48..49 " "
TIMESTAMP_HOUR@49..51 "09"
COLON@51..52 ":"
TIMESTAMP_MINUTE@52..54 "39"
R_BRACKET@54..55 "]"
"###
);
// affiliated keywords + eof
insta::assert_debug_snapshot!(
t("#+CAPTION: Longer caption."),
@r###"
SECTION@0..26
KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
COLON@9..10 ":"
TEXT@10..26 " Longer caption."
"###
);
}

186
src/syntax/emphasis.rs Normal file
View file

@ -0,0 +1,186 @@
use bytecount::count;
use memchr::memchr_iter;
use nom::{combinator::map, IResult, Slice};
use super::{
combinator::{node, token, GreenElement},
input::Input,
object::standard_object_nodes,
SyntaxKind::*,
};
/// Parses bold text (`*…*`) into a `BOLD` node whose contents are parsed as
/// standard objects.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn bold_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'*'), |inner| {
        let children: Vec<GreenElement> = std::iter::once(token(STAR, "*"))
            .chain(standard_object_nodes(inner))
            .chain(std::iter::once(token(STAR, "*")))
            .collect();
        node(BOLD, children)
    });
    crate::lossless_parser!(parser, input)
}
/// Parses inline code (`~…~`) into a `CODE` node; the contents are kept as one
/// raw text token.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn code_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'~'), |inner| {
        let open = token(TILDE, "~");
        let body = inner.text_token();
        let close = token(TILDE, "~");
        node(CODE, [open, body, close])
    });
    crate::lossless_parser!(parser, input)
}
/// Parses struck-through text (`+…+`) into a `STRIKE` node whose contents are
/// parsed as standard objects.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn strike_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'+'), |inner| {
        let children: Vec<GreenElement> = std::iter::once(token(PLUS, "+"))
            .chain(standard_object_nodes(inner))
            .chain(std::iter::once(token(PLUS, "+")))
            .collect();
        node(STRIKE, children)
    });
    crate::lossless_parser!(parser, input)
}
/// Parses verbatim text (`=…=`) into a `VERBATIM` node; the contents are kept as
/// one raw text token.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn verbatim_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'='), |inner| {
        let open = token(EQUAL, "=");
        let body = inner.text_token();
        let close = token(EQUAL, "=");
        node(VERBATIM, [open, body, close])
    });
    crate::lossless_parser!(parser, input)
}
/// Parses underlined text (`_…_`) into an `UNDERLINE` node whose contents are
/// parsed as standard objects.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn underline_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'_'), |inner| {
        let children: Vec<GreenElement> = std::iter::once(token(UNDERSCORE, "_"))
            .chain(standard_object_nodes(inner))
            .chain(std::iter::once(token(UNDERSCORE, "_")))
            .collect();
        node(UNDERLINE, children)
    });
    crate::lossless_parser!(parser, input)
}
/// Parses italic text (`/…/`) into an `ITALIC` node whose contents are parsed as
/// standard objects.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn italic_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(emphasis(b'/'), |inner| {
        let children: Vec<GreenElement> = std::iter::once(token(SLASH, "/"))
            .chain(standard_object_nodes(inner))
            .chain(std::iter::once(token(SLASH, "/")))
            .collect();
        node(ITALIC, children)
    });
    crate::lossless_parser!(parser, input)
}
/// Builds a parser for text enclosed in a pair of `marker` bytes.
///
/// The returned parser succeeds when the input starts with `marker`, the byte
/// after it is not ASCII whitespace, and a closing `marker` accepted by
/// `validate_marker` is found before the contents contain two `\n` bytes. The
/// contents must be at least one byte long.
///
/// On success it yields `(input after the closing marker, contents between the
/// markers)`; otherwise a recoverable `nom::Err::Error`.
fn emphasis(marker: u8) -> impl Fn(Input) -> IResult<Input, Input, ()> {
    move |input: Input| {
        let bytes = input.as_bytes();
        if bytes.len() < 3 || bytes[0] != marker || bytes[1].is_ascii_whitespace() {
            return Err(nom::Err::Error(()));
        }
        // Running count of `\n` in `bytes[1..scanned]`. Only the stretch since the
        // previous candidate is counted each time, so the scan stays linear even
        // when the text contains many marker bytes.
        let mut newlines = 0;
        let mut scanned = 1;
        // skip(1): the first hit is the opening marker at offset 0
        for idx in memchr_iter(marker, bytes).skip(1) {
            // contains at least one character
            if idx == 1 {
                continue;
            }
            newlines += count(&bytes[scanned..idx], b'\n');
            scanned = idx;
            if newlines >= 2 {
                break;
            }
            if validate_marker(idx, input) {
                return Ok((input.slice(idx + 1..), input.slice(1..idx)));
            }
        }
        Err(nom::Err::Error(()))
    }
}
/// Decides whether the marker byte at `pos` may close an emphasis.
///
/// The byte before it must not be ASCII whitespace, and the byte after it must
/// either not exist (end of input) or be one of the permitted following
/// characters (blank, line break or certain punctuation).
fn validate_marker(pos: usize, text: Input) -> bool {
    let bytes = text.as_bytes();
    if bytes[pos - 1].is_ascii_whitespace() {
        return false;
    }
    match bytes.get(pos + 1) {
        None => true,
        Some(&post) => matches!(
            post,
            b' ' | b'\t'
                | b'\r'
                | b'\n'
                | b'-'
                | b'.'
                | b','
                | b';'
                | b':'
                | b'!'
                | b'?'
                | b'\''
                | b')'
                | b'}'
                | b'['
        ),
    }
}
/// Reports whether an emphasis may start right after `input`.
///
/// Returns `true` when `input` is empty or when its last byte is a tab, space,
/// `-`, `(`, `{`, backslash, `"`, `\r` or `\n`.
pub fn verify_pre(input: &str) -> bool {
    match input.as_bytes().last() {
        None => true,
        Some(&last) => matches!(
            last,
            b'\t' | b' ' | b'-' | b'(' | b'{' | b'\\' | b'"' | b'\r' | b'\n'
        ),
    }
}
// Snapshot tests for `bold_node`, followed by inputs that must be rejected.
#[test]
fn parse() {
use crate::{ast::Bold, tests::to_ast, ParseConfig};
let to_bold = to_ast::<Bold>(bold_node);
insta::assert_debug_snapshot!(
to_bold("*bold*").syntax,
@r###"
BOLD@0..6
STAR@0..1 "*"
TEXT@1..5 "bold"
STAR@5..6 "*"
"###
);
// a marker in the middle that is followed by a letter does not close the emphasis
insta::assert_debug_snapshot!(
to_bold("*bo*ld*").syntax,
@r###"
BOLD@0..7
STAR@0..1 "*"
TEXT@1..6 "bo*ld"
STAR@6..7 "*"
"###
);
// a single line break inside the contents is allowed
insta::assert_debug_snapshot!(
to_bold("*bo\nld*").syntax,
@r###"
BOLD@0..7
STAR@0..1 "*"
TEXT@1..6 "bo\nld"
STAR@6..7 "*"
"###
);
let config = &ParseConfig::default();
// closing marker followed by a letter
assert!(bold_node(("*bold*a", config).into()).is_err());
// whitespace directly before the closing marker
assert!(bold_node(("*bold *", config).into()).is_err());
// whitespace directly after the opening marker
assert!(bold_node(("* bold*", config).into()).is_err());
// two line breaks inside the contents
assert!(bold_node(("*b\nol\nd*", config).into()).is_err());
// wrong marker for italic
assert!(italic_node(("*bold*", config).into()).is_err());
}

120
src/syntax/entity.rs Normal file
View file

@ -0,0 +1,120 @@
use nom::{
branch::alt,
bytes::complete::{tag, take_while_m_n},
character::complete::alphanumeric1,
combinator::opt,
IResult,
};
use crate::{
entities::ENTITIES,
syntax::combinator::{backslash_token, node},
SyntaxKind,
};
use super::{combinator::GreenElement, input::Input};
/// Parses an entity, trying the `\NAME` form (`template1`) first and the `\_SPACES`
/// form (`template2`) second.
///
/// The input is expected to start with a backslash (checked in debug builds only).
/// The combined parser is run through the crate's `lossless_parser!` macro (defined
/// elsewhere; NOTE(review): presumably a losslessness check — confirm against its
/// definition).
pub fn entity_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert!(input.s.starts_with('\\'));
let mut parser = alt((template1, template2));
crate::lossless_parser!(parser, input)
}
// `\NAME POST` or `\NAME{}`
//
// NAME must be listed in `ENTITIES`. Without the `{}` suffix, the byte following
// NAME (if any) must not be an ASCII letter.
fn template1(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, backslash) = backslash_token(input)?;
    let (input, name) = alphanumeric1(input)?;
    let is_known = ENTITIES.iter().any(|entry| entry.0 == name.s);
    if !is_known {
        return Err(nom::Err::Error(()));
    }

    let (input, brackets) = opt(tag("{}"))(input)?;
    let children = match brackets {
        Some(brackets) => vec![backslash, name.text_token(), brackets.text_token()],
        None => {
            let followed_by_letter =
                matches!(input.bytes().next(), Some(post) if post.is_ascii_alphabetic());
            if followed_by_letter {
                return Err(nom::Err::Error(()));
            }
            vec![backslash, name.text_token()]
        }
    };
    Ok((input, node(SyntaxKind::ENTITY, children)))
}
// `\_SPACES`: a backslash, an underscore and between 1 and 20 spaces
fn template2(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, backslash) = backslash_token(input)?;
    let (input, underscore) = tag("_")(input)?;
    let (input, spaces) = take_while_m_n(1, 20, |c| c == ' ')(input)?;

    let children = [
        backslash,
        underscore.token(SyntaxKind::UNDERSCORE),
        spaces.text_token(),
    ];
    Ok((input, node(SyntaxKind::ENTITY, children)))
}
// Snapshot tests for `entity_node`: plain names, a name with `{}` suffix, the
// `\_SPACES` form, and an unknown name that must be rejected.
#[test]
fn parse() {
use crate::{ast::Entity, tests::to_ast, ParseConfig};
let to_entity = to_ast::<Entity>(entity_node);
insta::assert_debug_snapshot!(
to_entity("\\cent").syntax,
@r###"
ENTITY@0..5
BACKSLASH@0..1 "\\"
TEXT@1..5 "cent"
"###
);
insta::assert_debug_snapshot!(
to_entity("\\S").syntax,
@r###"
ENTITY@0..2
BACKSLASH@0..1 "\\"
TEXT@1..2 "S"
"###
);
// `{}` ends the entity; the text after it is not part of the node
insta::assert_debug_snapshot!(
to_entity("\\frac12{}test").syntax,
@r###"
ENTITY@0..9
BACKSLASH@0..1 "\\"
TEXT@1..7 "frac12"
TEXT@7..9 "{}"
"###
);
// `\_` followed by spaces
insta::assert_debug_snapshot!(
to_entity("\\_ ").syntax,
@r###"
ENTITY@0..21
BACKSLASH@0..1 "\\"
UNDERSCORE@1..2 "_"
TEXT@2..21 " "
"###
);
// a name that is not a known entity
let c = ParseConfig::default();
assert!(entity_node(("\\poi", &c).into()).is_err());
}

100
src/syntax/fixed_width.rs Normal file
View file

@ -0,0 +1,100 @@
use nom::{
bytes::complete::{tag, take_while},
character::complete::{space0, space1},
combinator::{iterator, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder},
input::Input,
keyword::affiliated_keyword_nodes,
SyntaxKind,
};
fn fixed_width_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let mut b = NodeBuilder::new();
let (input, keywords) = affiliated_keyword_nodes(input)?;
b.children.extend(keywords);
let mut iter = iterator(
input,
opt(tuple((
space0,
tag(":"),
opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))),
eol_or_eof,
))),
);
for (idx, option) in iter.enumerate() {
match option {
Some((ws, common, content, eol)) => {
b.ws(ws);
b.token(SyntaxKind::COMMA, common);
if let Some((ws, text)) = content {
b.ws(ws);
b.text(text);
}
b.text(eol);
}
_ if idx == 0 => return Err(nom::Err::Error(())),
_ => break,
}
}
let (input, _) = iter.finish()?;
let (input, post_blank) = blank_lines(input)?;
b.children.extend(post_blank);
Ok((input, b.finish(SyntaxKind::FIXED_WIDTH)))
}
/// Parses a fixed-width area (consecutive lines starting with `:`) into a
/// `FIXED_WIDTH` node.
///
/// Delegates to `fixed_width_node_base` through the crate's `lossless_parser!`
/// macro. With the `tracing` feature enabled, the call is instrumented at
/// debug level and records the input text.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn fixed_width_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(fixed_width_node_base, input)
}
// Snapshot test for `fixed_width_node`: several `:` lines (one of them without
// content) followed by blank lines must end up in a single FIXED_WIDTH node.
#[test]
fn parse() {
use crate::{ast::FixedWidth, tests::to_ast};
let to_fixed_width = to_ast::<FixedWidth>(fixed_width_node);
// Note that the `:` markers are reported with kind COMMA in the snapshot.
insta::assert_debug_snapshot!(
to_fixed_width(
r#": A
:
: B
: C
"#
).syntax,
@r###"
FIXED_WIDTH@0..19
COMMA@0..1 ":"
WHITESPACE@1..2 " "
TEXT@2..3 "A"
TEXT@3..4 "\n"
COMMA@4..5 ":"
TEXT@5..6 "\n"
COMMA@6..7 ":"
WHITESPACE@7..8 " "
TEXT@8..9 "B"
TEXT@9..10 "\n"
COMMA@10..11 ":"
WHITESPACE@11..12 " "
TEXT@12..13 "C"
TEXT@13..14 "\n"
BLANK_LINE@14..15 "\n"
BLANK_LINE@15..19 " "
"###
);
}

157
src/syntax/fn_def.rs Normal file
View file

@ -0,0 +1,157 @@
use nom::{
bytes::complete::{tag, take_while1},
combinator::map,
sequence::tuple,
IResult,
};
use super::{
combinator::{
blank_lines, colon_token, l_bracket_token, r_bracket_token, trim_line_end, GreenElement,
NodeBuilder,
},
input::Input,
keyword::affiliated_keyword_nodes,
SyntaxKind,
};
/// Parses a footnote definition such as `[fn:LABEL] text` into a `FN_DEF` node.
///
/// The input may start with affiliated keywords. `LABEL` must be a non-empty
/// run of ASCII alphanumerics, `-` or `_`. The remainder of the line and any
/// following blank lines become part of the node.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn fn_def_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(
        tuple((
            affiliated_keyword_nodes,
            l_bracket_token,
            tag("fn"),
            colon_token,
            // `take_while1` makes an empty label (`[fn:]`) a parse error.
            take_while1(|c: char| c.is_ascii_alphanumeric() || matches!(c, '-' | '_')),
            r_bracket_token,
            trim_line_end,
            blank_lines,
        )),
        |(
            keywords,
            open,
            prefix,
            separator,
            label,
            close,
            (definition, trailing_ws, newline),
            blank,
        )| {
            // Children are appended in source order.
            let mut builder = NodeBuilder::new();
            builder.children.extend(keywords);
            builder.push(open);
            builder.text(prefix);
            builder.push(separator);
            builder.text(label);
            builder.push(close);
            // `trim_line_end` yields three pieces, added here as text,
            // whitespace and newline respectively.
            builder.text(definition);
            builder.ws(trailing_ws);
            builder.nl(newline);
            builder.children.extend(blank);
            builder.finish(SyntaxKind::FN_DEF)
        },
    );

    crate::lossless_parser!(parser, input)
}
// Snapshot tests for `fn_def_node`: accepted label shapes, a definition without
// content, newline handling, malformed inputs, and affiliated keywords.
#[test]
fn parse() {
use crate::ParseConfig;
use crate::{ast::FnDef, tests::to_ast};
let to_fn_def = to_ast::<FnDef>(fn_def_node);
// Numeric label.
insta::assert_debug_snapshot!(
to_fn_def("[fn:1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..26
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
TEXT@6..26 " https://orgmode.org"
"###
);
// Label containing an underscore.
insta::assert_debug_snapshot!(
to_fn_def("[fn:word_1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..31
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..10 "word_1"
R_BRACKET@10..11 "]"
TEXT@11..31 " https://orgmode.org"
"###
);
// Label containing a hyphen.
insta::assert_debug_snapshot!(
to_fn_def("[fn:WORD-1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..31
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..10 "WORD-1"
R_BRACKET@10..11 "]"
TEXT@11..31 " https://orgmode.org"
"###
);
// Definition with nothing after the closing bracket.
insta::assert_debug_snapshot!(
to_fn_def("[fn:WORD]").syntax,
@r###"
FN_DEF@0..9
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..8 "WORD"
R_BRACKET@8..9 "]"
"###
);
// The trailing newline gets its own NEW_LINE token.
insta::assert_debug_snapshot!(
to_fn_def("[fn:1] In particular, the parser requires stars at column 0 to be\n").syntax,
@r###"
FN_DEF@0..66
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
TEXT@6..65 " In particular, the p ..."
NEW_LINE@65..66 "\n"
"###
);
// Rejected inputs: empty label, label containing a space, missing `]`.
let config = &ParseConfig::default();
assert!(fn_def_node(("[fn:] https://orgmode.org", config).into()).is_err());
assert!(fn_def_node(("[fn:wor d] https://orgmode.org", config).into()).is_err());
assert!(fn_def_node(("[fn:WORD https://orgmode.org", config).into()).is_err());
// A preceding affiliated keyword becomes the first child of the FN_DEF node.
insta::assert_debug_snapshot!(
to_fn_def("#+ATTR_poi: 1\n[fn:WORD-1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..45
AFFILIATED_KEYWORD@0..14
HASH_PLUS@0..2 "#+"
TEXT@2..10 "ATTR_poi"
COLON@10..11 ":"
TEXT@11..13 " 1"
NEW_LINE@13..14 "\n"
L_BRACKET@14..15 "["
TEXT@15..17 "fn"
COLON@17..18 ":"
TEXT@18..24 "WORD-1"
R_BRACKET@24..25 "]"
TEXT@25..45 " https://orgmode.org"
"###
);
}

122
src/syntax/fn_ref.rs Normal file
View file

@ -0,0 +1,122 @@
use memchr::memchr2_iter;
use nom::{
bytes::complete::{tag, take_while},
combinator::opt,
sequence::tuple,
Err, IResult, InputTake,
};
use super::{
combinator::{colon_token, l_bracket_token, node, r_bracket_token, GreenElement},
input::Input,
object::standard_object_nodes,
SyntaxKind::*,
};
/// Parses a footnote reference (`[fn:LABEL]`, `[fn:LABEL:DEFINITION]` or
/// `[fn::DEFINITION]`) into a `FN_REF` node.
///
/// Delegates to `fn_ref_node_base` through the crate's `lossless_parser!`
/// macro. With the `tracing` feature enabled, the call is instrumented at
/// debug level and records the input text.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn fn_ref_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(fn_ref_node_base, input)
}
fn fn_ref_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, (l_bracket, fn_, colon, label, definition, r_bracket)) = tuple((
l_bracket_token,
tag("fn"),
colon_token,
take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
opt(tuple((colon_token, balanced_brackets))),
r_bracket_token,
))(input)?;
let mut children = vec![l_bracket, fn_.text_token(), colon, label.text_token()];
if let Some((colon, definition)) = definition {
children.push(colon);
children.extend(standard_object_nodes(definition));
}
children.push(r_bracket);
Ok((input, node(FN_REF, children)))
}
// Splits `input` just before the first `]` that is not matched by an earlier
// `[` within `input`. The caller has already consumed the opening bracket, so
// that `]` is the one closing the footnote reference. Fails when no such `]`
// exists.
fn balanced_brackets(input: Input) -> IResult<Input, Input, ()> {
    let haystack = input.as_bytes();
    // Number of `[` seen so far that still await their `]`.
    let mut depth = 0usize;

    for pos in memchr2_iter(b'[', b']', haystack) {
        match haystack[pos] {
            b'[' => depth += 1,
            // A `]` that closes a nested pair.
            _ if depth > 0 => depth -= 1,
            // A `]` at depth zero: stop here and leave it unconsumed.
            _ => return Ok(input.take_split(pos)),
        }
    }

    Err(Err::Error(()))
}
// Snapshot tests for `fn_ref_node`: labelled, labelled with an inline
// definition, anonymous with a definition, nested brackets, and an unbalanced
// input.
#[test]
fn parse() {
use crate::{ast::FnRef, tests::to_ast, ParseConfig};
let to_fn_ref = to_ast::<FnRef>(fn_ref_node);
// Label only.
insta::assert_debug_snapshot!(
to_fn_ref("[fn:1]").syntax,
@r###"
FN_REF@0..6
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
"###
);
// Label followed by an inline definition.
insta::assert_debug_snapshot!(
to_fn_ref("[fn:1:2]").syntax,
@r###"
FN_REF@0..8
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
COLON@5..6 ":"
TEXT@6..7 "2"
R_BRACKET@7..8 "]"
"###
);
// Anonymous footnote: the empty label is still emitted as a zero-length TEXT token.
insta::assert_debug_snapshot!(
to_fn_ref("[fn::2]").syntax,
@r###"
FN_REF@0..7
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..4 ""
COLON@4..5 ":"
TEXT@5..6 "2"
R_BRACKET@6..7 "]"
"###
);
// A balanced `[]` pair inside the definition is kept as part of it.
insta::assert_debug_snapshot!(
to_fn_ref("[fn::[]]").syntax,
@r###"
FN_REF@0..8
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..4 ""
COLON@4..5 ":"
TEXT@5..7 "[]"
R_BRACKET@7..8 "]"
"###
);
// Unbalanced brackets: the reference is never closed, so parsing fails.
let config = &ParseConfig::default();
assert!(fn_ref_node(("[fn::[]", config).into()).is_err());
}

Some files were not shown because too many files have changed in this diff Show more