Compare commits

..

1 commit

Author SHA1 Message Date
dependabot-preview[bot]
f9a1093eb5
Upgrade to GitHub-native Dependabot 2021-04-29 20:29:30 +00:00
138 changed files with 9202 additions and 15079 deletions

View file

@ -1,2 +0,0 @@
[registries.crates-io]
protocol = "sparse"

View file

@ -1,8 +1,16 @@
version: 2
updates:
- package-ecosystem: cargo
directory: "/"
schedule:
interval: weekly
time: "09:00"
open-pull-requests-limit: 10
- package-ecosystem: cargo
directory: "/"
schedule:
interval: weekly
time: "09:00"
open-pull-requests-limit: 10
ignore:
- dependency-name: pretty_assertions
versions:
- 0.7.1
- dependency-name: nom
versions:
- 6.1.0
- 6.1.1

View file

@ -1,57 +0,0 @@
name: CI
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@master
- name: Setup rust
uses: dtolnay/rust-toolchain@stable
with:
components: clippy, rustfmt
- run: |
cargo fmt -- --check
cargo test --all-features
cargo clippy
gh-pages:
if: github.ref == 'refs/heads/v0.10'
permissions:
contents: read
pages: write
id-token: write
runs-on: ubuntu-latest
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Setup Pages
uses: actions/configure-pages@v3
- name: Install
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- name: Build
run: wasm-pack build -t web -d ./dist --out-name orgize ./wasm/
- name: Upload artifact
uses: actions/upload-pages-artifact@v2
with:
path: "./wasm"
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v2

39
.github/workflows/rust.yml vendored Normal file
View file

@ -0,0 +1,39 @@
name: Rust
on:
pull_request:
push:
branches:
- master
jobs:
format:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Run rustfmt
run: cargo fmt -- --check
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Install Rust toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
override: true
- name: Cache target/
uses: actions/cache@v1
with:
path: target
key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }}
- name: Run Test
run: cargo test --all-features

2
.gitignore vendored
View file

@ -2,6 +2,6 @@
**/*.rs.bk
Cargo.lock
.vscode
benches/*.org
.gdb_history
perf.data*

View file

@ -1,53 +1,39 @@
[workspace]
resolver = "2"
members = [".", "./wasm"]
[package]
name = "orgize"
version = "0.10.0-alpha.10"
version = "0.8.4"
authors = ["PoiScript <poiscript@gmail.com>"]
description = "A Rust library for parsing orgmode files."
repository = "https://github.com/PoiScript/orgize"
edition = "2021"
license = "MIT"
description = "A Rust library for parsing org-mode files."
readme = "README.md"
keywords = ["orgmode", "org-mode", "emacs", "parser"]
edition = "2018"
license = "MIT"
keywords = ["orgmode", "emacs", "parser"]
[package.metadata.docs.rs]
all-features = true
[badges]
travis-ci = { repository = "PoiScript/orgize" }
[features]
default = []
indexmap = ["dep:indexmap"]
chrono = ["dep:chrono"]
tracing = ["dep:tracing"]
syntax-org-fc = []
default = ["ser"]
ser = ["serde", "serde_indextree", "indexmap/serde-1"]
[dependencies]
bytecount = "0.6"
cfg-if = "1.0.0"
chrono = { version = "0.4", optional = true }
indexmap = { version = "2.1", optional = true }
jetscii = "0.5"
memchr = "2.5"
nom = { version = "7.1", default-features = false, features = ["std"] }
rowan = "0.15"
tracing = { version = "0.1", optional = true }
bytecount = "0.6.0"
chrono = { version = "0.4.11", optional = true }
indextree = "4.0.0"
jetscii = "0.4.4"
lazy_static = "1.4.0"
memchr = "2.3.3"
# we don't need to parse any float number, so lexical crate is redundant
nom = { version = "5.1.1", default-features = false, features = ["std"] }
serde = { version = "1.0.106", optional = true, features = ["derive"] }
serde_indextree = { version = "0.2.0", optional = true }
syntect = { version = "4.1.0", optional = true }
indexmap = { version = "1.3.2", features = ["serde-1"], optional = true}
[dev-dependencies]
criterion = "0.5"
insta = "1.29"
slugify = "0.1"
tracing-subscriber = { version = "0.3", features = ["fmt"] }
[[bench]]
name = "parse"
harness = false
[[example]]
name = "parse"
required-features = ["tracing"]
[profile.dev.package]
insta.opt-level = 3
similar.opt-level = 3
pretty_assertions = "0.6.1"
serde_json = "1.0.51"
slugify = "0.1.0"

View file

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019-2023 Alex Lin (poi)
Copyright (c) 2019-2020 Alex Lin (poi)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

281
README.md
View file

@ -1,119 +1,210 @@
# Orgize
[![Build Status](https://travis-ci.org/PoiScript/orgize.svg?branch=master)](https://travis-ci.org/PoiScript/orgize)
[![Crates.io](https://img.shields.io/crates/v/orgize.svg)](https://crates.io/crates/orgize)
[![Documentation](https://docs.rs/orgize/badge.svg)](https://docs.rs/orgize)
[![Build status](https://img.shields.io/github/actions/workflow/status/PoiScript/orgize/ci.yml)](https://github.com/PoiScript/orgize/actions/workflows/ci.yml)
![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)
[![Document](https://docs.rs/orgize/badge.svg)](https://docs.rs/orgize)
A Rust library for parsing org-mode files.
A Rust library for parsing orgmode files.
Live Demo: <https://poiscript.github.io/orgize/>
[Live demo](https://orgize.herokuapp.com/)
## Parse
To parse an org-mode string, simply invoke the `Org::parse` function:
```rust
use orgize::{Org, rowan::ast::AstNode};
let org = Org::parse("* DONE Title :tag:");
assert_eq!(
format!("{:#?}", org.document().syntax()),
r#"DOCUMENT@0..18
HEADLINE@0..18
HEADLINE_STARS@0..1 "*"
WHITESPACE@1..2 " "
HEADLINE_KEYWORD_DONE@2..6 "DONE"
WHITESPACE@6..7 " "
HEADLINE_TITLE@7..13
TEXT@7..13 "Title "
HEADLINE_TAGS@13..18
COLON@13..14 ":"
TEXT@14..17 "tag"
COLON@17..18 ":"
"#);
```
Use `ParseConfig::parse` to specify a custom parse config:
```rust
use orgize::{Org, ParseConfig, ast::Headline};
let config = ParseConfig {
// custom todo keywords
todo_keywords: (vec!["TASK".to_string()], vec![]),
..Default::default()
};
let org = config.parse("* TASK Title 1");
let hdl = org.first_node::<Headline>().unwrap();
assert_eq!(hdl.todo_keyword().unwrap(), "TASK");
```
## Traverse
Use `org.traverse(&mut traversal)` to walk through the syntax tree.
```rust
use orgize::{
export::{from_fn, Container, Event},
Org,
};
let mut hdl_count = 0;
let mut handler = from_fn(|event| {
if matches!(event, Event::Enter(Container::Headline(_))) {
hdl_count += 1;
}
});
Org::parse("* 1\n** 2\n*** 3\n****4").traverse(&mut handler);
assert_eq!(hdl_count, 3);
```
## Modify
Use `org.replace_range(TextRange::new(start, end), "new_text")` to modify the syntax tree:
```rust
use orgize::{Org, ParseConfig, ast::Headline, TextRange};
let mut org = Org::parse("hello\n* world");
let hdl = org.first_node::<Headline>().unwrap();
org.replace_range(hdl.text_range(), "** WORLD!");
let hdl = org.first_node::<Headline>().unwrap();
assert_eq!(hdl.level(), 2);
org.replace_range(TextRange::up_to(hdl.start()), "");
assert_eq!(org.to_org(), "** WORLD!");
```
## Render to html
Call the `Org::to_html` function to export org element tree to html:
To parse an orgmode string, simply invoke the `Org::parse` function:
```rust
use orgize::Org;
Org::parse("* DONE Title :tag:");
```
or `Org::parse_custom`:
``` rust
use orgize::{Org, ParseConfig};
Org::parse_custom(
"* TASK Title 1",
&ParseConfig {
// custom todo keywords
todo_keywords: (vec!["TASK".to_string()], vec![]),
..Default::default()
},
);
```
## Iter
The `Org::iter` function returns an iterator of `Event`s, which is
a simple wrapper of `Element`.
```rust
use orgize::Org;
for event in Org::parse("* DONE Title :tag:").iter() {
// handling the event
}
```
**Note**: whether an element is a container or not, it will appear twice in one loop:
once as `Event::Start(element)` and once as `Event::End(element)`.
## Render html
You can call the `Org::write_html` function to generate html directly, which
uses the `DefaultHtmlHandler` internally:
```rust
use orgize::Org;
let mut writer = Vec::new();
Org::parse("* title\n*section*").write_html(&mut writer).unwrap();
assert_eq!(
Org::parse("* title\n*section*").to_html(),
String::from_utf8(writer).unwrap(),
"<main><h1>title</h1><section><p><b>section</b></p></section></main>"
);
```
Check out `examples/html-slugify.rs` to see how to customize the html export process.
## Render html with custom `HtmlHandler`
To customize html rendering, simply implement the `HtmlHandler` trait and pass
it to the `Org::wirte_html_custom` function.
The following code demonstrates how to add an id to every headline and return
your own error type while rendering.
```rust
use std::convert::From;
use std::io::{Error as IOError, Write};
use std::string::FromUtf8Error;
use orgize::export::{DefaultHtmlHandler, HtmlHandler};
use orgize::{Element, Org};
use slugify::slugify;
#[derive(Debug)]
enum MyError {
IO(IOError),
Heading,
Utf8(FromUtf8Error),
}
// From<std::io::Error> trait is required for custom error type
impl From<IOError> for MyError {
fn from(err: IOError) -> Self {
MyError::IO(err)
}
}
impl From<FromUtf8Error> for MyError {
fn from(err: FromUtf8Error) -> Self {
MyError::Utf8(err)
}
}
#[derive(Default)]
struct MyHtmlHandler(DefaultHtmlHandler);
impl HtmlHandler<MyError> for MyHtmlHandler {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
if let Element::Title(title) = element {
if title.level > 6 {
return Err(MyError::Heading);
} else {
write!(
w,
"<h{0}><a id=\"{1}\" href=\"#{1}\">",
title.level,
slugify!(&title.raw),
)?;
}
} else {
// fallthrough to default handler
self.0.start(w, element)?;
}
Ok(())
}
fn end<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
if let Element::Title(title) = element {
write!(w, "</a></h{}>", title.level)?;
} else {
self.0.end(w, element)?;
}
Ok(())
}
}
fn main() -> Result<(), MyError> {
let mut writer = Vec::new();
let mut handler = MyHtmlHandler::default();
Org::parse("* title\n*section*").wirte_html_custom(&mut writer, &mut handler)?;
assert_eq!(
String::from_utf8(writer)?,
"<main><h1><a id=\"title\" href=\"#title\">title</a></h1>\
<section><p><b>section</b></p></section></main>"
);
Ok(())
}
```
**Note**: as mentioned above, each element will appear twice while iterating,
and the handler will silently ignore all end events from non-container elements.
So if you want to change how a non-container element renders, just redefine the `start`
function and leave the `end` function unchanged.
## Serde
The `Org` struct implements serde's `Serialize` trait, which means you can
serialize it into any format supported by serde, such as json:
```rust
use orgize::Org;
use serde_json::{json, to_string};
let org = Org::parse("I 'm *bold*.");
println!("{}", to_string(&org).unwrap());
// {
// "type": "document",
// "children": [{
// "type": "section",
// "children": [{
// "type": "paragraph",
// "children":[{
// "type": "text",
// "value":"I 'm "
// }, {
// "type": "bold",
// "children":[{
// "type": "text",
// "value": "bold"
// }]
// }, {
// "type":"text",
// "value":"."
// }]
// }]
// }]
// }
```
## Features
- **`chrono`**: adds the ability to convert `Timestamp` into `chrono::NaiveDateTime`, disabled by default.
By now, orgize provides four features:
- **`indexmap`**: adds the ability to convert `PropertyDrawer` properties into `IndexMap`, disabled by default.
+ `ser`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default.
## API compatibility
+ `chrono`: adds the ability to convert `Datetime` into `chrono` structs, disabled by default.
`element.syntax()` exposes access to the internal syntax tree, along with some rowan low-level APIs.
This can be useful for intricate tasks.
+ `syntect`: provides `SyntectHtmlHandler` for highlighting code block, disabled by default.
However, the structure of the internal syntax tree can change between different versions of the library.
Because of this, the result of `element.syntax()` doesn't follow semantic versioning,
which means updates might break your code if it relies on this method.
+ `indexmap`: Uses `IndexMap` instead of `HashMap` for properties to preserve their order, disabled by default.
## License
MIT

1
benches/.gitignore vendored
View file

@ -1 +0,0 @@
*.org

View file

@ -1,39 +1,30 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
#![feature(test)]
extern crate test;
use orgize::Org;
use test::Bencher;
const INPUT: &[(&str, &str)] = &[
("doc.org", include_str!("./doc.org")),
("org-faq.org", include_str!("./org-faq.org")),
("org-hacks.org", include_str!("./org-hacks.org")),
(
"org-release-notes.org",
include_str!("./org-release-notes.org"),
),
("org-syntax.org", include_str!("./org-syntax.org")),
];
pub fn bench_parse(c: &mut Criterion) {
let mut group = c.benchmark_group("Org::parse");
for (id, org) in INPUT {
group.throughput(Throughput::Bytes(org.len() as u64));
group.bench_with_input(*id, org, |b, i| b.iter(|| Org::parse(i)));
}
group.finish();
#[bench]
fn org_syntax(b: &mut Bencher) {
// wget https://orgmode.org/worg/sources/dev/org-syntax.org
b.iter(|| {
Org::parse(include_str!("org-syntax.org"));
})
}
pub fn bench_to_html(c: &mut Criterion) {
let mut group = c.benchmark_group("Org::to_html");
for (id, org) in INPUT {
group.throughput(Throughput::Bytes(org.len() as u64));
group.bench_with_input(*id, &Org::parse(org), |b, i| b.iter(|| i.to_html()));
}
group.finish();
#[bench]
fn doc(b: &mut Bencher) {
// wget https://orgmode.org/worg/sources/doc.org
b.iter(|| {
Org::parse(include_str!("doc.org"));
})
}
criterion_group!(benches, bench_parse, bench_to_html);
criterion_main!(benches);
#[bench]
fn org_faq(b: &mut Bencher) {
// wget https://orgmode.org/worg/sources/org-faq.org
b.iter(|| {
Org::parse(include_str!("org-faq.org"));
})
}

View file

@ -1,44 +0,0 @@
## Format, test, lint
```shell
cargo fmt -- --check
cargo test --all-features
cargo clippy --allow-dirty --allow-staged
```
## Update snapshot testing
```shell
cargo install cargo-insta
cargo insta test --all-features
cargo insta review
```
## Fuzz testing
```shell
cargo install cargo-fuzz
rustup default nightly
cargo fuzz run fuzz_target_1
```
## Benchmark
```shell
curl -q https://orgmode.org/worg/doc.org --output ./benches/doc.org
curl -q https://orgmode.org/worg/org-faq.org --output ./benches/org-faq.org
curl -q https://orgmode.org/worg/org-hacks.org --output ./benches/org-hacks.org
curl -q https://orgmode.org/worg/org-release-notes.org --output ./benches/org-release-notes.org
curl -q https://orgmode.org/worg/org-syntax.org --output ./benches/org-syntax.org
curl -q https://raw.githubusercontent.com/bzg/org-mode/main/doc/org-manual.org --output ./benches/org-manual.org
cargo bench --bench parse
```
## Benchmark w/ flamegraph
```shell
cargo install flamegraph
cargo flamegraph --bench parse -o baseline.svg -- --bench
# then open baseline.svg with your browser
```

View file

@ -4,7 +4,7 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information.
- [x] Headline
- [X] Objects insides headline title
- [x] Affiliated Keywords
- [ ] Affiliated Keywords
## Greater Elements
- [x] Greater Blocks
@ -15,10 +15,10 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information.
- [ ] Objects insides inlinetask title
- [x] Plain Lists and Items
- [x] Nested List
- [x] Nested List Indentation
- [x] Tag
- [x] Counter
- [x] Counter set
- [ ] Nested List Indentation
- [ ] Tag
- [ ] Counter
- [ ] Counter set
- [X] Property Drawers
- [X] Tables
@ -26,25 +26,25 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information.
- [x] Babel Call
- [x] Blocks
- [x] Escape characters (`#`,`*`, etc)
- [ ] Escape characters (`#`,`*`, etc)
- [ ] Line numbers
- [X] Clock, Diary Sexp and Planning
- [x] Comments
- [x] Fixed Width Areas
- [x] Horizontal Rules
- [x] Keywords
- [x] LaTeX Environments
- [ ] LaTeX Environments
- [X] Node Properties
- [x] Paragraphs
- [X] Table Rows
## Objects
- [x] Entities and LaTeX Fragments
- [ ] Entities and LaTeX Fragments
- [x] Export Snippets
- [x] Footnote References
- [x] Inline Babel Calls and Source Blocks
- [x] Line Breaks
- [ ] Line Breaks
- [x] Links
- [x] Regular link
- [ ] Plain link
@ -53,22 +53,17 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information.
- [x] Macros
- [x] Targets and Radio Targets
- [x] Statistics Cookies
- [x] Subscript and Superscript
- [ ] Subscript and Superscript
- [X] Table Cells
- [x] Timestamps
- [x] Text Markup
- [x] bold
- [x] italic
- [x] underline
- [x] verbatim
- [x] code
- [x] strike-through
## Export
- [x] HTML `Org::to_html`
- [X] Org `Org::to_org`
- [x] HTML
- [X] Org
- [ ] LaTeX
- [X] JSON, (via Serde)
## Extra

942
docs/SYNTAX.md Normal file
View file

@ -0,0 +1,942 @@
# Table of Contents
1. [Headlines and Sections](#Headlines_and_Sections)
2. [Affiliated Keywords](#Affiliated_keywords)
3. [Greater Elements](#Greater_Elements)
1. [Greater Blocks](#Greater_Blocks)
2. [Drawers and Property Drawers](#Drawers)
3. [Dynamic Blocks](#Dynamic_Blocks)
4. [Footnote Definitions](#Footnote_Definitions)
5. [Inlinetasks](#Inlinetasks)
6. [Plain Lists and Items](#Plain_Lists_and_Items)
7. [Property Drawers](#Property_Drawers)
8. [Tables](#Tables)
4. [Elements](#Elements)
1. [Babel Call](#Babel_Call)
2. [Blocks](#Blocks)
3. [Clock, Diary Sexp and Planning](#Clock,_Diary_Sexp_and_Planning)
4. [Comments](#Comments)
5. [Fixed Width Areas](#Fixed_Width_Areas)
6. [Horizontal Rules](#Horizontal_Rules)
7. [Keywords](#Keywords)
8. [LaTeX Environments](#LaTeX_Environments)
9. [Node Properties](#Node_Properties)
10. [Paragraphs](#Paragraphs)
11. [Table Rows](#Table_Rows)
5. [Objects](#Objects)
1. [Entities and LaTeX Fragments](#Entities_and_LaTeX_Fragments)
2. [Export Snippets](#Export_Snippets)
3. [Footnote References](#Footnote_References)
4. [Inline Babel Calls and Source
Blocks](#Inline_Babel_Calls_and_Source_Blocks)
5. [Line Breaks](#Line_Breaks)
6. [Links](#Links)
7. [Macros](#Macros)
8. [Targets and Radio Targets](#Targets_and_Radio_Targets)
9. [Statistics Cookies](#Statistics_Cookies)
10. [Subscript and Superscript](#Subscript_and_Superscript)
11. [Table Cells](#Table_Cells)
12. [Timestamps](#Timestamp)
13. [Text Markup](#Emphasis_Markers)
This document describes and comments Org syntax as it is currently read by its
parser (Org Elements) and, therefore, by the export framework. It also includes
a few comments on that syntax.
A core concept in this syntax is that only headlines, sections, planning lines
and property drawers are context-free<sup><a id="fnr.1" class="footref"
href="#fn.1">1</a></sup><sup>, </sup><sup><a id="fnr.2" class="footref"
href="#fn.2">2</a></sup>. Every other syntactical part only exists within
specific environments.
Three categories are used to classify these environments: **Greater elements**,
**elements**, and **objects**, from the broadest scope to the narrowest. The
word **element** is used for both Greater and non-Greater elements, the context
should make that clear.
The paragraph is the unit of measurement. An element defines syntactical parts
that are at the same level as a paragraph, i.e. which cannot contain or be
included in a paragraph. An object is a part that could be included in an
element. Greater elements are all parts that can contain an element.
Empty lines belong to the largest element ending before them. For example, in a
list, empty lines between items are part of the item before them, but
empty lines at the end of a list belong to the plain list element.
Unless specified otherwise, case is not significant.
<a id="Headlines_and_Sections"></a>
# Headlines and Sections
A headline is defined as:
STARS KEYWORD PRIORITY TITLE TAGS
STARS is a string starting at column 0, containing at least one asterisk (and up
to `org-inlinetask-min-level` if `org-inlinetask` library is loaded) and ended
by a space character. The number of asterisks is used to define the level of the
headline. It's the sole compulsory part of a headline.
KEYWORD is a TODO keyword, which has to belong to the list defined in
`org-todo-keywords-1`. Case is significant.
PRIORITY is a priority cookie, i.e. a single letter preceded by a hash sign #
and enclosed within square brackets.
TITLE can be made of any character but a new line. Though, it will match after
every other part has been matched.
TAGS is made of words containing any alpha-numeric character, underscore, at
sign, hash sign or percent sign, and separated with colons.
Examples of valid headlines include:
*
** DONE
*** Some e-mail
**** TODO [#A] COMMENT Title :tag:a2%:
If the first word appearing in the title is `COMMENT`, the headline will be
considered as **commented**. Case is significant.
If its title is `org-footnote-section`, it will be considered as a **footnote
section**. Case is significant.
If `ARCHIVE` is one of its tags, it will be considered as **archived**. Case is
significant.
A headline contains directly one section (optionally), followed by any number of
deeper level headlines.
A section contains directly any greater element or element. Only a headline can
contain a section. As an exception, text before the first headline in the
document also belongs to a section.
As an example, consider the following document:
An introduction.
* A Headline
Some text.
** Sub-Topic 1
** Sub-Topic 2
*** Additional entry
Its internal structure could be summarized as:
(document
(section)
(headline
(section)
(headline)
(headline
(headline))))
<a id="Affiliated_keywords"></a>
# Affiliated Keywords
With the exception of [inlinetasks](#Inlinetasks),
[items](#Plain_Lists_and_Items), [planning](#Clock,_Diary_Sexp_and_Planning),
[clocks](#Clock,_Diary_Sexp_and_Planning), [node properties](#Node_Properties)
and [table rows](#Table_Rows), every other element type can be assigned
attributes.
This is done by adding specific keywords, named **affiliated keywords**, just
above the element considered, no blank line allowed.
Affiliated keywords are built upon one of the following patterns: `#+KEY: VALUE`,
`#+KEY[OPTIONAL]: VALUE` or `#+ATTR_BACKEND: VALUE`.
KEY is either `CAPTION`, `HEADER`, `NAME`, `PLOT` or `RESULTS` string.
BACKEND is a string constituted of alpha-numeric characters, hyphens or
underscores.
OPTIONAL and VALUE can contain any character but a new line. Only `CAPTION` and
`RESULTS` keywords can have an optional value.
An affiliated keyword can appear more than once if KEY is either `CAPTION` or
`HEADER` or if its pattern is `#+ATTR_BACKEND: VALUE`.
`CAPTION`, `AUTHOR`, `DATE` and `TITLE` keywords can contain objects in their
value and their optional value, if applicable.
<a id="Greater_Elements"></a>
# Greater Elements
Unless specified otherwise, greater elements can contain directly any other
element or greater element excepted:
- elements of their own type,
- [node properties](#Node_Properties), which can only be found in [property
drawers](#Property_Drawers),
- [items](#Plain_Lists_and_Items), which can only be found in [plain
lists](#Plain_Lists_and_Items).
<a id="Greater_Blocks"></a>
## Greater Blocks
Greater blocks consist in the following pattern:
#+BEGIN_NAME PARAMETERS
CONTENTS
#+END_NAME
NAME can contain any non-whitespace character.
PARAMETERS can contain any character other than new line, and can be omitted.
If NAME is `CENTER`, it will be a **center block**. If it is `QUOTE`, it will be
a **quote block**.
If the block is neither a center block, a quote block or a [block
element](#Blocks), it will be a **special block**.
CONTENTS can contain any element, except a line `#+END_NAME` on its own. Also
lines beginning with STARS must be quoted by a comma.
<a id="Drawers"></a>
## Drawers and Property Drawers
Pattern for drawers is:
:NAME:
CONTENTS
:END:
NAME can contain word-constituent characters, hyphens and underscores.
CONTENTS can contain any element but another drawer.
<a id="Dynamic_Blocks"></a>
## Dynamic Blocks
Pattern for dynamic blocks is:
#+BEGIN: NAME PARAMETERS
CONTENTS
#+END:
NAME cannot contain any whitespace character.
PARAMETERS can contain any character and can be omitted.
<a id="Footnote_Definitions"></a>
## Footnote Definitions
Pattern for footnote definitions is:
[fn:LABEL] CONTENTS
It must start at column 0.
LABEL is either a number or follows the pattern `fn:WORD`, where word can
contain any word-constituent character, hyphens and underscore characters.
CONTENTS can contain any element excepted another footnote definition. It ends
at the next footnote definition, the next headline, two consecutive empty lines
or the end of buffer.
<a id="Inlinetasks"></a>
## Inlinetasks
Inlinetasks are defined by `org-inlinetask-min-level` contiguous asterisk
characters starting at column 0, followed by a whitespace character.
Optionally, inlinetasks can be ended with a string constituted of
`org-inlinetask-min-level` contiguous asterisk characters starting at column 0,
followed by a space and the `END` string.
Inlinetasks are recognized only after `org-inlinetask` library is loaded.
<a id="Plain_Lists_and_Items"></a>
## Plain Lists and Items
Items are defined by a line starting with the following pattern:
`BULLET COUNTER-SET CHECK-BOX TAG`, in which only BULLET is mandatory.
BULLET is either an asterisk, a hyphen, a plus sign character or follows either
the pattern `COUNTER.` or `COUNTER)`. In any case, BULLET is followed by a
whitespace character or line ending.
COUNTER can be a number or a single letter.
COUNTER-SET follows the pattern [@COUNTER].
CHECK-BOX is either a single whitespace character, a `X` character or a hyphen,
enclosed within square brackets.
TAG follows `TAG-TEXT ::` pattern, where TAG-TEXT can contain any character but
a new line.
An item ends before the next item, the first line less or equally indented than
its starting line, or two consecutive empty lines. Indentation of lines within
other greater elements does not count, and neither do inlinetask boundaries.
A plain list is a set of consecutive items of the same indentation. It can only
directly contain items.
If first item in a plain list has a counter in its bullet, the plain list will
be an **ordered plain-list**. If it contains a tag, it will be a **descriptive
list**. Otherwise, it will be an **unordered list**. List types are mutually
exclusive.
For example, consider the following excerpt of an Org document:
1. item 1
2. [X] item 2
- some tag :: item 2.1
Its internal structure is as follows:
(ordered-plain-list
(item)
(item
(descriptive-plain-list
(item))))
<a id="Property_Drawers"></a>
## Property Drawers
Property drawers are a special type of drawer containing properties attached to
a headline. They are located right after a [headline](#Headlines_and_Sections)
and its [planning](#Clock,_Diary_Sexp_and_Planning) information.
HEADLINE
PROPERTYDRAWER
HEADLINE
PLANNING
PROPERTYDRAWER
PROPERTYDRAWER follows the pattern
:PROPERTIES:
CONTENTS
:END:
where CONTENTS consists of zero or more [node properties](#Node_Properties).
<a id="Tables"></a>
## Tables
Tables start at lines beginning with either a vertical bar or the `+-` string
followed by plus or minus signs only, assuming they are not preceded with lines
of the same type. These lines can be indented.
A table starting with a vertical bar has `org` type. Otherwise it has `table.el`
type.
Org tables end at the first line not starting with a vertical bar. Table.el
tables end at the first line not starting with either a vertical line or a plus
sign. Such lines can be indented.
An org table can only contain table rows. A table.el table does not contain
anything.
One or more `#+TBLFM: FORMULAS` lines, where `FORMULAS` can contain any
character, can follow an org table.
<a id="Elements"></a>
# Elements
Elements cannot contain any other element.
Only [keywords](#Keywords) whose name belongs to
`org-element-document-properties`, [verse blocks](#Blocks) ,
[paragraphs](#Paragraphs) and [table rows](#Table_Rows) can contain objects.
<a id="Babel_Call"></a>
## Babel Call
Pattern for babel calls is:
#+CALL: VALUE
VALUE is optional. It can contain any character but a new line.
<a id="Blocks"></a>
## Blocks
Like [greater blocks](#Greater_Blocks), pattern for blocks is:
#+BEGIN_NAME DATA
CONTENTS
#+END_NAME
NAME cannot contain any whitespace character.
1. If NAME is `COMMENT`, it will be a **comment block**.
2. If it is `EXAMPLE`, it will be an **example block**.
3. If it is `EXPORT`, it will be an **export block**.
4. If it is `SRC`, it will be a **source block**.
5. If it is `VERSE`, it will be a **verse block**.
DATA can contain any character but a new line. It can be omitted, unless the
block is either a **source block** or an **export block**.
In the latter case, it should be constituted of a single word.
In the former case, it must follow the pattern `LANGUAGE SWITCHES ARGUMENTS`,
where SWITCHES and ARGUMENTS are optional.
LANGUAGE cannot contain any whitespace character.
SWITCHES is made of any number of `SWITCH` patterns, separated by blank lines.
A SWITCH pattern is either `-l FORMAT`, where FORMAT can contain any character
but a double quote and a new line, `-S` or `+S`, where S stands for a single
letter.
ARGUMENTS can contain any character but a new line.
CONTENTS can contain any character, including new lines. Though it will only
contain Org objects if the block is a verse block. Otherwise, CONTENTS will not
be parsed.
<a id="Clock,_Diary_Sexp_and_Planning"></a>
## Clock, Diary Sexp and Planning
A clock follows either of the patterns below:
CLOCK: INACTIVE-TIMESTAMP
CLOCK: INACTIVE-TIMESTAMP-RANGE DURATION
INACTIVE-TIMESTAMP, resp. INACTIVE-TIMESTAMP-RANGE, is an inactive, resp.
inactive range, [timestamp](#Timestamp) object.
DURATION follows the pattern:
=> HH:MM
HH is a number containing any number of digits. MM is a two-digit number.
A diary sexp is a line starting at column 0 with `%%(` string. It can then
contain any character besides a new line.
A planning is an element with the following pattern:
HEADLINE
PLANNING
where HEADLINE is a [headline](#Headlines_and_Sections) element and PLANNING is
a line filled with INFO parts, where each of them follows the pattern:
KEYWORD: TIMESTAMP
KEYWORD is either `DEADLINE`, `SCHEDULED` or `CLOSED`. TIMESTAMP is a
[timestamp](#Timestamp) object.
In particular, no blank line is allowed between PLANNING and HEADLINE.
<a id="Comments"></a>
## Comments
A **comment line** starts with a hash sign and a whitespace character or an end
of line.
Comments can contain any number of consecutive comment lines.
<a id="Fixed_Width_Areas"></a>
## Fixed Width Areas
A **fixed-width line** starts with a colon character and a whitespace or an end
of line.
Fixed width areas can contain any number of consecutive fixed-width lines.
<a id="Horizontal_Rules"></a>
## Horizontal Rules
A horizontal rule is a line made of at least 5 consecutive hyphens. It can be
indented.
<a id="Keywords"></a>
## Keywords
Keywords follow the syntax:
#+KEY: VALUE
KEY can contain any non-whitespace character, but it cannot be equal to `CALL`
or any affiliated keyword.
VALUE can contain any character excepted a new line.
If KEY belongs to `org-element-document-properties`, VALUE can contain objects.
<a id="LaTeX_Environments"></a>
## LaTeX Environments
Pattern for LaTeX environments is:
\begin{NAME} CONTENTS \end{NAME}
NAME is constituted of alpha-numeric or asterisk characters.
CONTENTS can contain anything but the `\end{NAME}` string.
<a id="Node_Properties"></a>
## Node Properties
Node properties can only exist in [property drawers](#Property_Drawers). Their
pattern is any of the following
:NAME: VALUE
:NAME+: VALUE
:NAME:
:NAME+:
NAME can contain any non-whitespace character but cannot end with a plus sign.
It cannot be the empty string.
VALUE can contain anything but a newline character.
<a id="Paragraphs"></a>
## Paragraphs
Paragraphs are the default element, which means that any unrecognized context is
a paragraph.
Empty lines and other elements end paragraphs.
Paragraphs can contain every type of object.
<a id="Table_Rows"></a>
## Table Rows
A table row is constituted of either a vertical bar and any number of [table
cells](#Table_Cells) or a vertical bar followed by a hyphen.
In the first case the table row has the **standard** type. In the second case,
it has the **rule** type.
Table rows can only exist in [tables](#Tables).
<a id="Objects"></a>
# Objects
Objects can only be found in the following locations:
- [affiliated keywords](#Affiliated_keywords) defined in
`org-element-parsed-keywords`,
- [document properties](#Keywords),
- [headline](#Headlines_and_Sections) titles,
- [inlinetask](#Inlinetasks) titles,
- [item](#Plain_Lists_and_Items) tags,
- [paragraphs](#Paragraphs),
- [table cells](#Table_Cells),
- [table rows](#Table_Rows), which can only contain table cell objects,
- [verse blocks](#Blocks).
Most objects cannot contain objects. Those which can will be specified.
<a id="Entities_and_LaTeX_Fragments"></a>
## Entities and LaTeX Fragments
An entity follows the pattern:
\NAME POST
where NAME has a valid association in either `org-entities` or
`org-entities-user`.
POST is the end of line, `{}` string, or a non-alphabetical character. It isn't
separated from NAME by a whitespace character.
A LaTeX fragment can follow multiple patterns:
\NAME BRACKETS
\(CONTENTS\)
\[CONTENTS\]
$$CONTENTS$$
PRE$CHAR$POST
PRE$BORDER1 BODY BORDER2$POST
NAME contains alphabetical characters only and must not have an association in
either **org-entities** or **org-entities-user**.
BRACKETS is optional, and is not separated from NAME with white spaces. It may
contain any number of the following patterns:
[CONTENTS1]
{CONTENTS2}
where CONTENTS1 can contain any characters excepted `{` `}`, `[` `]` and newline
and CONTENTS2 can contain any character excepted `{`, `}` and newline.
CONTENTS can contain any character but cannot contain `\)` in the second
template or `\]` in the third one.
PRE is either the beginning of line or a character different from `$`.
CHAR is a non-whitespace character different from `.`, `,`, `?`, `;`, `'` or a
double quote.
POST is any punctuation (including parentheses and quotes) or space character,
or the end of line.
BORDER1 is a non-whitespace character different from `.`, `,`, `;` and `$`.
BODY can contain any character except `$`, and may not span more than 3
lines.
BORDER2 is any non-whitespace character different from `,`, `.` and `$`.
---
> It would introduce incompatibilities with previous Org versions, but support
> for `$...$` (and for symmetry, `$$...$$`) constructs ought to be removed.
>
> They are slow to parse, fragile, redundant and imply false positives. &#x2014;
> ngz
<a id="Export_Snippets"></a>
## Export Snippets
The pattern for export snippets is:
@@NAME:VALUE@@
NAME can contain any alpha-numeric character and hyphens.
VALUE can contain anything but `@@` string.
<a id="Footnote_References"></a>
## Footnote References
There are three patterns for footnote references:
[fn:LABEL]
[fn:LABEL:DEFINITION]
[fn::DEFINITION]
LABEL can contain any word constituent character, hyphens and underscores.
DEFINITION can contain any character. Though opening and closing square brackets
must be balanced in it. It can contain any object encountered in a paragraph,
even other footnote references.
If the reference follows the second pattern, it is called an **inline
footnote**. If it follows the third one, i.e. if LABEL is omitted, it is an
**anonymous footnote**.
<a id="Inline_Babel_Calls_and_Source_Blocks"></a>
## Inline Babel Calls and Source Blocks
Inline Babel calls follow any of the following patterns:
call_NAME(ARGUMENTS)
call_NAME[HEADER](ARGUMENTS)[HEADER]
NAME can contain any character besides `(`, `)` and `\n`.
HEADER can contain any character besides `]` and `\n`.
ARGUMENTS can contain any character besides `)` and `\n`.
Inline source blocks follow any of the following patterns:
src_LANG{BODY}
src_LANG[OPTIONS]{BODY}
LANG can contain any non-whitespace character.
OPTIONS and BODY can contain any character but `\n`.
<a id="Line_Breaks"></a>
## Line Breaks
A line break consists of a `\\SPACE` pattern at the end of an otherwise non-empty
line.
SPACE can contain any number of tabs and spaces, including 0.
<a id="Links"></a>
## Links
There are 4 major types of links:
PRE1 RADIO POST1 ("radio" link)
<PROTOCOL:PATH> ("angle" link)
PRE2 PROTOCOL:PATH2 POST2 ("plain" link)
[[PATH3]DESCRIPTION] ("regular" link)
PRE1 and POST1, when they exist, are non alphanumeric characters.
RADIO is a string matched by some [radio target](#Targets_and_Radio_Targets). It
may contain [entities](#Entities_and_LaTeX_Fragments), [latex
fragments](#Entities_and_LaTeX_Fragments),
[subscript](#Subscript_and_Superscript) and
[superscript](#Subscript_and_Superscript).
PROTOCOL is a string among `org-link-types`.
PATH can contain any character but `]`, `<`, `>` and `\n`.
PRE2 and POST2, when they exist, are non word constituent characters.
PATH2 can contain any non-whitespace character except `(`, `)`, `<` and `>`.
It must end with a word-constituent character, or any non-whitespace
non-punctuation character followed by `/`.
DESCRIPTION must be enclosed within square brackets. It can contain any
character but square brackets. It can contain any object found in a paragraph
except a [footnote reference](#Footnote_References), a [radio
target](#Targets_and_Radio_Targets) and a [line break](#Line_Breaks). It cannot
contain another link either, unless it is a plain or angle link.
DESCRIPTION is optional.
PATH3 is built according to the following patterns:
FILENAME ("file" type)
PROTOCOL:PATH4 ("PROTOCOL" type)
PROTOCOL://PATH4 ("PROTOCOL" type)
id:ID ("id" type)
#CUSTOM-ID ("custom-id" type)
(CODEREF) ("coderef" type)
FUZZY ("fuzzy" type)
FILENAME is a file name, either absolute or relative.
PATH4 can contain any character besides square brackets.
ID is constituted of hexadecimal numbers separated with hyphens.
PATH4, CUSTOM-ID, CODEREF and FUZZY can contain any character besides square
brackets.
<a id="Macros"></a>
## Macros
Macros follow the pattern:
{{{NAME(ARGUMENTS)}}}
NAME must start with a letter and can be followed by any number of alpha-numeric
characters, hyphens and underscores.
ARGUMENTS can contain anything but `}}}` string. Values within ARGUMENTS are
separated by commas. Non-separating commas have to be escaped with a backslash
character.
<a id="Targets_and_Radio_Targets"></a>
## Targets and Radio Targets
Radio targets follow the pattern:
<<<CONTENTS>>>
CONTENTS can be any character besides `<`, `>` and `\n`. It cannot start or end
with a whitespace character. As far as objects go, it can contain [text
markup](#Emphasis_Markers), [entities](#Entities_and_LaTeX_Fragments), [latex
fragments](#Entities_and_LaTeX_Fragments),
[subscript](#Subscript_and_Superscript) and
[superscript](#Subscript_and_Superscript) only.
Targets follow the pattern:
<<TARGET>>
TARGET can contain any character besides `<`, `>` and `\n`. It cannot start or
end with a whitespace character. It cannot contain any object.
<a id="Statistics_Cookies"></a>
## Statistics Cookies
Statistics cookies follow either pattern:
[PERCENT%]
[NUM1/NUM2]
PERCENT, NUM1 and NUM2 are numbers or the empty string.
<a id="Subscript_and_Superscript"></a>
## Subscript and Superscript
Pattern for subscript is:
CHAR_SCRIPT
Pattern for superscript is:
CHAR^SCRIPT
CHAR is any non-whitespace character.
SCRIPT can be `*` or an expression enclosed in parentheses (respectively curly
brackets), possibly containing balanced parentheses (respectively curly
brackets).
SCRIPT can also follow the pattern:
SIGN CHARS FINAL
SIGN is either a plus sign, a minus sign, or an empty string.
CHARS is any number of alpha-numeric characters, commas, backslashes and dots,
or an empty string.
FINAL is an alpha-numeric character.
There is no white space between SIGN, CHARS and FINAL.
<a id="Table_Cells"></a>
## Table Cells
Table cells follow the pattern:
CONTENTS SPACES|
CONTENTS can contain any character except a vertical bar.
SPACES contains any number of space characters, including zero. It can be used
to properly align the table.
The final bar may be replaced with a newline character for the last cell in row.
<a id="Timestamp"></a>
## Timestamps
There are seven possible patterns for timestamps:
<%%(SEXP)> (diary)
<DATE TIME REPEATER-OR-DELAY> (active)
[DATE TIME REPEATER-OR-DELAY] (inactive)
<DATE TIME REPEATER-OR-DELAY>--<DATE TIME REPEATER-OR-DELAY> (active range)
<DATE TIME-TIME REPEATER-OR-DELAY> (active range)
[DATE TIME REPEATER-OR-DELAY]--[DATE TIME REPEATER-OR-DELAY] (inactive range)
[DATE TIME-TIME REPEATER-OR-DELAY] (inactive range)
SEXP can contain any character except `>` and `\n`.
DATE follows the pattern:
YYYY-MM-DD DAYNAME
`Y`, `M` and `D` are digits. DAYNAME can contain any non-whitespace character
besides `+`, `-`, `]`, `>`, a digit or `\n`.
TIME follows the pattern `H:MM`. `H` can be one or two digits long and can start
with 0.
REPEATER-OR-DELAY follows the pattern:
MARK VALUE UNIT
MARK is `+` (cumulate type), `++` (catch-up type) or `.+` (restart type) for a
repeater, and `-` (all type) or `--` (first type) for warning delays.
VALUE is a number.
UNIT is a character among `h` (hour), `d` (day), `w` (week), `m` (month), `y`
(year).
MARK, VALUE and UNIT are not separated by whitespace characters.
There can be two REPEATER-OR-DELAY in the timestamp: one as a repeater and one
as a warning delay.
<a id="Emphasis_Markers"></a>
## Text Markup
Text markup follows the pattern:
PRE MARKER CONTENTS MARKER POST
PRE is a whitespace character, `(`, `{`, `'` or a double quote. It can also be a
beginning of line.
MARKER is a character among `*` (bold), `=` (verbatim), `/` (italic), `+`
(strike-through), `_` (underline), `~` (code).
CONTENTS is a string following the pattern:
BORDER BODY BORDER
BORDER can be any non-whitespace character except `,`, `'` or a double quote.
BODY can contain any character but may not span more than 3 lines.
BORDER and BODY are not separated by whitespaces.
CONTENTS can contain any object encountered in a paragraph when markup is
**bold**, **italic**, **strike-through** or **underline**.
POST is a whitespace character, `-`, `.`, `,`, `:`, `!`, `?`, `'`, `)`, `}` or a
double quote. It can also be an end of line.
PRE, MARKER, CONTENTS, MARKER and POST are not separated by whitespace
characters.
---
> All of this is wrong if `org-emphasis-regexp-components` or
> `org-emphasis-alist` are modified.
>
> This should really be simplified.
>
> Also, CONTENTS should be anything within code and verbatim emphasis, by
> definition. &#x2014; ngz
# Footnotes
<sup><a id="fn.1" href="#fnr.1">1</a></sup> In particular, the parser requires
stars at column 0 to be quoted by a comma when they do not define a headline.
<sup><a id="fn.2" href="#fnr.2">2</a></sup> It also means that only headlines
and sections can be recognized just by looking at the beginning of the line.
Planning lines and property drawers can be recognized by looking at one or two
lines above.
As a consequence, using `org-element-at-point` or `org-element-context` will
move up to the parent headline, and parse top-down from there until context
around original location is found.

81
examples/custom.rs Normal file
View file

@ -0,0 +1,81 @@
use std::convert::From;
use std::env::args;
use std::fs;
use std::io::{Error as IOError, Write};
use std::result::Result;
use std::string::FromUtf8Error;
use orgize::export::{DefaultHtmlHandler, HtmlHandler};
use orgize::{Element, Org};
use slugify::slugify;
/// Everything that can go wrong while this example renders an org file.
#[derive(Debug)]
enum MyError {
    /// An underlying I/O operation failed.
    IO(IOError),
    /// Raised by the custom handler when a title is nested deeper than `<h6>`.
    Heading,
    /// A byte buffer was not valid UTF-8.
    Utf8(FromUtf8Error),
}

// From<std::io::Error> trait is required for custom error type
impl From<IOError> for MyError {
    fn from(source: IOError) -> Self {
        Self::IO(source)
    }
}

// Lets `?` lift `String::from_utf8` failures into `MyError`.
impl From<FromUtf8Error> for MyError {
    fn from(source: FromUtf8Error) -> Self {
        Self::Utf8(source)
    }
}
/// HTML handler for this example: a newtype around `DefaultHtmlHandler`,
/// whose `HtmlHandler` impl renders titles itself and forwards every other
/// element to the wrapped default handler.
#[derive(Default)]
struct MyHtmlHandler(DefaultHtmlHandler);
impl HtmlHandler<MyError> for MyHtmlHandler {
    /// Writes the opening markup for `element`.
    ///
    /// Titles are rendered as `<hN><a id="slug" href="#slug">`, where the slug
    /// is derived from the raw title text. A title with a level above 6 is
    /// rejected with `MyError::Heading`. Every other element is delegated to
    /// the wrapped default handler.
    fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
        match element {
            Element::Title(title) if title.level > 6 => return Err(MyError::Heading),
            Element::Title(title) => write!(
                w,
                "<h{0}><a id=\"{1}\" href=\"#{1}\">",
                title.level,
                slugify!(&title.raw),
            )?,
            // fallthrough to default handler
            _ => self.0.start(w, element)?,
        }

        Ok(())
    }

    /// Writes the closing markup for `element`: `</a></hN>` for titles, and
    /// whatever the wrapped default handler emits for everything else.
    fn end<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
        match element {
            Element::Title(title) => write!(w, "</a></h{}>", title.level)?,
            _ => self.0.end(w, element)?,
        }

        Ok(())
    }
}
/// Renders the org file named by the first CLI argument to HTML on stdout,
/// using `MyHtmlHandler`. Prints a usage line to stderr when no file is given.
fn main() -> Result<(), MyError> {
    let args: Vec<_> = args().collect();

    if args.len() < 2 {
        eprintln!("Usage: {} <org-file>", args[0]);
        return Ok(());
    }

    let raw = fs::read(&args[1])?;
    let contents = String::from_utf8(raw)?;

    let mut handler = MyHtmlHandler::default();
    let mut writer = Vec::new();
    Org::parse(&contents).write_html_custom(&mut writer, &mut handler)?;

    let html = String::from_utf8(writer)?;
    println!("{}", html);

    Ok(())
}

View file

@ -1,44 +0,0 @@
//! ```bash
//! cargo run --example html-slugify '* hello world!'
//! ```
use orgize::{
export::HtmlExport,
export::{from_fn_with_ctx, Container, Event, Traverser},
Org,
};
use slugify::slugify;
use std::cmp::min;
use std::env::args;
/// Exports the org string given as the first CLI argument to HTML, giving
/// every headline a slugified anchor; prints the result to stdout.
fn main() {
    let args: Vec<_> = args().collect();

    if args.len() < 2 {
        eprintln!("Usage: {} <org-mode-string>", args[0]);
        return;
    }

    let mut html_export = HtmlExport::default();

    let mut handler = from_fn_with_ctx(|event, ctx| match event {
        Event::Enter(Container::Headline(headline)) => {
            // heading levels are clamped to 6 before being used in the tag name
            let level = min(headline.level(), 6);
            let title: String = headline.title().map(|e| e.to_string()).collect();

            html_export.push_str(format!(
                "<h{level}><a id=\"{0}\" href=\"#{0}\">",
                slugify!(&title)
            ));

            for elem in headline.title() {
                html_export.element(elem, ctx);
            }

            html_export.push_str(format!("</a></h{level}>"));
        }
        // forward to default html export
        other => {
            html_export.event(other, ctx);
        }
    });

    Org::parse(&args[1]).traverse(&mut handler);

    println!("{}", html_export.finish());
}

19
examples/iter.rs Normal file
View file

@ -0,0 +1,19 @@
use orgize::Org;
use std::env::args;
use std::fs;
use std::io::Result;
/// Parses the org file named by the first CLI argument and prints every
/// parser event, one per line, using its `Debug` representation.
///
/// # Errors
///
/// Returns the I/O error if the file cannot be read or is not valid UTF-8.
fn main() -> Result<()> {
    let args: Vec<_> = args().collect();

    if args.len() < 2 {
        eprintln!("Usage: {} <org-file>", args[0]);
    } else {
        // `read_to_string` reports invalid UTF-8 as an `io::Error` instead of
        // panicking on a user-supplied file.
        let contents = fs::read_to_string(&args[1])?;

        for event in Org::parse(&contents).iter() {
            println!("{:?}", event);
        }
    }

    Ok(())
}

17
examples/json.rs Normal file
View file

@ -0,0 +1,17 @@
use orgize::Org;
use serde_json::to_string;
use std::env::args;
use std::fs;
use std::io::Result;
/// Parses the org file named by the first CLI argument and prints the parsed
/// document serialized as JSON.
///
/// # Errors
///
/// Returns an I/O error if the file cannot be read, is not valid UTF-8, or
/// cannot be serialized.
fn main() -> Result<()> {
    let args: Vec<_> = args().collect();

    if args.len() < 2 {
        eprintln!("Usage: {} <org-file>", args[0]);
    } else {
        // `read_to_string` reports invalid UTF-8 as an `io::Error` instead of
        // panicking on a user-supplied file.
        let contents = fs::read_to_string(&args[1])?;
        // `serde_json::Error` converts into `io::Error`, so `?` works here.
        let json = to_string(&Org::parse(&contents))?;
        println!("{}", json);
    }

    Ok(())
}

View file

@ -1,23 +0,0 @@
//! ```bash
//! cargo run --example markdown test.org
//! ```
use orgize::{export::MarkdownExport, Org};
use std::{env::args, fs};
/// Converts the org file named by the first CLI argument to Markdown and
/// writes the result next to it as `<input>.md`.
///
/// Usage and I/O problems are reported on stderr and end the process with
/// exit status 1 rather than a panic.
fn main() {
    let args: Vec<_> = args().collect();

    if args.len() < 2 {
        eprintln!("Usage: {} <org-mode-file>", args[0]);
        std::process::exit(1);
    }

    let input = &args[1];
    let output = format!("{}.md", input);

    let content = fs::read_to_string(input).unwrap_or_else(|err| {
        eprintln!("Failed to read {}: {}", input, err);
        std::process::exit(1)
    });

    let mut export = MarkdownExport::default();
    Org::parse(content).traverse(&mut export);

    if let Err(err) = fs::write(&output, export.finish()) {
        eprintln!("Failed to write {}: {}", output, err);
        std::process::exit(1);
    }

    println!("Wrote to {}", output);
}

View file

@ -1,30 +0,0 @@
//! ```bash
//! cargo run --example parse '* hello\n** /world/!'
//! ```
use orgize::Org;
use rowan::ast::AstNode;
use std::env::args;
use tracing_subscriber::fmt::format::FmtSpan;
/// Parses the org string given as the first CLI argument and pretty-prints
/// the resulting syntax tree, with TRACE-level parser tracing enabled.
///
/// Literal `\n` and `\r` sequences in the argument are turned into real
/// newline / carriage-return characters before parsing.
fn main() {
    let args: Vec<_> = args().collect();

    // File names and line numbers are kept out of the trace output. The
    // original builder enabled both and then disabled them again; only the
    // final (disabled) setting took effect, so just that one is kept.
    tracing_subscriber::fmt()
        .without_time()
        .with_span_events(FmtSpan::NEW)
        .with_max_level(tracing::Level::TRACE)
        .with_file(false)
        .with_line_number(false)
        .init();

    if args.len() < 2 {
        eprintln!("Usage: {} <org-mode-string>", args[0]);
    } else {
        let s = &args[1].replace(r"\n", "\n").replace(r"\r", "\r");
        let org = Org::parse(s);
        println!("{:#?}", org.document().syntax());
    }
}

1
fuzz/.gitignore vendored
View file

@ -1,4 +1,3 @@
target
corpus
artifacts
coverage

View file

@ -1,27 +1,20 @@
[package]
name = "orgize-fuzz"
version = "0.0.0"
version = "0.0.1"
authors = ["Automatically generated"]
publish = false
edition = "2018"
[package.metadata]
cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
[dependencies.orgize]
path = ".."
libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" }
orgize = { path = ".." }
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[profile.release]
debug = 1
[[bin]]
name = "fuzz_target_1"
path = "fuzz_targets/fuzz_target_1.rs"
test = false
doc = false

View file

@ -1,7 +1,14 @@
#![no_main]
#[macro_use]
extern crate libfuzzer_sys;
extern crate orgize;
use orgize::Org;
#[cfg_attr(rustfmt, rustfmt_skip)]
libfuzzer_sys::fuzz_target!(|data: &[u8]| {
if let Ok(utf8) = std::str::from_utf8(data) {
let _ = orgize::Org::parse(utf8);
if let Ok(s) = std::str::from_utf8(data) {
let _ = Org::parse(s);
}
});

View file

@ -1,52 +0,0 @@
use crate::syntax::SyntaxKind;
use super::{filter_token, AffiliatedKeyword, Token};
impl AffiliatedKeyword {
    /// Name of the keyword: the first `TEXT` token of this node.
    ///
    /// ```rust
    /// use orgize::{Org, ast::AffiliatedKeyword};
    ///
    /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.key(), "CAPTION");
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if the node contains no `TEXT` token.
    pub fn key(&self) -> Token {
        let first_text = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT));

        first_text.expect("keyword must contains TEXT")
    }

    /// Optional part of the keyword: the `TEXT` token directly after the
    /// first `[`, if there is one.
    ///
    /// ```rust
    /// use orgize::{Org, ast::AffiliatedKeyword};
    ///
    /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert!(keyword.optional().is_none());
    /// let keyword = Org::parse("#+CAPTION[OPTIONAL]: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.optional().unwrap(), "OPTIONAL");
    /// ```
    pub fn optional(&self) -> Option<Token> {
        let mut after_bracket = self
            .syntax
            .children_with_tokens()
            .skip_while(|el| el.kind() != SyntaxKind::L_BRACKET)
            .skip(1);

        after_bracket
            .next()
            .and_then(filter_token(SyntaxKind::TEXT))
    }

    /// Value of the keyword: the last `TEXT` token of this node.
    ///
    /// ```rust
    /// use orgize::{Org, ast::AffiliatedKeyword};
    ///
    /// let keyword = Org::parse("#+CAPTION: VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.value().unwrap(), " VALUE");
    /// let keyword = Org::parse("#+CAPTION[OPTIONAL]:VALUE\nabc").first_node::<AffiliatedKeyword>().unwrap();
    /// assert_eq!(keyword.value().unwrap(), "VALUE");
    /// ```
    pub fn value(&self) -> Option<Token> {
        // NOTE(review): if the parser emits no TEXT token for an empty value,
        // this would hand back the key (or optional) token instead; confirm
        // against the parser.
        let texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT));

        texts.last()
    }
}

View file

@ -1,194 +0,0 @@
use super::{
filter_token, CenterBlock, CommentBlock, DynBlock, ExampleBlock, ExportBlock, QuoteBlock,
SourceBlock, SpecialBlock, SyntaxKind, Token, VerseBlock,
};
use rowan::TextSize;
impl SourceBlock {
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.language().unwrap(), "c");
/// let block = Org::parse("#+begin_src javascript \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.language().unwrap(), "javascript");
///
/// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.language().is_none());
/// ````
pub fn language(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::SRC_BLOCK_LANGUAGE))
}
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src emacs-lisp -n 20\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.switches().unwrap(), "-n 20");
/// let block = Org::parse("#+begin_src emacs-lisp -n 20 -r :tangle yes \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.switches().unwrap(), "-n 20 -r");
///
/// let block = Org::parse("#+begin_src emacs-lisp\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// let block = Org::parse("#+begin_src :tangle yes\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.switches().is_none());
/// ````
pub fn switches(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::SRC_BLOCK_SWITCHES))
}
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse("#+begin_src c :tangle yes\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.parameters().unwrap(), ":tangle yes");
/// let block = Org::parse("#+begin_src c :tangle \n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.parameters().unwrap(), ":tangle");
///
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
/// assert!(block.parameters().is_none());
/// ````
pub fn parameters(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::SRC_BLOCK_PARAMETERS))
}
/// Return unescaped source code string
///
/// ```rust
/// use orgize::{Org, ast::SourceBlock};
///
/// let block = Org::parse(r#"
/// #+begin_src
/// #+end_src
/// "#).first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.value(), "");
///
/// let block = Org::parse(r#"
/// #+begin_src
/// ,* foo
/// ,#+ bar
/// #+end_src
/// "#).first_node::<SourceBlock>().unwrap();
/// assert_eq!(block.value(), "* foo\n#+ bar\n");
/// ````
pub fn value(&self) -> String {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_CONTENT)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.filter_map(filter_token(SyntaxKind::TEXT))
.fold(String::new(), |acc, value| acc + &value)
}
}
impl ExportBlock {
/// ```rust
/// use orgize::{Org, ast::ExportBlock};
///
/// let block = Org::parse("#+begin_export html\n#+end_export").first_node::<ExportBlock>().unwrap();
/// assert_eq!(block.ty().unwrap(), "html");
///
/// let block = Org::parse("#+begin_export\n#+end_export").first_node::<ExportBlock>().unwrap();
/// assert!(block.ty().is_none());
/// ````
pub fn ty(&self) -> Option<Token> {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.find_map(filter_token(SyntaxKind::EXPORT_BLOCK_TYPE))
}
/// Returns export block contents
///
/// ```rust
/// use orgize::{Org, ast::ExportBlock};
///
/// let block = Org::parse(r#"
/// #+begin_export html
/// <style>.red { color: red; }</style>
/// #+end_export
/// "#).first_node::<ExportBlock>().unwrap();
/// assert_eq!(block.value(), "<style>.red { color: red; }</style>\n");
///
/// let block = Org::parse(r#"
/// #+BEGIN_EXPORT org
/// ,#+BEGIN_EXPORT html
/// <style>.red { color: red; }</style>
/// ,#+END_EXPORT
/// #+END_EXPORT
/// "#).first_node::<ExportBlock>().unwrap();
/// assert_eq!(block.value(), r#"#+BEGIN_EXPORT html
/// <style>.red { color: red; }</style>
/// #+END_EXPORT
/// "#);
/// ```
pub fn value(&self) -> String {
self.syntax
.children()
.find(|e| e.kind() == SyntaxKind::BLOCK_CONTENT)
.into_iter()
.flat_map(|n| n.children_with_tokens())
.filter_map(filter_token(SyntaxKind::TEXT))
.fold(String::new(), |acc, value| acc + &value)
}
}
/// Generates `content_start` / `content_end` for a block AST type, based on
/// the positions of its `BLOCK_BEGIN` and `BLOCK_END` child nodes.
macro_rules! impl_content_border {
    ($block:ident) => {
        impl $block {
            /// Beginning position of block content
            ///
            /// This is the end offset of the `BLOCK_BEGIN` child. If that
            /// child is missing, debug builds assert and release builds
            /// return the default `TextSize`.
            pub fn content_start(&self) -> TextSize {
                let begin = self
                    .syntax
                    .children()
                    .find(|node| node.kind() == SyntaxKind::BLOCK_BEGIN);

                match begin {
                    Some(node) => node.text_range().end(),
                    None => {
                        debug_assert!(false, "block must contains BLOCK_BEGIN");
                        TextSize::default()
                    }
                }
            }

            /// Ending position of block content
            ///
            /// This is the start offset of the `BLOCK_END` child. If that
            /// child is missing, debug builds assert and release builds
            /// return the default `TextSize`.
            pub fn content_end(&self) -> TextSize {
                let end = self
                    .syntax
                    .children()
                    .find(|node| node.kind() == SyntaxKind::BLOCK_END);

                match end {
                    Some(node) => node.text_range().start(),
                    None => {
                        debug_assert!(false, "block must contains BLOCK_END");
                        TextSize::default()
                    }
                }
            }
        }
    };
}

impl_content_border!(SourceBlock);
impl_content_border!(ExportBlock);
impl_content_border!(CenterBlock);
impl_content_border!(CommentBlock);
impl_content_border!(ExampleBlock);
impl_content_border!(QuoteBlock);
impl_content_border!(SpecialBlock);
impl_content_border!(VerseBlock);
impl_content_border!(DynBlock);

View file

@ -1,58 +0,0 @@
use rowan::ast::support;
use crate::{ast::Token, SyntaxKind};
use super::{Clock, Timestamp};
impl Clock {
pub fn value(&self) -> Option<Timestamp> {
support::child(&self.syntax)
}
/// ```rust
/// use orgize::{Org, ast::Clock};
///
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::<Clock>().unwrap();
/// assert!(clock.duration().is_none());
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::<Clock>().unwrap();
/// assert_eq!(clock.duration().unwrap(), "12:00");
///
/// ```
pub fn duration(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.skip_while(|t| t.kind() != SyntaxKind::DOUBLE_ARROW)
.skip(1)
.find(|t| t.kind() != SyntaxKind::WHITESPACE)
.and_then(|e| {
debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
Some(Token(e.into_token()?))
})
}
/// ```rust
/// use orgize::{Org, ast::Clock};
///
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::<Clock>().unwrap();
/// assert!(!clock.is_closed());
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::<Clock>().unwrap();
/// assert!(clock.is_closed());
/// ```
pub fn is_closed(&self) -> bool {
self.syntax
.children_with_tokens()
.any(|t| t.kind() == SyntaxKind::DOUBLE_ARROW)
}
/// ```rust
/// use orgize::{Org, ast::Clock};
///
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39]").first_node::<Clock>().unwrap();
/// assert!(clock.is_running());
/// let clock = Org::parse("CLOCK: [2003-09-16 Tue 09:39] =>12:00").first_node::<Clock>().unwrap();
/// assert!(!clock.is_running());
/// ```
pub fn is_running(&self) -> bool {
!self.is_closed()
}
}

View file

@ -1,111 +0,0 @@
use crate::{syntax::OrgLanguage, SyntaxElement, SyntaxKind, SyntaxNode};
use rowan::{ast::AstNode, TextRange, TextSize};
use super::Token;
/// AST wrapper around a syntax node of kind `SyntaxKind::CLOZE`.
///
/// The method doctests parse clozes written as `{{text}}`, `{{text}{hint}}`
/// and `{{text}@id}`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Cloze {
    // underlying untyped syntax node
    pub(crate) syntax: SyntaxNode,
}
impl AstNode for Cloze {
    type Language = OrgLanguage;

    /// Only `SyntaxKind::CLOZE` nodes can be viewed as a `Cloze`.
    fn can_cast(kind: SyntaxKind) -> bool {
        matches!(kind, SyntaxKind::CLOZE)
    }

    /// Wraps `node` when it has the right kind; returns `None` otherwise.
    fn cast(node: SyntaxNode) -> Option<Cloze> {
        if Self::can_cast(node.kind()) {
            Some(Cloze { syntax: node })
        } else {
            None
        }
    }

    /// Borrows the underlying syntax node.
    fn syntax(&self) -> &SyntaxNode {
        &self.syntax
    }
}
impl Cloze {
/// Beginning position of this element
pub fn start(&self) -> TextSize {
self.syntax.text_range().start()
}
/// Ending position of this element
pub fn end(&self) -> TextSize {
self.syntax.text_range().end()
}
/// Range of this element
pub fn text_range(&self) -> TextRange {
self.syntax.text_range()
}
/// Raw text of this element
pub fn raw(&self) -> String {
self.syntax.to_string()
}
pub fn text(&self) -> impl Iterator<Item = SyntaxElement> {
self.syntax
.children_with_tokens()
.skip(1)
.take_while(|n| n.kind() != SyntaxKind::R_CURLY)
}
/// ```rust
/// use orgize::{Org, ast::Cloze};
///
/// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.text_raw(), "text");
/// let cloze = Org::parse("{{$\\frac{1}{2}$}{}@id}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.text_raw(), "$\\frac{1}{2}$");
/// let cloze = Org::parse("{{ [[file:my_image.png]] }{hint}}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.text_raw(), " [[file:my_image.png]] ");
/// ```
pub fn text_raw(&self) -> String {
self.text()
.fold(String::new(), |acc, e| acc + &e.to_string())
}
/// ```rust
/// use orgize::{Org, ast::Cloze};
///
/// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
/// assert!(cloze.hint().is_none());
/// let cloze = Org::parse("{{text}{}@id}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.hint().unwrap(), "");
/// let cloze = Org::parse("{{text}{hint}}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.hint().unwrap(), "hint");
/// ```
pub fn hint(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.skip_while(|n| n.kind() != SyntaxKind::L_CURLY)
.nth(1)
.and_then(|e| {
debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
Some(Token(e.into_token()?))
})
}
/// ```rust
/// use orgize::{Org, ast::Cloze};
///
/// let cloze = Org::parse("{{text}}").first_node::<Cloze>().unwrap();
/// assert!(cloze.id().is_none());
/// let cloze = Org::parse("{{text}@}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.id().unwrap(), "");
/// let cloze = Org::parse("{{text}@id}").first_node::<Cloze>().unwrap();
/// assert_eq!(cloze.id().unwrap(), "id");
/// ```
pub fn id(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.skip_while(|n| n.kind() != SyntaxKind::AT)
.nth(1)
.and_then(|e| {
debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
Some(Token(e.into_token()?))
})
}
}

View file

@ -1,20 +0,0 @@
use crate::SyntaxKind;
use super::{filter_token, Comment};
impl Comment {
/// Contents without pound signs
///
/// ```rust
/// use orgize::{ast::Comment, Org};
///
/// let fixed = Org::parse("# A\n#\n# B\n# C").first_node::<Comment>().unwrap();
/// assert_eq!(fixed.value(), "A\n\nB\nC");
/// ```
pub fn value(&self) -> String {
self.syntax
.children_with_tokens()
.filter_map(filter_token(SyntaxKind::TEXT))
.fold(String::new(), |acc, text| acc + &text)
}
}

View file

@ -1,88 +0,0 @@
use rowan::ast::AstNode;
use crate::Org;
use super::{Document, Keyword, PropertyDrawer};
impl Document {
    /// Returns an iterator of keywords in zeroth section
    ///
    /// Keywords that appear under a headline are not included.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Document};
    ///
    /// let org = Org::parse(r#"
    /// #+TITLE: hello
    /// #+TITLE: world
    /// #+DATE: today
    /// #+AUTHOR: poi
    /// * headline
    /// #+SOMETHING:"#);
    /// let doc = org.first_node::<Document>().unwrap();
    /// assert_eq!(doc.keywords().count(), 4);
    /// ```
    pub fn keywords(&self) -> impl Iterator<Item = Keyword> {
        self.section()
            .into_iter()
            .flat_map(|zeroth| zeroth.syntax.children())
            .filter_map(Keyword::cast)
    }

    /// Returns the value in top-level `#+TITLE`
    ///
    /// Multiple `#+TITLE` are joined with spaces. The key is matched
    /// ASCII case-insensitively and each value is trimmed.
    ///
    /// Returns `None` if file doesn't contain `#+TITLE`
    ///
    /// ```rust
    /// use orgize::{Org, ast::Document};
    ///
    /// let org = Org::parse("#+TITLE: hello\n#+TITLE: world");
    /// let doc = org.first_node::<Document>().unwrap();
    /// assert_eq!(doc.title().unwrap(), "hello world");
    ///
    /// let org = Org::parse("");
    /// let doc = org.first_node::<Document>().unwrap();
    /// assert!(doc.title().is_none());
    /// ```
    pub fn title(&self) -> Option<String> {
        let mut joined: Option<String> = None;

        for keyword in self.keywords() {
            if !keyword.key().eq_ignore_ascii_case("TITLE") {
                continue;
            }

            let buf = joined.get_or_insert_with(String::new);
            // a separator is only added once something has been written
            if !buf.is_empty() {
                buf.push(' ');
            }
            buf.push_str(keyword.value().trim());
        }

        joined
    }

    /// Returns top-level properties drawer
    ///
    /// ```rust
    /// use orgize::{Org, ast::Document};
    ///
    /// let org = Org::parse(r#":PROPERTIES:
    /// :ID: 20220718T085035.042592
    /// :END:
    /// #+TITLE: Complete Computing"#);
    ///
    /// let properties = org.document().properties().unwrap();
    /// assert_eq!(properties.to_hash_map().len(), 1);
    /// assert_eq!(properties.get("ID").unwrap(), "20220718T085035.042592");
    /// ```
    pub fn properties(&self) -> Option<PropertyDrawer> {
        let root = &self.syntax;
        rowan::ast::support::child(root)
    }
}
/// Convenience shortcuts on `Org` that forward to the root `Document`.
impl Org {
    /// Equals to `self.document().title()`, see [Document::title]
    ///
    /// Returns `None` when the document has no top-level `#+TITLE`.
    pub fn title(&self) -> Option<String> {
        self.document().title()
    }

    /// Equals to `self.document().keywords()`, see [Document::keywords]
    ///
    /// Only keywords of the zeroth section are yielded.
    pub fn keywords(&self) -> impl Iterator<Item = Keyword> {
        self.document().keywords()
    }
}

View file

@ -1,140 +0,0 @@
use rowan::TextSize;
use std::collections::HashMap;
use super::{filter_token, Drawer, PropertyDrawer, SyntaxKind, Token};
impl PropertyDrawer {
    /// Iterates over the `(key, value)` pairs of this drawer.
    ///
    /// For each node property, the key and value are its first two `TEXT`
    /// tokens; properties with fewer than two are skipped.
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// assert_eq!(drawer.iter().count(), 2);
    /// ```
    pub fn iter(&self) -> impl Iterator<Item = (Token, Token)> {
        self.node_properties().filter_map(|property| {
            let mut texts = property
                .syntax
                .children_with_tokens()
                .filter_map(filter_token(SyntaxKind::TEXT));

            let key = texts.next()?;
            let value = texts.next()?;
            Some((key, value))
        })
    }

    /// Looks up the value of the first property whose key equals `key`.
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// assert_eq!(drawer.get("CUSTOM_ID").unwrap(), "someid");
    /// assert_eq!(drawer.get("ID").unwrap(), "id");
    /// ```
    pub fn get(&self, key: &str) -> Option<Token> {
        self.iter()
            .find(|(name, _)| *name == key)
            .map(|(_, value)| value)
    }

    /// Collects all properties into a `HashMap`; when a key occurs more than
    /// once, the last value wins.
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:CUSTOM_ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// let map = drawer.to_hash_map();
    /// assert_eq!(map.len(), 1);
    /// assert_eq!(map.get("CUSTOM_ID").unwrap(), "id");
    /// ```
    pub fn to_hash_map(&self) -> HashMap<Token, Token> {
        let mut map = HashMap::new();
        map.extend(self.iter());
        map
    }

    #[cfg(feature = "indexmap")]
    /// Collects all properties into an `IndexMap`, keeping their order of
    /// appearance in the drawer.
    ///
    /// ```rust
    /// use orgize::{Org, ast::PropertyDrawer};
    ///
    /// let org = Org::parse("* Heading\n:PROPERTIES:\n:CUSTOM_ID: someid\n:ID: id\n:END:");
    /// let drawer = org.first_node::<PropertyDrawer>().unwrap();
    /// let map = drawer.to_index_map();
    /// let item1 = map.get_index(1).unwrap();
    /// assert_eq!(item1.0, "ID");
    /// assert_eq!(item1.1, "id");
    /// ```
    pub fn to_index_map(&self) -> indexmap::IndexMap<Token, Token> {
        self.iter().collect::<indexmap::IndexMap<_, _>>()
    }

    /// Beginning position of drawer content
    ///
    /// This is the end offset of the `DRAWER_BEGIN` child. If that child is
    /// missing, debug builds assert and release builds return the default
    /// `TextSize`.
    pub fn content_start(&self) -> TextSize {
        let begin = self
            .syntax
            .children()
            .find(|node| node.kind() == SyntaxKind::DRAWER_BEGIN);

        match begin {
            Some(node) => node.text_range().end(),
            None => {
                debug_assert!(false, "property drawer must contains DRAWER_BEGIN");
                TextSize::default()
            }
        }
    }

    /// Ending position of drawer content
    ///
    /// This is the start offset of the `DRAWER_END` child. If that child is
    /// missing, debug builds assert and release builds return the default
    /// `TextSize`.
    pub fn content_end(&self) -> TextSize {
        let end = self
            .syntax
            .children()
            .find(|node| node.kind() == SyntaxKind::DRAWER_END);

        match end {
            Some(node) => node.text_range().start(),
            None => {
                debug_assert!(false, "property drawer must contains DRAWER_END");
                TextSize::default()
            }
        }
    }
}
impl Drawer {
    /// Returns the drawer name, i.e. the first `TEXT` token of its
    /// `DRAWER_BEGIN` child.
    ///
    /// # Panics
    ///
    /// Panics if the drawer has no `DRAWER_BEGIN` child, or if that child
    /// holds no `TEXT` token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Drawer};
    ///
    /// let org = Org::parse("* Heading\n:LOGBOOK:\n:END:");
    /// let drawer = org.first_node::<Drawer>().unwrap();
    /// assert_eq!(drawer.name(), "LOGBOOK");
    /// ```
    pub fn name(&self) -> Token {
        let begin = self
            .syntax
            .children()
            .find(|child| child.kind() == SyntaxKind::DRAWER_BEGIN)
            .expect("drawer must contains DRAWER_BEGIN");
        begin
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT))
            .expect("drawer begin must contains TEXT")
    }

    /// Beginning position of drawer content
    ///
    /// This is the end offset of the `DRAWER_BEGIN` child. If that child is
    /// missing, debug builds assert and release builds return offset zero.
    pub fn content_start(&self) -> TextSize {
        let begin = self
            .syntax
            .children()
            .find(|child| child.kind() == SyntaxKind::DRAWER_BEGIN);
        match begin {
            Some(node) => node.text_range().end(),
            None => {
                debug_assert!(false, "drawer must contains DRAWER_BEGIN");
                TextSize::default()
            }
        }
    }

    /// Ending position of drawer content
    ///
    /// This is the start offset of the `DRAWER_END` child. If that child is
    /// missing, debug builds assert and release builds return offset zero.
    pub fn content_end(&self) -> TextSize {
        let end = self
            .syntax
            .children()
            .find(|child| child.kind() == SyntaxKind::DRAWER_END);
        match end {
            Some(node) => node.text_range().start(),
            None => {
                debug_assert!(false, "drawer must contains DRAWER_END");
                TextSize::default()
            }
        }
    }

    /// Raw text of drawer content
    ///
    /// Returns an empty string when the drawer has no `DRAWER_CONTENT` child.
    pub fn content_raw(&self) -> String {
        let content = self
            .syntax
            .children()
            .find(|child| child.kind() == SyntaxKind::DRAWER_CONTENT);
        match content {
            Some(node) => node.to_string(),
            None => String::new(),
        }
    }
}

View file

@ -1,168 +0,0 @@
use crate::{entities::ENTITIES, SyntaxKind};
use super::{filter_token, Entity};
impl Entity {
    /// Looks up the `ENTITIES` row whose name equals this node's first
    /// `TEXT` token.
    ///
    /// The row layout, as used by the accessors below, is
    /// `(name, latex, needs math mode, html, ascii, latin1, utf8)`.
    fn entity(&self) -> Option<&(&str, &str, bool, &str, &str, &str, &str)> {
        let name = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT))?;
        ENTITIES.iter().find(|entry| entry.0 == name.as_ref())
    }

    /// Entity name
    ///
    /// If the entity cannot be resolved, debug builds assert and release
    /// builds return an empty string.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\alpha{}").first_node::<Entity>().unwrap();
    /// assert_eq!(e.name(), "alpha");
    /// let e = Org::parse("\\_ ").first_node::<Entity>().unwrap();
    /// assert_eq!(e.name(), " ");
    /// ```
    pub fn name(&self) -> &str {
        match self.entity() {
            Some(entry) => entry.0,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Entity LaTeX representation
    ///
    /// If the entity cannot be resolved, debug builds assert and release
    /// builds return an empty string.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\middot").first_node::<Entity>().unwrap();
    /// assert_eq!(e.latex(), "\\textperiodcentered{}");
    /// ```
    pub fn latex(&self) -> &str {
        match self.entity() {
            Some(entry) => entry.1,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Whether entity needs to be in math mode
    ///
    /// If the entity cannot be resolved, debug builds assert and release
    /// builds return `false`.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\middot").first_node::<Entity>().unwrap();
    /// assert!(!e.is_latex_math());
    /// let e = Org::parse("\\alefsym").first_node::<Entity>().unwrap();
    /// assert!(e.is_latex_math());
    /// ```
    pub fn is_latex_math(&self) -> bool {
        match self.entity() {
            Some(entry) => entry.2,
            None => {
                debug_assert!(false);
                false
            }
        }
    }

    /// Entity HTML representation
    ///
    /// If the entity cannot be resolved, debug builds assert and release
    /// builds return an empty string.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.html(), "&sect;");
    /// ```
    pub fn html(&self) -> &str {
        match self.entity() {
            Some(entry) => entry.3,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Entity ASCII representation
    ///
    /// If the entity cannot be resolved, debug builds assert and release
    /// builds return an empty string.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.ascii(), "section");
    /// ```
    pub fn ascii(&self) -> &str {
        match self.entity() {
            Some(entry) => entry.4,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Entity Latin1 encoding representation
    ///
    /// If the entity cannot be resolved, debug builds assert and release
    /// builds return an empty string.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.latin1(), "§");
    /// let e = Org::parse("\\rsaquo").first_node::<Entity>().unwrap();
    /// assert_eq!(e.latin1(), ">");
    /// ```
    pub fn latin1(&self) -> &str {
        match self.entity() {
            Some(entry) => entry.5,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Entity UTF-8 encoding representation
    ///
    /// If the entity cannot be resolved, debug builds assert and release
    /// builds return an empty string.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\S").first_node::<Entity>().unwrap();
    /// assert_eq!(e.utf8(), "§");
    /// let e = Org::parse("\\rsaquo").first_node::<Entity>().unwrap();
    /// assert_eq!(e.utf8(), "›");
    /// ```
    pub fn utf8(&self) -> &str {
        match self.entity() {
            Some(entry) => entry.6,
            None => {
                debug_assert!(false);
                ""
            }
        }
    }

    /// Entity contains optional brackets
    ///
    /// Returns `true` when this node has at least two direct children of
    /// kind `TEXT`.
    ///
    /// ```rust
    /// use orgize::{ast::Entity, Org};
    ///
    /// let e = Org::parse("\\beta").first_node::<Entity>().unwrap();
    /// assert!(!e.is_use_brackets());
    /// let e = Org::parse("\\S{}").first_node::<Entity>().unwrap();
    /// assert!(e.is_use_brackets());
    /// let e = Org::parse("\\_ ").first_node::<Entity>().unwrap();
    /// assert!(!e.is_use_brackets());
    /// ```
    pub fn is_use_brackets(&self) -> bool {
        let mut texts = self
            .syntax
            .children_with_tokens()
            .filter(|element| element.kind() == SyntaxKind::TEXT);
        texts.next().is_some() && texts.next().is_some()
    }
}

View file

@ -1,20 +0,0 @@
use crate::SyntaxKind;
use super::{filter_token, FixedWidth};
impl FixedWidth {
/// Contents without colons prefix
///
/// ```rust
/// use orgize::{ast::FixedWidth, Org};
///
/// let fixed = Org::parse(": A\n:\n: B\n: C").first_node::<FixedWidth>().unwrap();
/// assert_eq!(fixed.value(), "A\n\nB\nC");
/// ```
pub fn value(&self) -> String {
self.syntax
.children_with_tokens()
.filter_map(filter_token(SyntaxKind::TEXT))
.fold(String::new(), |acc, text| acc + &text)
}
}

View file

@ -1,357 +0,0 @@
// Description of every AST node wrapper to generate.
//
// Fields understood by the generator below:
//   struct               name of the generated Rust struct
//   kind                 SyntaxKind variants the struct can be cast from
//   token / last_token   [method, kind] accessors for the first / last token of a kind
//   first_child / last_child / children
//                        [method, struct] accessors for child nodes
//   post_blank / pre_blank
//                        emit a blank-line counting accessor
//   affiliated_keywords  emit caption/header/name/plot/results/attr accessors
const nodes = [
  {
    struct: "Document",
    kind: ["DOCUMENT"],
    pre_blank: true,
    first_child: [["section", "Section"], ["first_headline", "Headline"]],
    last_child: [["last_headline", "Headline"]],
    children: [["headlines", "Headline"]],
  },
  { struct: "Section", kind: ["SECTION"], post_blank: true },
  { struct: "Paragraph", kind: ["PARAGRAPH"], post_blank: true, affiliated_keywords: true },
  {
    struct: "Headline",
    kind: ["HEADLINE"],
    first_child: [["section", "Section"], ["planning", "Planning"], ["properties", "PropertyDrawer"]],
    children: [["headlines", "Headline"]],
    post_blank: true,
  },
  { struct: "PropertyDrawer", kind: ["PROPERTY_DRAWER"], children: [["node_properties", "NodeProperty"]] },
  { struct: "NodeProperty", kind: ["NODE_PROPERTY"] },
  { struct: "Planning", kind: ["PLANNING"] },
  { struct: "OrgTable", kind: ["ORG_TABLE"], post_blank: true, affiliated_keywords: true },
  { struct: "OrgTableRow", kind: ["ORG_TABLE_RULE_ROW", "ORG_TABLE_STANDARD_ROW"] },
  { struct: "OrgTableCell", kind: ["ORG_TABLE_CELL"] },
  { struct: "List", kind: ["LIST"], children: [["items", "ListItem"]], affiliated_keywords: true },
  { struct: "ListItem", kind: ["LIST_ITEM"] },
  { struct: "Drawer", kind: ["DRAWER"] },
  { struct: "DynBlock", kind: ["DYN_BLOCK"], affiliated_keywords: true },
  { struct: "Keyword", kind: ["KEYWORD"] },
  { struct: "BabelCall", kind: ["BABEL_CALL"] },
  { struct: "AffiliatedKeyword", kind: ["AFFILIATED_KEYWORD"] },
  { struct: "TableEl", kind: ["TABLE_EL"], post_blank: true },
  { struct: "Clock", kind: ["CLOCK"], post_blank: true },
  { struct: "FnDef", kind: ["FN_DEF"], post_blank: true, affiliated_keywords: true },
  { struct: "Comment", kind: ["COMMENT"], post_blank: true, token: [["text", "TEXT"]], affiliated_keywords: true },
  { struct: "Rule", kind: ["RULE"], post_blank: true },
  { struct: "FixedWidth", kind: ["FIXED_WIDTH"], post_blank: true, token: [["text", "TEXT"]], affiliated_keywords: true },
  { struct: "SpecialBlock", kind: ["SPECIAL_BLOCK"], affiliated_keywords: true },
  { struct: "QuoteBlock", kind: ["QUOTE_BLOCK"], affiliated_keywords: true },
  { struct: "CenterBlock", kind: ["CENTER_BLOCK"], affiliated_keywords: true },
  { struct: "VerseBlock", kind: ["VERSE_BLOCK"], affiliated_keywords: true },
  { struct: "CommentBlock", kind: ["COMMENT_BLOCK"], affiliated_keywords: true },
  { struct: "ExampleBlock", kind: ["EXAMPLE_BLOCK"], affiliated_keywords: true },
  { struct: "ExportBlock", kind: ["EXPORT_BLOCK"], affiliated_keywords: true },
  { struct: "SourceBlock", kind: ["SOURCE_BLOCK"], affiliated_keywords: true },
  { struct: "InlineCall", kind: ["INLINE_CALL"] },
  { struct: "InlineSrc", kind: ["INLINE_SRC"] },
  { struct: "Link", kind: ["LINK"] },
  { struct: "Cookie", kind: ["COOKIE"] },
  { struct: "RadioTarget", kind: ["RADIO_TARGET"] },
  { struct: "FnRef", kind: ["FN_REF"] },
  { struct: "Macros", kind: ["MACROS"] },
  { struct: "Snippet", kind: ["SNIPPET"] },
  { struct: "Target", kind: ["TARGET"] },
  { struct: "Bold", kind: ["BOLD"] },
  { struct: "Strike", kind: ["STRIKE"] },
  { struct: "Italic", kind: ["ITALIC"] },
  { struct: "Underline", kind: ["UNDERLINE"] },
  { struct: "Verbatim", kind: ["VERBATIM"] },
  { struct: "Code", kind: ["CODE"], token: [["text", "TEXT"]] },
  {
    struct: "Timestamp",
    kind: ["TIMESTAMP_ACTIVE", "TIMESTAMP_INACTIVE", "TIMESTAMP_DIARY"],
    token: [
      ["year_start", "TIMESTAMP_YEAR"],
      ["month_start", "TIMESTAMP_MONTH"],
      ["day_start", "TIMESTAMP_DAY"],
      ["hour_start", "TIMESTAMP_HOUR"],
      ["minute_start", "TIMESTAMP_MINUTE"],
    ],
    last_token: [
      ["year_end", "TIMESTAMP_YEAR"],
      ["month_end", "TIMESTAMP_MONTH"],
      ["day_end", "TIMESTAMP_DAY"],
      ["hour_end", "TIMESTAMP_HOUR"],
      ["minute_end", "TIMESTAMP_MINUTE"],
    ],
  },
  { struct: "LatexEnvironment", kind: ["LATEX_ENVIRONMENT"] },
  { struct: "LatexFragment", kind: ["LATEX_FRAGMENT"] },
  { struct: "Entity", kind: ["ENTITY"] },
  { struct: "LineBreak", kind: ["LINE_BREAK"] },
  { struct: "Superscript", kind: ["SUPERSCRIPT"] },
  { struct: "Subscript", kind: ["SUBSCRIPT"] },
];
// Fixed preamble of the generated Rust module: lint attributes, imports and
// the shared `affiliated_keyword` lookup helper.
const preamble = `//! generated file, do not modify it directly
#![allow(clippy::all)]
#![allow(unused)]
use rowan::{ast::{support, AstChildren, AstNode}, TextSize, TextRange};
use crate::syntax::{OrgLanguage, SyntaxKind, SyntaxKind::*, SyntaxNode, SyntaxToken};
fn affiliated_keyword(node: &SyntaxNode, filter: impl Fn(&str) -> bool) -> Option<AffiliatedKeyword> {
node.children()
.take_while(|n| n.kind() == SyntaxKind::AFFILIATED_KEYWORD)
.filter_map(AffiliatedKeyword::cast)
.find(|k| filter(&k.key()))
}
`;

// Renders one line per [method, kind] pair; a missing list renders nothing.
const renderPairs = (pairs, render) =>
  (pairs || []).map(([method, kind]) => render(method, kind)).join("");

// Renders the struct definition, its `AstNode` impl and its inherent impl
// for a single entry of `nodes`.
function renderNode(node) {
  const name = node.struct;
  const castCondition = node.kind.map((k) => `kind == ${k}`).join(" || ");

  const pieces = [
    `
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ${name} {
pub(crate) syntax: SyntaxNode,
}
impl AstNode for ${name} {
type Language = OrgLanguage;
fn can_cast(kind: SyntaxKind) -> bool { ${castCondition} }
fn cast(node: SyntaxNode) -> Option<${name}> { Self::can_cast(node.kind()).then(|| ${name} { syntax: node }) }
fn syntax(&self) -> &SyntaxNode { &self.syntax }
}
impl ${name} {
/// Beginning position of this element
pub fn start(&self) -> TextSize {
self.syntax.text_range().start()
}
/// Ending position of this element
pub fn end(&self) -> TextSize {
self.syntax.text_range().end()
}
/// Range of this element
pub fn text_range(&self) -> TextRange {
self.syntax.text_range()
}
/// Raw text of this element
pub fn raw(&self) -> String {
self.syntax.to_string()
}
`,
    // Accessors are emitted in a fixed order, independent of key order in `nodes`.
    renderPairs(
      node.token,
      (method, kind) =>
        ` pub fn ${method}(&self) -> Option<super::Token> { super::token(&self.syntax, ${kind}) }\n`
    ),
    renderPairs(
      node.last_token,
      (method, kind) =>
        ` pub fn ${method}(&self) -> Option<super::Token> { super::last_token(&self.syntax, ${kind}) }\n`
    ),
    renderPairs(
      node.parent,
      (method, kind) =>
        ` pub fn ${method}(&self) -> Option<${kind}> { self.syntax.parent().and_then(${kind}::cast) }\n`
    ),
    renderPairs(
      node.first_child,
      (method, kind) =>
        ` pub fn ${method}(&self) -> Option<${kind}> { support::child(&self.syntax) }\n`
    ),
    renderPairs(
      node.last_child,
      (method, kind) =>
        ` pub fn ${method}(&self) -> Option<${kind}> { super::last_child(&self.syntax) }\n`
    ),
    renderPairs(
      node.children,
      (method, kind) =>
        ` pub fn ${method}(&self) -> AstChildren<${kind}> { support::children(&self.syntax) }\n`
    ),
  ];

  if (node.post_blank) {
    pieces.push(
      ` pub fn post_blank(&self) -> usize { super::blank_lines(&self.syntax) }\n`
    );
  }
  if (node.pre_blank) {
    pieces.push(
      ` pub fn pre_blank(&self) -> usize { super::blank_lines(&self.syntax) }\n`
    );
  }
  if (node.affiliated_keywords) {
    pieces.push(
      ` pub fn caption(&self) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k == "CAPTION") }\n`,
      ` pub fn header(&self) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k == "HEADER") }\n`,
      ` pub fn name(&self) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k == "NAME") }\n`,
      ` pub fn plot(&self) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k == "PLOT") }\n`,
      ` pub fn results(&self) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k == "RESULTS") }\n`,
      ` pub fn attr(&self, backend: &str) -> Option<AffiliatedKeyword> { affiliated_keyword(&self.syntax, |k| k.starts_with("ATTR_") && &k[5..] == backend) }\n`
    );
  }

  // Close the inherent impl block.
  pieces.push(`}\n`);
  return pieces.join("");
}

// Assemble the module and write it next to this script.
const content = preamble + nodes.map(renderNode).join("");
require("fs").writeFileSync(__dirname + "/generated.rs", content);

File diff suppressed because it is too large Load diff

View file

@ -1,276 +0,0 @@
use rowan::{ast::AstNode, NodeOrToken};
use crate::{syntax::SyntaxKind, SyntaxElement};
use super::{filter_token, Clock, Drawer, Headline, Section, Timestamp, Token};
/// Kind of TODO keyword attached to a headline.
///
/// Returned by `Headline::todo_type`, which maps a `HEADLINE_KEYWORD_TODO`
/// token to [`TodoType::Todo`] and a `HEADLINE_KEYWORD_DONE` token to
/// [`TodoType::Done`].
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TodoType {
    /// The headline carries a keyword of kind `HEADLINE_KEYWORD_TODO`.
    Todo,
    /// The headline carries a keyword of kind `HEADLINE_KEYWORD_DONE`.
    Done,
}
impl Headline {
/// Return level of this headline
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* ").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.level(), 1);
/// let hdl = Org::parse("****** hello").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.level(), 6);
/// ```
pub fn level(&self) -> usize {
self.syntax
.children_with_tokens()
.find_map(filter_token(SyntaxKind::HEADLINE_STARS))
.map_or_else(
|| {
debug_assert!(false, "headline must contains HEADLINE_STARS");
0
},
|stars| stars.len(),
)
}
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* TODO a").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.todo_keyword().unwrap(), "TODO");
/// ```
pub fn todo_keyword(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.find_map(|elem| match elem {
NodeOrToken::Token(tk)
if tk.kind() == SyntaxKind::HEADLINE_KEYWORD_TODO
|| tk.kind() == SyntaxKind::HEADLINE_KEYWORD_DONE =>
{
Some(Token(tk))
}
_ => None,
})
}
/// ```rust
/// use orgize::{Org, ast::{Headline, TodoType}};
///
/// let hdl = Org::parse("* TODO a").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.todo_type().unwrap(), TodoType::Todo);
/// let hdl = Org::parse("*** DONE a").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.todo_type().unwrap(), TodoType::Done);
/// ```
pub fn todo_type(&self) -> Option<TodoType> {
self.syntax
.children_with_tokens()
.find_map(|elem| match elem {
NodeOrToken::Token(tk) if tk.kind() == SyntaxKind::HEADLINE_KEYWORD_TODO => {
Some(TodoType::Todo)
}
NodeOrToken::Token(tk) if tk.kind() == SyntaxKind::HEADLINE_KEYWORD_DONE => {
Some(TodoType::Done)
}
_ => None,
})
}
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* TODO a").first_node::<Headline>().unwrap();
/// assert!(hdl.is_todo());
/// let hdl = Org::parse("* a").first_node::<Headline>().unwrap();
/// assert!(!hdl.is_todo());
/// ```
pub fn is_todo(&self) -> bool {
matches!(self.todo_type(), Some(TodoType::Todo))
}
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* DONE a").first_node::<Headline>().unwrap();
/// assert!(hdl.is_done());
/// let hdl = Org::parse("* a").first_node::<Headline>().unwrap();
/// assert!(!hdl.is_done());
/// ```
pub fn is_done(&self) -> bool {
matches!(self.todo_type(), Some(TodoType::Done))
}
/// Returns parsed title
///
/// ```rust
/// use orgize::{Org, ast::Headline, SyntaxKind};
///
/// let hdl = Org::parse("*** abc *abc* /abc/ :tag:").first_node::<Headline>().unwrap();
/// let title = hdl.title().collect::<Vec<_>>();
/// assert_eq!(title[1].kind(), SyntaxKind::BOLD);
/// assert_eq!(title[1].to_string(), "*abc*");
/// assert_eq!(title[3].kind(), SyntaxKind::ITALIC);
/// assert_eq!(title[3].to_string(), "/abc/");
/// ```
pub fn title(&self) -> impl Iterator<Item = SyntaxElement> {
self.syntax
.children()
.find(|n| n.kind() == SyntaxKind::HEADLINE_TITLE)
.into_iter()
.flat_map(|n| n.children_with_tokens())
}
/// Returns title raw string
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("*** abc *abc* /abc/ :tag:").first_node::<Headline>().unwrap();
/// let title = hdl.title_raw();
/// assert_eq!(title, "abc *abc* /abc/ ");
/// ```
pub fn title_raw(&self) -> String {
self.syntax
.children()
.find(|n| n.kind() == SyntaxKind::HEADLINE_TITLE)
.map(|n| n.to_string())
.unwrap_or_default()
}
/// Return `true` if this headline contains a COMMENT keyword
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* COMMENT").first_node::<Headline>().unwrap();
/// assert!(hdl.is_commented());
/// let hdl = Org::parse("* COMMENT hello").first_node::<Headline>().unwrap();
/// assert!(hdl.is_commented());
/// let hdl = Org::parse("* hello").first_node::<Headline>().unwrap();
/// assert!(!hdl.is_commented());
/// ```
pub fn is_commented(&self) -> bool {
self.title()
.next()
.map(|first| {
if let Some(t) = first.as_token() {
let text = t.text();
t.kind() == SyntaxKind::TEXT
&& text.starts_with("COMMENT")
&& (text.len() == 7 || text[7..].starts_with(char::is_whitespace))
} else {
false
}
})
.unwrap_or_default()
}
/// Return `true` if this headline contains an archive tag
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* hello :ARCHIVE:").first_node::<Headline>().unwrap();
/// assert!(hdl.is_archived());
/// let hdl = Org::parse("* hello :ARCHIVED:").first_node::<Headline>().unwrap();
/// assert!(!hdl.is_archived());
/// ```
pub fn is_archived(&self) -> bool {
self.tags().any(|t| t == "ARCHIVE")
}
/// Returns this headline's closed timestamp, or `None` if not set.
pub fn closed(&self) -> Option<Timestamp> {
self.planning().and_then(|planning| planning.closed())
}
/// Returns this headline's scheduled timestamp, or `None` if not set.
pub fn scheduled(&self) -> Option<Timestamp> {
self.planning().and_then(|planning| planning.scheduled())
}
/// Returns this headline's deadline timestamp, or `None` if not set.
pub fn deadline(&self) -> Option<Timestamp> {
self.planning().and_then(|planning| planning.deadline())
}
/// Returns an iterator of text token in this tags
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let tags_vec = |input: &str| {
/// let hdl = Org::parse(input).first_node::<Headline>().unwrap();
/// let tags: Vec<_> = hdl.tags().map(|t| t.to_string()).collect();
/// tags
/// };
///
/// assert_eq!(tags_vec("* :tag:"), vec!["tag".to_string()]);
/// assert_eq!(tags_vec("* [#A] :::::a2%:"), vec!["a2%".to_string()]);
/// assert_eq!(tags_vec("* TODO :tag: :a2%:"), vec!["tag".to_string(), "a2%".to_string()]);
/// assert_eq!(tags_vec("* title :tag:a2%:"), vec!["tag".to_string(), "a2%".to_string()]);
/// ```
pub fn tags(&self) -> impl Iterator<Item = Token> {
self.syntax
.children()
.find(|n| n.kind() == SyntaxKind::HEADLINE_TAGS)
.into_iter()
.flat_map(|t| t.children_with_tokens())
.filter_map(filter_token(SyntaxKind::TEXT))
}
/// Returns priority text
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let hdl = Org::parse("* [#A]").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.priority().unwrap(), "A");
/// let hdl = Org::parse("** DONE [#B]::").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.priority().unwrap(), "B");
/// let hdl = Org::parse("* [#破]").first_node::<Headline>().unwrap();
/// assert_eq!(hdl.priority().unwrap(), "破");
/// ```
pub fn priority(&self) -> Option<Token> {
self.syntax
.children()
.find(|n| n.kind() == SyntaxKind::HEADLINE_PRIORITY)
.and_then(|n| {
n.children_with_tokens()
.find_map(filter_token(SyntaxKind::TEXT))
})
}
/// Returns an iterator of clock element affiliated with this headline
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let org = Org::parse(r#"* TODO
/// foo
/// :LOGBOOK:
/// bar
/// CLOCK:
/// CLOCK: [2024-10-12]
/// baz
/// CLOCK: [2024-10-12]
/// [2024-10-12]
/// :END:
/// foo"#);
/// let hdl = org.first_node::<Headline>().unwrap();
/// assert_eq!(hdl.clocks().count(), 2);
/// ```
pub fn clocks(&self) -> impl Iterator<Item = Clock> {
self.syntax
.children()
.flat_map(Section::cast)
.flat_map(|x| x.syntax.children().filter_map(Drawer::cast))
.filter(|d| d.name().eq_ignore_ascii_case("LOGBOOK"))
.filter_map(|d| {
d.syntax
.children()
.find(|children| children.kind() == SyntaxKind::DRAWER_CONTENT)
})
.flat_map(|x| x.children().filter_map(Clock::cast))
}
}

View file

@ -1,80 +0,0 @@
use crate::syntax::SyntaxKind;
use super::{filter_token, InlineCall, Token};
impl InlineCall {
    /// Returns the name of the called code block, i.e. the second `TEXT`
    /// token of this element.
    ///
    /// # Panics
    ///
    /// Panics if the element holds fewer than two `TEXT` tokens.
    ///
    /// ```rust
    /// use orgize::{Org, ast::InlineCall};
    ///
    /// let call = Org::parse("call_square(4)").first_node::<InlineCall>().unwrap();
    /// assert_eq!(call.call(), "square");
    /// ```
    pub fn call(&self) -> Token {
        self.syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT))
            .nth(1)
            .expect("inline call must contains two TEXT")
    }

    /// Returns the header placed between the call name and the argument
    /// list, or `None` if there is none.
    ///
    /// ```rust
    /// use orgize::{Org, ast::InlineCall};
    ///
    /// let call = Org::parse("call_square[:results output](4)").first_node::<InlineCall>().unwrap();
    /// assert_eq!(call.inside_header().unwrap(), ":results output");
    ///
    /// let call = Org::parse("call_square(4)[:results html]").first_node::<InlineCall>().unwrap();
    /// assert!(call.inside_header().is_none());
    /// ```
    pub fn inside_header(&self) -> Option<Token> {
        self.syntax
            .children_with_tokens()
            // Only look at what precedes the argument list.
            .take_while(|e| e.kind() != SyntaxKind::L_PARENS)
            .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET)
            // Element right after the opening bracket.
            .nth(1)
            .and_then(|e| {
                debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
                Some(Token(e.into_token()?))
            })
    }

    /// Returns the call arguments, i.e. the first `TEXT` token after the
    /// opening parenthesis.
    ///
    /// # Panics
    ///
    /// Panics if no `TEXT` token follows `L_PARENS`.
    ///
    /// ```rust
    /// use orgize::{Org, ast::InlineCall};
    ///
    /// let call = Org::parse("call_square(4)").first_node::<InlineCall>().unwrap();
    /// assert_eq!(call.arguments(), "4");
    /// ```
    pub fn arguments(&self) -> Token {
        self.syntax
            .children_with_tokens()
            .skip_while(|e| e.kind() != SyntaxKind::L_PARENS)
            .find_map(filter_token(SyntaxKind::TEXT))
            .expect("inline call must contains TEXT after L_PARENS")
    }

    /// Returns the header placed after the argument list, or `None` if there
    /// is none.
    ///
    /// The search starts at the opening parenthesis, so the end header is
    /// found whether or not an inside header is present.
    ///
    /// ```rust
    /// use orgize::{Org, ast::InlineCall};
    ///
    /// let call = Org::parse("call_square[:results output](4)[:results html]").first_node::<InlineCall>().unwrap();
    /// assert_eq!(call.end_header().unwrap(), ":results html");
    ///
    /// let call = Org::parse("call_square(4)[:results html]").first_node::<InlineCall>().unwrap();
    /// assert_eq!(call.end_header().unwrap(), ":results html");
    ///
    /// let call = Org::parse("call_square[:results output](4)").first_node::<InlineCall>().unwrap();
    /// assert!(call.end_header().is_none());
    /// ```
    pub fn end_header(&self) -> Option<Token> {
        self.syntax
            .children_with_tokens()
            // Skip the call name and the optional inside header. Anchoring on
            // the first bracket instead would miss an end header that is the
            // only header of the call.
            .skip_while(|e| e.kind() != SyntaxKind::L_PARENS)
            .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET)
            // Element right after the opening bracket.
            .nth(1)
            .and_then(|e| {
                debug_assert_eq!(e.kind(), SyntaxKind::TEXT);
                Some(Token(e.into_token()?))
            })
    }
}

View file

@ -1,62 +0,0 @@
use crate::SyntaxKind;
use super::{filter_token, InlineSrc, Token};
impl InlineSrc {
/// Language of the code
///
/// ```rust
/// use orgize::{Org, ast::InlineSrc};
///
/// let s = Org::parse("src_C{int a = 0;}").first_node::<InlineSrc>().unwrap();
/// assert_eq!(s.language(), "C");
/// let s = Org::parse("src_xml[:exports code]{<tag>text</tag>}").first_node::<InlineSrc>().unwrap();
/// assert_eq!(s.language(), "xml");
/// ```
pub fn language(&self) -> Token {
self.syntax
.children_with_tokens()
.nth(1)
.and_then(filter_token(SyntaxKind::TEXT))
.expect("inline src must contains TEXT")
}
/// Optional header arguments
///
/// ```rust
/// use orgize::{Org, ast::InlineSrc};
///
/// let s = Org::parse("src_C{int a = 0;}").first_node::<InlineSrc>().unwrap();
/// assert!(s.parameters().is_none());
/// let s = Org::parse("src_xml[:exports code]{<tag>text</tag>}").first_node::<InlineSrc>().unwrap();
/// assert_eq!(s.parameters().unwrap(), ":exports code");
/// ```
pub fn parameters(&self) -> Option<Token> {
self.syntax
.children_with_tokens()
.skip_while(|n| n.kind() != SyntaxKind::L_BRACKET)
.nth(1)
.and_then(|n| {
debug_assert_eq!(n.kind(), SyntaxKind::TEXT);
Some(Token(n.into_token()?))
})
}
/// Source code
///
/// ```rust
/// use orgize::{Org, ast::InlineSrc};
///
/// let s = Org::parse("src_C{int a = 0;}").first_node::<InlineSrc>().unwrap();
/// assert_eq!(s.value(), "int a = 0;");
/// let s = Org::parse("src_xml[:exports code]{<tag>text</tag>}").first_node::<InlineSrc>().unwrap();
/// assert_eq!(s.value(), "<tag>text</tag>");
/// ```
pub fn value(&self) -> Token {
self.syntax
.children_with_tokens()
.filter_map(filter_token(SyntaxKind::TEXT))
.last()
.expect("inline src must contains TEXT")
}
}

View file

@ -1,36 +0,0 @@
use crate::SyntaxKind;
use super::{filter_token, Keyword, Token};
impl Keyword {
    /// Returns the keyword key, i.e. the first `TEXT` token of this element.
    ///
    /// # Panics
    ///
    /// Panics if the element holds no `TEXT` token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Keyword};
    ///
    /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::<Keyword>().unwrap();
    /// assert_eq!(keyword.key(), "KEY");
    /// ```
    pub fn key(&self) -> Token {
        let mut texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT));
        texts.next().expect("keyword must contains TEXT")
    }

    /// Returns the keyword value, i.e. the second `TEXT` token of this
    /// element. Leading whitespace is kept.
    ///
    /// # Panics
    ///
    /// Panics if the element holds fewer than two `TEXT` tokens.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Keyword};
    ///
    /// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::<Keyword>().unwrap();
    /// assert_eq!(keyword.value(), " VALUE");
    /// let keyword = Org::parse("#+KEY:").first_node::<Keyword>().unwrap();
    /// assert_eq!(keyword.value(), "");
    /// ```
    pub fn value(&self) -> Token {
        let mut texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT))
            .skip(1);
        texts.next().expect("keyword must contains two TEXT")
    }
}

View file

@ -1,121 +0,0 @@
use rowan::ast::AstNode;
use super::{token, AffiliatedKeyword, Link, Paragraph, Token};
use crate::{syntax::SyntaxKind, SyntaxElement};
impl Link {
    /// Returns link destination
    ///
    /// # Panics
    ///
    /// Panics if the link holds no `LINK_PATH` token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Link};
    ///
    /// let link = Org::parse("[[#id]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.path(), "#id");
    /// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.path(), "https://google.com");
    /// let link = Org::parse("[[https://google.com][Google]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.path(), "https://google.com");
    /// ```
    pub fn path(&self) -> Token {
        token(&self.syntax, SyntaxKind::LINK_PATH).expect("link must contains LINK_PATH")
    }

    /// Returns `true` if link contains description
    ///
    /// A description is detected by the presence of an `L_BRACKET` child.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Link};
    ///
    /// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
    /// assert!(!link.has_description());
    /// let link = Org::parse("[[https://google.com][Google]]").first_node::<Link>().unwrap();
    /// assert!(link.has_description());
    /// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::<Link>().unwrap();
    /// assert!(link.has_description());
    /// ```
    pub fn has_description(&self) -> bool {
        self.syntax()
            .children_with_tokens()
            .any(|e| e.kind() == SyntaxKind::L_BRACKET)
    }

    /// Returns parsed description
    ///
    /// Returns empty iterator if this link doesn't contain description
    ///
    /// ```rust
    /// use orgize::{Org, ast::Link, SyntaxKind};
    ///
    /// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.description().count(), 0);
    ///
    /// let link = Org::parse("[[https://google.com][Google]]").first_node::<Link>().unwrap();
    /// let description = link.description().collect::<Vec<_>>();
    /// assert_eq!((description[0].kind(), description[0].to_string()), (SyntaxKind::TEXT, "Google".into()));
    ///
    /// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::<Link>().unwrap();
    /// let description = link.description().collect::<Vec<_>>();
    /// assert_eq!((description[0].kind(), description[0].to_string()), (SyntaxKind::BOLD, "*abc*".into()));
    /// assert_eq!((description[2].kind(), description[2].to_string()), (SyntaxKind::ITALIC, "/abc/".into()));
    /// ```
    pub fn description(&self) -> impl Iterator<Item = SyntaxElement> {
        self.syntax()
            .children_with_tokens()
            // Everything between the description's opening bracket ...
            .skip_while(|e| e.kind() != SyntaxKind::L_BRACKET)
            .skip(1)
            // ... and the closing double bracket of the link.
            .take_while(|e| e.kind() != SyntaxKind::R_BRACKET2)
    }

    /// Returns description raw string
    ///
    /// Returns empty string if this link doesn't contain description
    ///
    /// ```rust
    /// use orgize::{Org, ast::Link};
    ///
    /// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.description_raw(), "");
    /// let link = Org::parse("[[https://google.com][Google]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.description_raw(), "Google");
    /// let link = Org::parse("[[https://example.com][*abc* /abc/]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.description_raw(), "*abc* /abc/");
    /// ```
    pub fn description_raw(&self) -> String {
        let mut raw = String::new();
        for element in self.description() {
            raw.push_str(&element.to_string());
        }
        raw
    }

    /// Returns `true` if link is an image link
    ///
    /// A link is an image link when it has no description and its path ends
    /// with one of the known image extensions. The extension is matched
    /// ASCII case-insensitively, like Org's own inline image detection.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Link};
    ///
    /// let link = Org::parse("[[https://google.com]]").first_node::<Link>().unwrap();
    /// assert!(!link.is_image());
    /// let link = Org::parse("[[file:/home/dominik/images/jupiter.jpg]]").first_node::<Link>().unwrap();
    /// assert!(link.is_image());
    /// let link = Org::parse("[[file:/home/dominik/images/JUPITER.JPG]]").first_node::<Link>().unwrap();
    /// assert!(link.is_image());
    /// ```
    pub fn is_image(&self) -> bool {
        const IMAGE_SUFFIX: &[&str] = &[
            // https://github.com/bzg/org-mode/blob/7de1e818d5fbe6a05c6b1a007eed07dc27e7246b/lisp/ox.el#L253
            ".png", ".jpeg", ".jpg", ".gif", ".tiff", ".tif", ".xbm", ".xpm", ".pbm", ".pgm",
            ".ppm", ".webp", ".avif", ".svg",
        ];

        if self.has_description() {
            return false;
        }

        let target = self.path();
        // Compare on bytes: the suffixes are ASCII, so this never splits a
        // multi-byte character and needs no lowercase copy of the path.
        let bytes = target.as_bytes();
        IMAGE_SUFFIX.iter().any(|suffix| {
            bytes.len() >= suffix.len()
                && bytes[bytes.len() - suffix.len()..].eq_ignore_ascii_case(suffix.as_bytes())
        })
    }

    /// Returns caption keyword in this link
    ///
    /// Only links whose parent is a paragraph are supported; any other
    /// parent yields `None`.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Link};
    ///
    /// let link = Org::parse("#+CAPTION: image link\n[[file:/home/dominik/images/jupiter.jpg]]").first_node::<Link>().unwrap();
    /// assert_eq!(link.caption().unwrap().value().unwrap(), " image link");
    /// ```
    pub fn caption(&self) -> Option<AffiliatedKeyword> {
        // TODO: support other element type
        Paragraph::cast(self.syntax.parent()?)?.caption()
    }
}

View file

@ -1,139 +0,0 @@
use super::{filter_token, List, ListItem, Token};
use crate::{syntax::SyntaxKind, SyntaxElement};
impl List {
    /// Returns `true` if this list is an ordered list
    ///
    /// Only the first item is inspected: the list is ordered when that
    /// item's bullet starts with an ASCII digit. If the list has no item,
    /// debug builds assert and release builds return `false`.
    ///
    /// ```rust
    /// use orgize::{Org, ast::List};
    ///
    /// let list = Org::parse("+ 1").first_node::<List>().unwrap();
    /// assert!(!list.is_ordered());
    ///
    /// let list = Org::parse("1. 1").first_node::<List>().unwrap();
    /// assert!(list.is_ordered());
    ///
    /// let list = Org::parse("1) 1\n- 2\n3. 3").first_node::<List>().unwrap();
    /// assert!(list.is_ordered());
    /// ```
    pub fn is_ordered(&self) -> bool {
        match self.items().next() {
            Some(item) => item.bullet().starts_with(|c: char| c.is_ascii_digit()),
            None => {
                debug_assert!(false, "list must contains LIST_ITEM");
                false
            }
        }
    }

    /// Returns `true` if this list contains a TAG
    ///
    /// Only the first item is inspected: the list is descriptive when that
    /// item has a `LIST_ITEM_TAG` child. If the list has no item, debug
    /// builds assert and release builds return `false`.
    ///
    /// ```rust
    /// use orgize::{Org, ast::List};
    ///
    /// let list = Org::parse("- some tag :: item 2.1").first_node::<List>().unwrap();
    /// assert!(list.is_descriptive());
    /// let list = Org::parse("2. [X] item 2").first_node::<List>().unwrap();
    /// assert!(!list.is_descriptive());
    /// ```
    pub fn is_descriptive(&self) -> bool {
        match self.items().next() {
            Some(item) => item
                .syntax
                .children()
                .any(|child| child.kind() == SyntaxKind::LIST_ITEM_TAG),
            None => {
                debug_assert!(false, "list must contains LIST_ITEM");
                false
            }
        }
    }
}
impl ListItem {
/// Returns the length of this item's `LIST_ITEM_INDENT` token.
///
/// If that token is missing, debug builds assert and release builds
/// return `0`.
///
/// ```rust
/// use orgize::{Org, ast::ListItem};
///
/// let item = Org::parse("- 1").first_node::<ListItem>().unwrap();
/// assert_eq!(item.indent(), 0);
/// let item = Org::parse(" \t * 2").first_node::<ListItem>().unwrap();
/// assert_eq!(item.indent(), 3);
/// ```
pub fn indent(&self) -> usize {
    let indent = self
        .syntax
        .children_with_tokens()
        .find_map(filter_token(SyntaxKind::LIST_ITEM_INDENT));
    match indent {
        Some(token) => token.len(),
        None => {
            debug_assert!(false, "list item must contains LIST_ITEM_INDENT");
            0
        }
    }
}
/// ```rust
/// use orgize::{Org, ast::ListItem};
///
/// let item = Org::parse("- some tag").first_node::<ListItem>().unwrap();
/// assert_eq!(item.bullet(), "- ");
/// let item = Org::parse("2. [X] item 2").first_node::<ListItem>().unwrap();
/// assert_eq!(item.bullet(), "2. ");
/// ```
pub fn bullet(&self) -> Token {
self.syntax
.children_with_tokens()
.find_map(filter_token(SyntaxKind::LIST_ITEM_BULLET))
.expect("list item must contains LIST_ITEM_BULLET")
}
/// ```rust
/// use orgize::{Org, ast::ListItem};
///
/// let item = Org::parse("- [-] item 1").first_node::<ListItem>().unwrap();
/// assert_eq!(item.checkbox().unwrap(), "-");
/// let item = Org::parse("2. [X] item 2").first_node::<ListItem>().unwrap();
/// assert_eq!(item.checkbox().unwrap(), "X");
/// let item = Org::parse("3) [ ] item 3").first_node::<ListItem>().unwrap();
/// assert_eq!(item.checkbox().unwrap(), " ");
/// ```
pub fn checkbox(&self) -> Option<Token> {
self.syntax
.children()
.find(|n| n.kind() == SyntaxKind::LIST_ITEM_CHECK_BOX)
.and_then(|n| {
n.children_with_tokens()
.find_map(filter_token(SyntaxKind::TEXT))
})
}
pub fn counter(&self) -> Option<Token> {
self.syntax
.children()
.find(|n| n.kind() == SyntaxKind::LIST_ITEM_COUNTER)
.and_then(|n| {
n.children_with_tokens()
.find_map(filter_token(SyntaxKind::TEXT))
})
}
/// ```rust
/// use orgize::{Org, ast::ListItem};
///
/// let item = Org::parse("+ this is *TAG* :: item1").first_node::<ListItem>().unwrap();
/// let tag = item.tag().map(|n| n.to_string()).collect::<String>();
/// assert_eq!(tag, "this is *TAG* ");
/// ```
pub fn tag(&self) -> impl Iterator<Item = SyntaxElement> {
self.syntax
.children()
.find(|n| n.kind() == SyntaxKind::LIST_ITEM_TAG)
.into_iter()
.flat_map(|n| {
n.children_with_tokens().filter(|n| {
n.kind() != SyntaxKind::WHITESPACE && n.kind() != SyntaxKind::COLON2
})
})
}
}

View file

@ -1,35 +0,0 @@
use crate::SyntaxKind;
use super::{filter_token, Macros, Token};
impl Macros {
    /// Name of the macro: the first `TEXT` token of the node.
    ///
    /// # Panics
    ///
    /// Panics if the node has no `TEXT` token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Macros};
    ///
    /// let m = Org::parse("{{{title}}}").first_node::<Macros>().unwrap();
    /// assert_eq!(m.key(), "title");
    /// let m = Org::parse("{{{two_arg_macro(1, 2)}}}").first_node::<Macros>().unwrap();
    /// assert_eq!(m.key(), "two_arg_macro");
    /// ```
    pub fn key(&self) -> Token {
        let first_text = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT));
        first_text.expect("macros must contains TEXT")
    }

    /// Raw argument text: the second `TEXT` token of the node, if present.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Macros};
    ///
    /// let m = Org::parse("{{{title}}}").first_node::<Macros>().unwrap();
    /// assert!(m.args().is_none());
    /// let m = Org::parse("{{{two_arg_macro(1, 2)}}}").first_node::<Macros>().unwrap();
    /// assert_eq!(m.args().unwrap(), "1, 2");
    /// ```
    pub fn args(&self) -> Option<Token> {
        let mut texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT));
        texts.nth(1)
    }
}

View file

@ -1,176 +0,0 @@
mod generated;
mod affiliated_keyword;
mod block;
mod clock;
#[cfg(feature = "syntax-org-fc")]
mod cloze;
mod comment;
mod document;
mod drawer;
mod entity;
mod fixed_width;
mod headline;
mod inline_call;
mod inline_src;
mod keyword;
mod link;
mod list;
mod macros;
mod planning;
mod snippet;
mod table;
mod timestamp;
#[cfg(feature = "syntax-org-fc")]
pub use cloze::*;
pub use generated::*;
pub use headline::*;
pub use rowan::ast::support::*;
pub use timestamp::*;
use crate::{
syntax::{SyntaxKind, SyntaxNode},
SyntaxToken,
};
use rowan::{ast::AstNode, NodeOrToken, TextRange, TextSize};
use std::{
borrow::{Borrow, Cow},
fmt,
hash::Hash,
ops::Deref,
};
/// Counts the direct children (nodes or tokens) of `parent` whose kind is
/// `BLANK_LINE`.
pub fn blank_lines(parent: &SyntaxNode) -> usize {
    let mut total = 0;
    for child in parent.children_with_tokens() {
        if child.kind() == SyntaxKind::BLANK_LINE {
            total += 1;
        }
    }
    total
}
/// Returns the last direct child node of `parent` that casts to `N`.
pub fn last_child<N: AstNode>(parent: &rowan::SyntaxNode<N::Language>) -> Option<N> {
    parent
        .children()
        // Every successful cast replaces the candidate seen so far.
        .fold(None, |last, node| N::cast(node).or(last))
}
/// Returns the last direct child token of `parent` with the given `kind`.
pub fn last_token(parent: &SyntaxNode, kind: SyntaxKind) -> Option<Token> {
    let pick = filter_token(kind);
    parent
        .children_with_tokens()
        // Every matching token replaces the candidate seen so far.
        .fold(None, |last, elem| pick(elem).or(last))
}
pub fn token(parent: &SyntaxNode, kind: SyntaxKind) -> Option<Token> {
rowan::ast::support::token(parent, kind).map(Token)
}
/// Builds a closure for `filter_map`/`find_map` that keeps only tokens of
/// the given `kind`, wrapped in [`Token`]. Nodes and tokens of any other
/// kind map to `None`.
pub fn filter_token(
    kind: SyntaxKind,
) -> impl Fn(NodeOrToken<SyntaxNode, SyntaxToken>) -> Option<Token> {
    move |elem| {
        if let NodeOrToken::Token(tk) = elem {
            if tk.kind() == kind {
                return Some(Token(tk));
            }
        }
        None
    }
}
/// A simple wrapper of `SyntaxToken`
///
/// It implements the `AsRef<str>`, `Deref<Target = str>` and `Display`
/// traits, so `str` methods can be called on it directly.
///
/// It also implements `Hash` and `Eq`, so it can be used as a key in a
/// `HashMap`. Note that both only look at the underlying text of the
/// `SyntaxToken`: two `Token`s taken from different positions are
/// considered equal when their text is equal.
#[derive(Eq, Clone)]
pub struct Token(pub(crate) SyntaxToken);

impl Token {
    /// The wrapped `SyntaxToken`
    pub fn syntax(&self) -> &SyntaxToken {
        let Token(inner) = self;
        inner
    }

    /// Range of this token
    pub fn text_range(&self) -> TextRange {
        self.syntax().text_range()
    }

    /// Beginning position of this token
    pub fn start(&self) -> TextSize {
        self.text_range().start()
    }

    /// Ending position of this token
    pub fn end(&self) -> TextSize {
        self.text_range().end()
    }
}

impl AsRef<str> for Token {
    fn as_ref(&self) -> &str {
        self.0.text()
    }
}

impl Borrow<str> for Token {
    fn borrow(&self) -> &str {
        self.0.text()
    }
}

impl<'a> PartialEq<&'a str> for Token {
    fn eq(&self, other: &&'a str) -> bool {
        self.0.text() == *other
    }
}

impl PartialEq<String> for Token {
    fn eq(&self, other: &String) -> bool {
        self.0.text() == other.as_str()
    }
}

impl PartialEq<Token> for Token {
    fn eq(&self, other: &Token) -> bool {
        // Text-only comparison; the token position is deliberately ignored.
        self.0.text() == other.0.text()
    }
}

impl Hash for Token {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Hash exactly what `eq` and `Borrow<str>` expose: the text.
        self.0.text().hash(state)
    }
}

impl<'a> PartialEq<Cow<'a, str>> for Token {
    fn eq(&self, other: &Cow<'a, str>) -> bool {
        self.0.text() == &**other
    }
}

impl PartialEq<str> for Token {
    fn eq(&self, other: &str) -> bool {
        self.0.text() == other
    }
}

impl Deref for Token {
    type Target = str;

    #[inline]
    fn deref(&self) -> &str {
        self.0.text()
    }
}

impl fmt::Debug for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate so that the caller's formatter flags are honoured.
        <str as fmt::Debug>::fmt(self.0.text(), f)
    }
}

impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Display>::fmt(self.0.text(), f)
    }
}

View file

@ -1,67 +0,0 @@
use rowan::ast::AstNode;
use super::{Planning, Timestamp};
use crate::syntax::SyntaxKind;
impl Planning {
    /// Returns deadline timestamp
    ///
    /// Looks at the last `PLANNING_DEADLINE` child and returns its first
    /// timestamp child.
    ///
    /// ```rust
    /// use orgize::{ast::Planning, Org};
    ///
    /// let s = Org::parse("* a\nDEADLINE: <2019-04-08 Mon>")
    ///     .first_node::<Planning>()
    ///     .unwrap()
    ///     .deadline()
    ///     .unwrap();
    /// assert_eq!(s.day_start().unwrap(), "08");
    /// ```
    pub fn deadline(&self) -> Option<Timestamp> {
        let entry = self
            .syntax
            .children()
            .filter(|node| node.kind() == SyntaxKind::PLANNING_DEADLINE)
            .last()?;
        entry.children().find_map(Timestamp::cast)
    }

    /// Returns scheduled timestamp
    ///
    /// Looks at the last `PLANNING_SCHEDULED` child and returns its first
    /// timestamp child.
    ///
    /// ```rust
    /// use orgize::{ast::Planning, Org};
    ///
    /// let s = Org::parse("* a\nSCHEDULED: <2019-04-08 Mon>")
    ///     .first_node::<Planning>()
    ///     .unwrap()
    ///     .scheduled()
    ///     .unwrap();
    /// assert_eq!(s.year_start().unwrap(), "2019");
    /// ```
    pub fn scheduled(&self) -> Option<Timestamp> {
        let entry = self
            .syntax
            .children()
            .filter(|node| node.kind() == SyntaxKind::PLANNING_SCHEDULED)
            .last()?;
        entry.children().find_map(Timestamp::cast)
    }

    /// Returns closed timestamp
    ///
    /// Looks at the last `PLANNING_CLOSED` child and returns its first
    /// timestamp child.
    ///
    /// ```rust
    /// use orgize::{ast::Planning, Org};
    ///
    /// let s = Org::parse("* a\nCLOSED: <2019-04-08 Mon>")
    ///     .first_node::<Planning>()
    ///     .unwrap()
    ///     .closed()
    ///     .unwrap();
    /// assert_eq!(s.month_start().unwrap(), "04");
    /// ```
    pub fn closed(&self) -> Option<Timestamp> {
        let entry = self
            .syntax
            .children()
            .filter(|node| node.kind() == SyntaxKind::PLANNING_CLOSED)
            .last()?;
        entry.children().find_map(Timestamp::cast)
    }
}

View file

@ -1,34 +0,0 @@
use crate::syntax::SyntaxKind;
use super::{filter_token, Snippet, Token};
impl Snippet {
    /// Back-end name: the first `TEXT` token of the snippet.
    ///
    /// # Panics
    ///
    /// Panics if the node has no `TEXT` token.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Snippet};
    ///
    /// let snippet = Org::parse("@@BACKEND:VALUE@@").first_node::<Snippet>().unwrap();
    /// assert_eq!(snippet.backend(), "BACKEND");
    /// ```
    pub fn backend(&self) -> Token {
        let first_text = self
            .syntax
            .children_with_tokens()
            .find_map(filter_token(SyntaxKind::TEXT));
        first_text.expect("snippet must contains TEXT")
    }

    /// Snippet value: the second `TEXT` token of the snippet.
    ///
    /// # Panics
    ///
    /// Panics if the node has fewer than two `TEXT` tokens.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Snippet};
    ///
    /// let snippet = Org::parse("@@BACKEND:@@").first_node::<Snippet>().unwrap();
    /// assert_eq!(snippet.value(), "");
    /// let snippet = Org::parse("@@BACKEND:VALUE@@").first_node::<Snippet>().unwrap();
    /// assert_eq!(snippet.value(), "VALUE");
    /// ```
    pub fn value(&self) -> Token {
        let mut texts = self
            .syntax
            .children_with_tokens()
            .filter_map(filter_token(SyntaxKind::TEXT));
        texts.nth(1).expect("snippet must contains two TEXT")
    }
}

View file

@ -1,110 +0,0 @@
use rowan::ast::AstNode;
use super::{filter_token, OrgTable, OrgTableRow, Token};
use crate::syntax::SyntaxKind;
impl OrgTable {
    /// Returns `true` if this table has a header
    ///
    /// A table has a header when it contains at least two row groups.
    ///
    /// ```rust
    /// use orgize::{Org, ast::OrgTable};
    ///
    /// let org = Org::parse(r#"
    /// | a | b |
    /// |---+---|
    /// | c | d |"#);
    /// let table = org.first_node::<OrgTable>().unwrap();
    /// assert!(table.has_header());
    ///
    /// let org = Org::parse(r#"
    /// | a | b |
    /// | 0 | 1 |
    /// |---+---|
    /// | a | w |"#);
    /// let table = org.first_node::<OrgTable>().unwrap();
    /// assert!(table.has_header());
    ///
    /// let org = Org::parse(r#"
    /// | a | b |
    /// | c | d |"#);
    /// let table = org.first_node::<OrgTable>().unwrap();
    /// assert!(!table.has_header());
    ///
    /// let org = Org::parse(r#"
    /// |---+---|
    /// | a | b |
    /// | c | d |
    /// |---+---|"#);
    /// let table = org.first_node::<OrgTable>().unwrap();
    /// assert!(!table.has_header());
    /// ```
    pub fn has_header(&self) -> bool {
        // Drop leading rule rows, then the first run of standard rows; a
        // header exists if any non-rule row is left after that.
        let mut after_first_group = self
            .syntax
            .children()
            .filter_map(OrgTableRow::cast)
            .skip_while(OrgTableRow::is_rule)
            .skip_while(OrgTableRow::is_standard);
        after_first_group.any(|row| !row.is_rule())
    }

    /// Formulas associated to the table
    ///
    /// Yields the last `TEXT` token of every `KEYWORD` child node.
    ///
    /// ```rust
    /// use orgize::{Org, ast::OrgTable};
    ///
    /// let table = Org::parse("| a |").first_node::<OrgTable>().unwrap();
    /// assert_eq!(table.tblfm().count(), 0);
    ///
    /// let table = Org::parse("| a |\n#+tblfm: test").first_node::<OrgTable>().unwrap();
    /// let tblfm = table.tblfm().collect::<Vec<_>>();
    /// assert_eq!(tblfm.len(), 1);
    /// assert_eq!(tblfm[0], " test");
    ///
    /// let table = Org::parse("| a |\n#+TBLFM: test1\n#+TBLFM: test2").first_node::<OrgTable>().unwrap();
    /// let tblfm = table.tblfm().collect::<Vec<_>>();
    /// assert_eq!(tblfm.len(), 2);
    /// assert_eq!(tblfm[0], " test1");
    /// assert_eq!(tblfm[1], " test2");
    /// ```
    pub fn tblfm(&self) -> impl Iterator<Item = Token> {
        self.syntax
            .children()
            .filter(|node| node.kind() == SyntaxKind::KEYWORD)
            .filter_map(|keyword| {
                keyword
                    .children_with_tokens()
                    .filter_map(filter_token(SyntaxKind::TEXT))
                    .last()
            })
    }
}
impl OrgTableRow {
    /// Returns `true` if this row is a rule
    ///
    /// ```rust
    /// use orgize::{Org, ast::OrgTableRow};
    ///
    /// let org = Org::parse("|----|----|\n|Foo |Bar |");
    /// let row = org.first_node::<OrgTableRow>().unwrap();
    /// assert!(row.is_rule());
    /// ```
    pub fn is_rule(&self) -> bool {
        let kind = self.syntax.kind();
        kind == SyntaxKind::ORG_TABLE_RULE_ROW
    }

    /// Returns `true` if this row is a standard row
    ///
    /// ```rust
    /// use orgize::{Org, ast::OrgTableRow};
    ///
    /// let org = Org::parse("|Foo |Bar |\n|----|----|");
    /// let row = org.first_node::<OrgTableRow>().unwrap();
    /// assert!(row.is_standard());
    /// ```
    pub fn is_standard(&self) -> bool {
        let kind = self.syntax.kind();
        kind == SyntaxKind::ORG_TABLE_STANDARD_ROW
    }
}

View file

@ -1,301 +0,0 @@
use super::{filter_token, Timestamp};
use crate::syntax::SyntaxKind;
/// Unit of a timestamp repeater or warning delay.
///
/// The timestamp accessors in this file map the unit letters as follows:
/// `h` is hour, `d` is day, `w` is week, `m` is month and `y` is year.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum TimeUnit {
    /// Written as `h`
    Hour,
    /// Written as `d`
    Day,
    /// Written as `w`
    Week,
    /// Written as `m`
    Month,
    /// Written as `y`
    Year,
}
/// Kind of a timestamp repeater, determined by its mark.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum RepeaterType {
    /// Mark `+`
    Cumulate,
    /// Mark `++`
    CatchUp,
    /// Mark `.+`
    Restart,
}
/// Kind of a timestamp warning delay, determined by its mark.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum DelayType {
    /// Mark `-`
    All,
    /// Mark `--`
    First,
}
impl Timestamp {
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let ts = Org::parse("<2003-09-16 Tue 09:39-10:39>").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_active());
/// let ts = Org::parse("<2003-09-16 Tue 09:39>--<2003-09-16 Tue 10:39>").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_active());
/// let ts = Org::parse("<2003-09-16 Tue 09:39>").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_active());
/// ```
pub fn is_active(&self) -> bool {
self.syntax.kind() == SyntaxKind::TIMESTAMP_ACTIVE
}
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_inactive());
/// let ts = Org::parse("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_inactive());
/// let ts = Org::parse("[2003-09-16 Tue 09:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_inactive());
/// ```
pub fn is_inactive(&self) -> bool {
self.syntax.kind() == SyntaxKind::TIMESTAMP_INACTIVE
}
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let ts = Org::parse("<%%(org-calendar-holiday)>").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_diary());
/// ```
pub fn is_diary(&self) -> bool {
self.syntax.kind() == SyntaxKind::TIMESTAMP_DIARY
}
/// Returns `true` if this timestamp has a range
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_range());
/// let ts = Org::parse("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_range());
/// let ts = Org::parse("[2003-09-16]--[2003-09-16]").first_node::<Timestamp>().unwrap();
/// assert!(ts.is_range());
/// let ts = Org::parse("[2003-09-16 Tue 09:39]").first_node::<Timestamp>().unwrap();
/// assert!(!ts.is_range());
/// ```
pub fn is_range(&self) -> bool {
self.syntax
.children_with_tokens()
.filter_map(filter_token(SyntaxKind::MINUS))
.count()
> 2
}
/// ```rust
/// use orgize::{Org, ast::{Timestamp, RepeaterType}};
///
/// let t = Org::parse("[2000-01-01 +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_type(), Some(RepeaterType::Cumulate));
/// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_type(), Some(RepeaterType::Restart));
/// let t = Org::parse("[2000-01-01 --1y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_type(), None);
/// ```
pub fn repeater_type(&self) -> Option<RepeaterType> {
self.nth_repeater(0).map(|i| i.0)
}
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let t = Org::parse("[2000-01-01 +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_value(), Some(1));
/// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_value(), Some(10));
/// let t = Org::parse("[2000-01-01 --1y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_value(), None);
/// ```
pub fn repeater_value(&self) -> Option<u32> {
self.nth_repeater(0).map(|i| i.1)
}
/// ```rust
/// use orgize::{Org, ast::{Timestamp, TimeUnit}};
///
/// let t = Org::parse("[2000-01-01 +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_unit(), Some(TimeUnit::Week));
/// let t = Org::parse("[2000-01-01 .+10d +1w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_unit(), Some(TimeUnit::Day));
/// let t = Org::parse("[2000-01-01 --1y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.repeater_unit(), None);
/// ```
pub fn repeater_unit(&self) -> Option<TimeUnit> {
self.nth_repeater(0).map(|i| i.2)
}
/// ```rust
/// use orgize::{Org, ast::{Timestamp, DelayType}};
///
/// let t = Org::parse("[2000-01-01 -3y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_type(), Some(DelayType::All));
/// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_type(), Some(DelayType::All));
/// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_type(), Some(DelayType::First));
/// ```
pub fn warning_type(&self) -> Option<DelayType> {
self.nth_delay(0).map(|i| i.0)
}
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let t = Org::parse("[2000-01-01 -3y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_value(), Some(3));
/// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_value(), Some(5));
/// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_value(), Some(10));
/// ```
pub fn warning_value(&self) -> Option<u32> {
self.nth_delay(0).map(|i| i.1)
}
/// ```rust
/// use orgize::{Org, ast::{Timestamp, TimeUnit}};
///
/// let t = Org::parse("[2000-01-01 -3y]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_unit(), Some(TimeUnit::Year));
/// let t = Org::parse("[2000-01-01]--[2000-01-02 -5w]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_unit(), Some(TimeUnit::Week));
/// let t = Org::parse("[2000-01-01 01:00-02:00 --10m]").first_node::<Timestamp>().unwrap();
/// assert_eq!(t.warning_unit(), Some(TimeUnit::Month));
/// ```
pub fn warning_unit(&self) -> Option<TimeUnit> {
self.nth_delay(0).map(|i| i.2)
}
fn nth_repeater(&self, nth: usize) -> Option<(RepeaterType, u32, TimeUnit)> {
let mut i = nth + 1;
let mut iter = self.syntax.children_with_tokens().skip_while(|n| {
if n.kind() == SyntaxKind::TIMESTAMP_REPEATER_MARK {
i -= 1;
i != 0
} else {
true
}
});
let mark = iter.next().and_then(|n| match n.as_token()?.text() {
"++" => Some(RepeaterType::CatchUp),
"+" => Some(RepeaterType::Cumulate),
".+" => Some(RepeaterType::Restart),
_ => None,
})?;
let value = iter
.next()
.and_then(|n| n.as_token()?.text().parse::<u32>().ok())?;
let unit = iter.next().and_then(|n| match n.as_token()?.text() {
"h" => Some(TimeUnit::Hour),
"d" => Some(TimeUnit::Day),
"w" => Some(TimeUnit::Week),
"m" => Some(TimeUnit::Month),
"y" => Some(TimeUnit::Year),
_ => None,
})?;
Some((mark, value, unit))
}
fn nth_delay(&self, nth: usize) -> Option<(DelayType, u32, TimeUnit)> {
let mut i = nth + 1;
let mut iter = self.syntax.children_with_tokens().skip_while(|n| {
if n.kind() == SyntaxKind::TIMESTAMP_DELAY_MARK {
i -= 1;
i != 0
} else {
true
}
});
let mark = iter.next().and_then(|n| match n.as_token()?.text() {
"-" => Some(DelayType::All),
"--" => Some(DelayType::First),
_ => None,
})?;
let value = iter
.next()
.and_then(|n| n.as_token()?.text().parse::<u32>().ok())?;
let unit = iter.next().and_then(|n| match n.as_token()?.text() {
"h" => Some(TimeUnit::Hour),
"d" => Some(TimeUnit::Day),
"w" => Some(TimeUnit::Week),
"m" => Some(TimeUnit::Month),
"y" => Some(TimeUnit::Year),
_ => None,
})?;
Some((mark, value, unit))
}
/// Converts timestamp start to chrono NaiveDateTime
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
/// use chrono::NaiveDateTime;
///
/// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert_eq!(ts.start_to_chrono().unwrap(), "2003-09-16T09:39:00".parse::<NaiveDateTime>().unwrap());
///
/// let ts = Org::parse("[2003-13-00 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert!(ts.start_to_chrono().is_none());
/// ```
#[cfg(feature = "chrono")]
pub fn start_to_chrono(&self) -> Option<chrono::NaiveDateTime> {
Some(chrono::NaiveDateTime::new(
chrono::NaiveDate::from_ymd_opt(
self.year_start()?.parse().ok()?,
self.month_start()?.parse().ok()?,
self.day_start()?.parse().ok()?,
)?,
chrono::NaiveTime::from_hms_opt(
self.hour_start()?.parse().ok()?,
self.minute_start()?.parse().ok()?,
0,
)?,
))
}
/// Converts timestamp end to chrono NaiveDateTime
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
/// use chrono::NaiveDateTime;
///
/// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert_eq!(ts.end_to_chrono().unwrap(), "2003-09-16T10:39:00".parse::<NaiveDateTime>().unwrap());
/// ```
#[cfg(feature = "chrono")]
pub fn end_to_chrono(&self) -> Option<chrono::NaiveDateTime> {
Some(chrono::NaiveDateTime::new(
chrono::NaiveDate::from_ymd_opt(
self.year_end()?.parse().ok()?,
self.month_end()?.parse().ok()?,
self.day_end()?.parse().ok()?,
)?,
chrono::NaiveTime::from_hms_opt(
self.hour_end()?.parse().ok()?,
self.minute_end()?.parse().ok()?,
0,
)?,
))
}
/// Returns chrono::TimeDelta between timestamp start and end
///
/// ```rust
/// use orgize::{Org, ast::Timestamp};
///
/// let ts = Org::parse("[2003-09-16 Tue 09:39-10:39]").first_node::<Timestamp>().unwrap();
/// assert_eq!(ts.time_delta().unwrap().num_hours(), 1);
/// ```
#[cfg(feature = "chrono")]
pub fn time_delta(&self) -> Option<chrono::TimeDelta> {
Some(self.end_to_chrono()? - self.start_to_chrono()?)
}
}

View file

@ -1,87 +1,18 @@
use crate::syntax::document::document_node;
use crate::Org;
/// Three-state switch with `Nil`, `Brace` and `True` variants, each with a
/// matching `is_*` predicate.
#[derive(Clone, Debug)]
pub enum UseSubSuperscript {
    Nil,
    Brace,
    True,
}

impl UseSubSuperscript {
    /// Returns `true` for the `Nil` variant only.
    pub fn is_nil(&self) -> bool {
        match self {
            Self::Nil => true,
            Self::Brace | Self::True => false,
        }
    }

    /// Returns `true` for the `True` variant only.
    pub fn is_true(&self) -> bool {
        match self {
            Self::True => true,
            Self::Nil | Self::Brace => false,
        }
    }

    /// Returns `true` for the `Brace` variant only.
    pub fn is_brace(&self) -> bool {
        match self {
            Self::Brace => true,
            Self::Nil | Self::True => false,
        }
    }
}
/// Parse configuration
#[derive(Clone, Debug)]
pub struct ParseConfig {
    /// Headline's todo keywords
    ///
    /// NOTE(review): presumably `.0` holds the "todo" keywords and `.1` the
    /// "done" keywords (the defaults are `TODO` / `DONE`) -- confirm against
    /// the headline parser.
    pub todo_keywords: (Vec<String>, Vec<String>),
    // NOTE(review): presumably mirrors `org-element-dual-keywords`
    // (defaults: CAPTION, RESULTS) -- confirm.
    pub dual_keywords: Vec<String>,
    // NOTE(review): presumably mirrors `org-element-parsed-keywords`
    // (default: CAPTION) -- confirm.
    pub parsed_keywords: Vec<String>,
    /// Control sub/superscript parsing
    ///
    /// Equivalent to `org-use-sub-superscripts`
    ///
    /// - `UseSubSuperscript::Nil`: disable parsing
    /// - `UseSubSuperscript::True`: enable parsing
    /// - `UseSubSuperscript::Brace`: enable parsing, but braces are required
    pub use_sub_superscript: UseSubSuperscript,
    /// Affiliated keywords
    ///
    /// Equivalent to [`org-element-affiliated-keywords`](https://git.sr.ht/~bzg/org-mode/tree/6f960f3c6a4dfe137fbd33fef9f7dadfd229600c/item/lisp/org-element.el#L331)
    pub affiliated_keywords: Vec<String>,
}
impl ParseConfig {
    /// Parses input with current config
    ///
    /// Consumes the config and stores it in the returned [`Org`].
    ///
    /// # Panics
    ///
    /// Panics if `document_node` returns an error or if the parsed root
    /// element is not a node.
    pub fn parse(self, input: impl AsRef<str>) -> Org {
        let source = (input.as_ref(), &self).into();
        let (_, root) = document_node(source).unwrap();
        let green = root.into_node().unwrap();
        Org {
            config: self,
            green,
        }
    }
}
impl Default for ParseConfig {
    /// Builds the default configuration: `TODO`/`DONE` as todo keywords,
    /// `CAPTION` and `RESULTS` as dual keywords, `CAPTION` as the only parsed
    /// keyword, sub/superscript parsing enabled, and the affiliated keyword
    /// list below.
    fn default() -> Self {
        ParseConfig {
            // Each field is initialised exactly once; the struct literal
            // previously listed `todo_keywords` twice, which does not compile.
            todo_keywords: (vec!["TODO".into()], vec!["DONE".into()]),
            dual_keywords: vec!["CAPTION".into(), "RESULTS".into()],
            parsed_keywords: vec!["CAPTION".into()],
            use_sub_superscript: UseSubSuperscript::True,
            affiliated_keywords: vec![
                "CAPTION".into(),
                "DATA".into(),
                "HEADER".into(),
                "HEADERS".into(),
                "LABEL".into(),
                "NAME".into(),
                "PLOT".into(),
                "RESNAME".into(),
                "RESULT".into(),
                "RESULTS".into(),
                "SOURCE".into(),
                "SRCNAME".into(),
                "TBLNAME".into(),
            ],
        }
    }
}
// Shared `ParseConfig` holding the default settings, created through
// `lazy_static!` from `ParseConfig::default()`.
lazy_static::lazy_static! {
    pub static ref DEFAULT_CONFIG: ParseConfig = ParseConfig::default();
}

408
src/elements/block.rs Normal file
View file

@ -0,0 +1,408 @@
use std::borrow::Cow;
use nom::{
bytes::complete::tag_no_case,
character::complete::{alpha1, space0},
sequence::preceded,
IResult,
};
use crate::elements::Element;
use crate::parse::combinators::{blank_lines_count, line, lines_till};
/// Special Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct SpecialBlock<'a> {
    /// Block parameters
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub parameters: Option<Cow<'a, str>>,
    /// Block name
    pub name: Cow<'a, str>,
    /// Numbers of blank lines between first block's line and next non-blank
    /// line
    pub pre_blank: usize,
    /// Numbers of blank lines between last block's line and next non-blank line
    /// or buffer's end
    pub post_blank: usize,
}

impl SpecialBlock<'_> {
    /// Converts every borrowed string into an owned one, producing a value
    /// that no longer borrows from the source text.
    pub fn into_owned(self) -> SpecialBlock<'static> {
        let SpecialBlock {
            parameters,
            name,
            pre_blank,
            post_blank,
        } = self;
        SpecialBlock {
            name: Cow::Owned(name.into_owned()),
            parameters: parameters.map(|p| Cow::Owned(p.into_owned())),
            pre_blank,
            post_blank,
        }
    }
}
/// Quote Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct QuoteBlock<'a> {
    /// Optional block parameters
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub parameters: Option<Cow<'a, str>>,
    /// Numbers of blank lines between first block's line and next non-blank
    /// line
    pub pre_blank: usize,
    /// Numbers of blank lines between last block's line and next non-blank line
    /// or buffer's end
    pub post_blank: usize,
}

impl QuoteBlock<'_> {
    /// Converts the borrowed parameters into an owned string, producing a
    /// value that no longer borrows from the source text.
    pub fn into_owned(self) -> QuoteBlock<'static> {
        let QuoteBlock {
            parameters,
            pre_blank,
            post_blank,
        } = self;
        QuoteBlock {
            parameters: parameters.map(|p| Cow::Owned(p.into_owned())),
            pre_blank,
            post_blank,
        }
    }
}
/// Center Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct CenterBlock<'a> {
    /// Optional block parameters
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub parameters: Option<Cow<'a, str>>,
    /// Numbers of blank lines between first block's line and next non-blank
    /// line
    pub pre_blank: usize,
    /// Numbers of blank lines between last block's line and next non-blank line
    /// or buffer's end
    pub post_blank: usize,
}

impl CenterBlock<'_> {
    /// Converts the borrowed parameters into an owned string, producing a
    /// value that no longer borrows from the source text.
    pub fn into_owned(self) -> CenterBlock<'static> {
        let CenterBlock {
            parameters,
            pre_blank,
            post_blank,
        } = self;
        CenterBlock {
            parameters: parameters.map(|p| Cow::Owned(p.into_owned())),
            pre_blank,
            post_blank,
        }
    }
}
/// Verse Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct VerseBlock<'a> {
    /// Optional block parameters
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub parameters: Option<Cow<'a, str>>,
    /// Numbers of blank lines between first block's line and next non-blank
    /// line
    pub pre_blank: usize,
    /// Numbers of blank lines between last block's line and next non-blank line
    /// or buffer's end
    pub post_blank: usize,
}

impl VerseBlock<'_> {
    /// Converts the borrowed parameters into an owned string, producing a
    /// value that no longer borrows from the source text.
    pub fn into_owned(self) -> VerseBlock<'static> {
        let VerseBlock {
            parameters,
            pre_blank,
            post_blank,
        } = self;
        VerseBlock {
            parameters: parameters.map(|p| Cow::Owned(p.into_owned())),
            pre_blank,
            post_blank,
        }
    }
}
/// Comment Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct CommentBlock<'a> {
    /// Optional text that follows the block name on the begin line
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub data: Option<Cow<'a, str>>,
    /// Comment block contents
    pub contents: Cow<'a, str>,
    /// Numbers of blank lines between last block's line and next non-blank line
    /// or buffer's end
    pub post_blank: usize,
}

impl CommentBlock<'_> {
    /// Converts every borrowed string into an owned one, producing a value
    /// that no longer borrows from the source text.
    pub fn into_owned(self) -> CommentBlock<'static> {
        let CommentBlock {
            data,
            contents,
            post_blank,
        } = self;
        CommentBlock {
            data: data.map(|d| Cow::Owned(d.into_owned())),
            contents: Cow::Owned(contents.into_owned()),
            post_blank,
        }
    }
}
/// Example Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct ExampleBlock<'a> {
    /// Optional text that follows the block name on the begin line
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub data: Option<Cow<'a, str>>,
    /// Block contents
    pub contents: Cow<'a, str>,
    /// Numbers of blank lines between last block's line and next non-blank line
    /// or buffer's end
    pub post_blank: usize,
}

impl ExampleBlock<'_> {
    /// Converts every borrowed string into an owned one, producing a value
    /// that no longer borrows from the source text.
    pub fn into_owned(self) -> ExampleBlock<'static> {
        let ExampleBlock {
            data,
            contents,
            post_blank,
        } = self;
        ExampleBlock {
            data: data.map(|d| Cow::Owned(d.into_owned())),
            contents: Cow::Owned(contents.into_owned()),
            post_blank,
        }
    }
}
/// Export Block Element
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct ExportBlock<'a> {
    /// Text that follows the block name on the begin line (may be empty)
    pub data: Cow<'a, str>,
    /// Block contents
    pub contents: Cow<'a, str>,
    /// Numbers of blank lines between last block's line and next non-blank line
    /// or buffer's end
    pub post_blank: usize,
}

impl ExportBlock<'_> {
    /// Converts every borrowed string into an owned one, producing a value
    /// that no longer borrows from the source text.
    pub fn into_owned(self) -> ExportBlock<'static> {
        let ExportBlock {
            data,
            contents,
            post_blank,
        } = self;
        ExportBlock {
            data: Cow::Owned(data.into_owned()),
            contents: Cow::Owned(contents.into_owned()),
            post_blank,
        }
    }
}
/// Src Block Element
#[derive(Debug, Default, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct SourceBlock<'a> {
    /// Block contents
    pub contents: Cow<'a, str>,
    /// Language of the code in the block
    pub language: Cow<'a, str>,
    /// Remainder of the begin line after the language
    pub arguments: Cow<'a, str>,
    /// Numbers of blank lines between last block's line and next non-blank line
    /// or buffer's end
    pub post_blank: usize,
}

impl SourceBlock<'_> {
    /// Converts every borrowed string into an owned one, producing a value
    /// that no longer borrows from the source text.
    pub fn into_owned(self) -> SourceBlock<'static> {
        let SourceBlock {
            contents,
            language,
            arguments,
            post_blank,
        } = self;
        SourceBlock {
            language: Cow::Owned(language.into_owned()),
            arguments: Cow::Owned(arguments.into_owned()),
            contents: Cow::Owned(contents.into_owned()),
            post_blank,
        }
    }

    // TODO: fn number_lines() -> Some(New) | Some(Continued) | None { }
    // TODO: fn preserve_indent() -> bool { }
    // TODO: fn use_labels() -> bool { }
    // TODO: fn label_fmt() -> Option<String> { }
    // TODO: fn retain_labels() -> bool { }
}
/// Untyped result of parsing a `#+BEGIN_<name>` ... `#+END_<name>` block,
/// before it is turned into a concrete element by `into_element`.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct RawBlock<'a> {
    /// Alphabetic name that follows `#+BEGIN_`, in its original case
    pub name: &'a str,
    /// Rest of the begin line after the name, trimmed
    pub arguments: &'a str,
    /// Blank-line count reported by `blank_lines_count` for `contents`
    pub pre_blank: usize,
    /// Lines between the begin line and the matching `#+END_<name>` line
    pub contents: &'a str,
    /// What `blank_lines_count` leaves of `contents` after counting `pre_blank`
    pub contents_without_blank_lines: &'a str,
    /// Blank-line count reported by `blank_lines_count` for the input after the block
    pub post_blank: usize,
}
impl<'a> RawBlock<'a> {
pub fn parse(input: &str) -> Option<(&str, RawBlock)> {
parse_internal(input).ok()
}
pub fn into_element(self) -> (Element<'a>, &'a str) {
let RawBlock {
name,
contents,
arguments,
pre_blank,
contents_without_blank_lines,
post_blank,
} = self;
let arguments: Option<Cow<'a, str>> = if arguments.is_empty() {
None
} else {
Some(arguments.into())
};
let element = match &*name.to_uppercase() {
"CENTER" => CenterBlock {
parameters: arguments,
pre_blank,
post_blank,
}
.into(),
"QUOTE" => QuoteBlock {
parameters: arguments,
pre_blank,
post_blank,
}
.into(),
"VERSE" => VerseBlock {
parameters: arguments,
pre_blank,
post_blank,
}
.into(),
"COMMENT" => CommentBlock {
data: arguments,
contents: contents.into(),
post_blank,
}
.into(),
"EXAMPLE" => ExampleBlock {
data: arguments,
contents: contents.into(),
post_blank,
}
.into(),
"EXPORT" => ExportBlock {
data: arguments.unwrap_or_default(),
contents: contents.into(),
post_blank,
}
.into(),
"SRC" => {
let (language, arguments) = match &arguments {
Some(Cow::Borrowed(args)) => {
let (language, arguments) =
args.split_at(args.find(' ').unwrap_or_else(|| args.len()));
(language.into(), arguments.into())
}
None => (Cow::Borrowed(""), Cow::Borrowed("")),
_ => unreachable!(
"`parse_block_element` returns `Some(Cow::Borrowed)` or `None`"
),
};
SourceBlock {
arguments,
language,
contents: contents.into(),
post_blank,
}
.into()
}
_ => SpecialBlock {
parameters: arguments,
name: name.into(),
pre_blank,
post_blank,
}
.into(),
};
(element, contents_without_blank_lines)
}
}
/// nom parser for a whole block: optional leading spaces, `#+BEGIN_<name>`
/// (case-insensitive), the rest of the begin line as arguments, every line
/// up to the matching `#+END_<name>` line (case-insensitive, trimmed), and
/// the blank lines after the block.
fn parse_internal(input: &str) -> IResult<&str, RawBlock, ()> {
    let (rest, _) = space0(input)?;
    let (rest, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(rest)?;
    let (rest, begin_line_tail) = line(rest)?;

    let closing = format!("#+END_{}", name);
    let (rest, contents) =
        lines_till(|candidate| candidate.trim().eq_ignore_ascii_case(&closing))(rest)?;

    let (contents_without_blank_lines, pre_blank) = blank_lines_count(contents)?;
    let (rest, post_blank) = blank_lines_count(rest)?;

    let block = RawBlock {
        name,
        contents,
        arguments: begin_line_tail.trim(),
        pre_blank,
        contents_without_blank_lines,
        post_blank,
    };
    Ok((rest, block))
}
/// Checks `RawBlock::parse` on an empty block (upper- and lower-case
/// delimiters) and on a block with a language and one content line.
#[test]
fn parse() {
    let empty_upper = RawBlock {
        contents: "",
        contents_without_blank_lines: "",
        pre_blank: 0,
        post_blank: 0,
        name: "SRC",
        arguments: "",
    };
    assert_eq!(
        RawBlock::parse(
            r#"#+BEGIN_SRC
#+END_SRC"#
        ),
        Some(("", empty_upper))
    );

    let empty_lower = RawBlock {
        contents: "",
        contents_without_blank_lines: "",
        pre_blank: 0,
        post_blank: 0,
        name: "src",
        arguments: "",
    };
    assert_eq!(
        RawBlock::parse(
            r#"#+begin_src
#+end_src"#
        ),
        Some(("", empty_lower))
    );

    let javascript = RawBlock {
        contents: "console.log('Hello World!');\n",
        contents_without_blank_lines: "console.log('Hello World!');\n",
        pre_blank: 0,
        post_blank: 1,
        name: "SRC",
        arguments: "javascript",
    };
    assert_eq!(
        RawBlock::parse(
            r#"#+BEGIN_SRC javascript
console.log('Hello World!');
#+END_SRC
"#
        ),
        Some(("", javascript))
    );

    // TODO: more testing
}

242
src/elements/clock.rs Normal file
View file

@ -0,0 +1,242 @@
use std::borrow::Cow;
use nom::{
bytes::complete::tag,
character::complete::{char, digit1, space0},
combinator::recognize,
sequence::separated_pair,
IResult,
};
use crate::elements::timestamp::{parse_inactive, Datetime, Timestamp};
use crate::parse::combinators::{blank_lines_count, eol};
/// Clock element: either closed (start, end and duration) or still running
/// (start only)
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(untagged))]
#[derive(Debug, Clone)]
pub enum Clock<'a> {
    /// A clock that has been closed
    Closed {
        /// Start time
        start: Datetime<'a>,
        /// End time
        end: Datetime<'a>,
        /// Repeater taken from the clock's timestamp, if any
        #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
        repeater: Option<Cow<'a, str>>,
        /// Delay taken from the clock's timestamp, if any
        #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
        delay: Option<Cow<'a, str>>,
        /// Duration text found after `=>`
        duration: Cow<'a, str>,
        /// Number of blank lines between the clock line and the next
        /// non-blank line or the end of the buffer
        post_blank: usize,
    },
    /// A clock that is still running
    Running {
        /// Start time
        start: Datetime<'a>,
        /// Repeater taken from the clock's timestamp, if any
        #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
        repeater: Option<Cow<'a, str>>,
        /// Delay taken from the clock's timestamp, if any
        #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
        delay: Option<Cow<'a, str>>,
        /// Number of blank lines between the clock line and the next
        /// non-blank line or the end of the buffer
        post_blank: usize,
    },
}
impl Clock<'_> {
    /// Parses a clock line, returning the unparsed remainder and the clock,
    /// or `None` when `input` is not a valid clock.
    pub(crate) fn parse(input: &str) -> Option<(&str, Clock)> {
        parse_internal(input).ok()
    }

    /// Converts every borrowed field into owned data, producing a clock that
    /// no longer borrows from the parsed input.
    pub fn into_owned(self) -> Clock<'static> {
        match self {
            Clock::Closed {
                start,
                end,
                repeater,
                delay,
                duration,
                post_blank,
            } => Clock::Closed {
                start: start.into_owned(),
                end: end.into_owned(),
                repeater: repeater.map(Into::into).map(Cow::Owned),
                delay: delay.map(Into::into).map(Cow::Owned),
                duration: duration.into_owned().into(),
                post_blank,
            },
            Clock::Running {
                start,
                repeater,
                delay,
                post_blank,
            } => Clock::Running {
                start: start.into_owned(),
                repeater: repeater.map(Into::into).map(Cow::Owned),
                delay: delay.map(Into::into).map(Cow::Owned),
                post_blank,
            },
        }
    }

    /// Misspelled alias of [`Clock::into_owned`], kept so that existing
    /// callers continue to compile. Prefer `into_owned` in new code.
    pub fn into_onwed(self) -> Clock<'static> {
        self.into_owned()
    }

    /// Returns `true` if the clock is running.
    pub fn is_running(&self) -> bool {
        match self {
            Clock::Closed { .. } => false,
            Clock::Running { .. } => true,
        }
    }

    /// Returns `true` if the clock is closed.
    pub fn is_closed(&self) -> bool {
        match self {
            Clock::Closed { .. } => true,
            Clock::Running { .. } => false,
        }
    }

    /// Returns clock duration, or `None` if it's running.
    pub fn duration(&self) -> Option<&str> {
        match self {
            Clock::Closed { duration, .. } => Some(duration),
            Clock::Running { .. } => None,
        }
    }

    /// Constructs a timestamp from the clock: an inactive range for a closed
    /// clock, a single inactive timestamp for a running one. The clock's
    /// fields are cloned into the result.
    pub fn value(&self) -> Timestamp {
        match self {
            Clock::Closed {
                start,
                end,
                repeater,
                delay,
                ..
            } => Timestamp::InactiveRange {
                start: start.clone(),
                end: end.clone(),
                repeater: repeater.clone(),
                delay: delay.clone(),
            },
            Clock::Running {
                start,
                repeater,
                delay,
                ..
            } => Timestamp::Inactive {
                start: start.clone(),
                repeater: repeater.clone(),
                delay: delay.clone(),
            },
        }
    }
}
fn parse_internal(input: &str) -> IResult<&str, Clock, ()> {
let (input, _) = space0(input)?;
let (input, _) = tag("CLOCK:")(input)?;
let (input, _) = space0(input)?;
let (input, timestamp) = parse_inactive(input)?;
match timestamp {
Timestamp::InactiveRange {
start,
end,
repeater,
delay,
} => {
let (input, _) = space0(input)?;
let (input, _) = tag("=>")(input)?;
let (input, _) = space0(input)?;
let (input, duration) = recognize(separated_pair(digit1, char(':'), digit1))(input)?;
let (input, _) = eol(input)?;
let (input, blank) = blank_lines_count(input)?;
Ok((
input,
Clock::Closed {
start,
end,
repeater,
delay,
duration: duration.into(),
post_blank: blank,
},
))
}
Timestamp::Inactive {
start,
repeater,
delay,
} => {
let (input, _) = eol(input)?;
let (input, blank) = blank_lines_count(input)?;
Ok((
input,
Clock::Running {
start,
repeater,
delay,
post_blank: blank,
},
))
}
_ => unreachable!(
"`parse_inactive` only returns `Timestamp::InactiveRange` or `Timestamp::Inactive`."
),
}
}
// Checks `Clock::parse` on a running clock and on a closed clock followed by
// a blank line.
#[test]
fn parse() {
    // Both samples use the same date and differ only in the time of day.
    let at = |hour, minute| Datetime {
        year: 2003,
        month: 9,
        day: 16,
        dayname: "Tue".into(),
        hour: Some(hour),
        minute: Some(minute),
    };

    let running = Clock::Running {
        start: at(9, 39),
        repeater: None,
        delay: None,
        post_blank: 0,
    };
    assert_eq!(
        Clock::parse("CLOCK: [2003-09-16 Tue 09:39]"),
        Some(("", running))
    );

    let closed = Clock::Closed {
        start: at(9, 39),
        end: at(10, 39),
        repeater: None,
        delay: None,
        duration: "1:00".into(),
        post_blank: 1,
    };
    assert_eq!(
        Clock::parse("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00\n\n"),
        Some(("", closed))
    );
}

53
src/elements/comment.rs Normal file
View file

@ -0,0 +1,53 @@
use std::borrow::Cow;
use nom::{
error::{make_error, ErrorKind},
Err, IResult,
};
use crate::parse::combinators::{blank_lines_count, lines_while};
/// Comment Element
#[derive(Debug, Default, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct Comment<'a> {
    /// Comments value, with pound signs
    pub value: Cow<'a, str>,
    /// Numbers of blank lines between last comment's line and next non-blank
    /// line or buffer's end
    pub post_blank: usize,
}
impl Comment<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, Comment)> {
parse_internal(input).ok()
}
pub fn into_owned(self) -> Comment<'static> {
Comment {
value: self.value.into_owned().into(),
post_blank: self.post_blank,
}
}
}
fn parse_internal(input: &str) -> IResult<&str, Comment, ()> {
let (input, value) = lines_while(|line| {
let line = line.trim_start();
line == "#" || line.starts_with("# ")
})(input)?;
if value.is_empty() {
// TODO: better error kind
return Err(Err::Error(make_error(input, ErrorKind::Many0)));
}
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
Comment {
value: value.into(),
post_blank,
},
))
}

122
src/elements/cookie.rs Normal file
View file

@ -0,0 +1,122 @@
use std::borrow::Cow;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::digit0,
combinator::recognize,
sequence::{delimited, pair, separated_pair},
IResult,
};
/// Statistics cookie object, such as `[1/10]` or `[10%]`
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Cookie<'a> {
    /// The whole cookie text, brackets included
    pub value: Cow<'a, str>,
}
impl Cookie<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, Cookie)> {
parse_internal(input).ok()
}
pub fn into_owned(self) -> Cookie<'static> {
Cookie {
value: self.value.into_owned().into(),
}
}
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, Cookie, ()> {
let (input, value) = recognize(delimited(
tag("["),
alt((
separated_pair(digit0, tag("/"), digit0),
pair(digit0, tag("%")),
)),
tag("]"),
))(input)?;
Ok((
input,
Cookie {
value: value.into(),
},
))
}
// Checks which bracketed forms `Cookie::parse` accepts and rejects.
#[test]
fn parse() {
    // Accepted cookies are consumed whole and stored verbatim.
    let accepted = [
        "[1/10]", "[1/1000]", "[10%]", "[%]", "[/]", "[100/]", "[/100]",
    ];
    for &input in accepted.iter() {
        assert_eq!(
            Cookie::parse(input),
            Some((
                "",
                Cookie {
                    value: input.into()
                }
            ))
        );
    }

    let rejected = ["[10% ]", "[1//100]", "[1\\100]", "[10%%]"];
    for &input in rejected.iter() {
        assert!(Cookie::parse(input).is_none());
    }
}

121
src/elements/drawer.rs Normal file
View file

@ -0,0 +1,121 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_while1},
character::complete::space0,
sequence::delimited,
IResult,
};
use crate::parse::combinators::{blank_lines_count, eol, lines_till};
/// Drawer element, delimited by a `:NAME:` line and an `:END:` line
#[derive(Debug, Default, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct Drawer<'a> {
    /// Name of the drawer, without the surrounding colons
    pub name: Cow<'a, str>,
    /// Number of blank lines between the drawer's first line and the next
    /// non-blank line
    pub pre_blank: usize,
    /// Number of blank lines between the drawer's last line and the next
    /// non-blank line or the end of the buffer
    pub post_blank: usize,
}
impl Drawer<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, (Drawer, &str))> {
parse_drawer(input).ok()
}
pub fn into_owned(self) -> Drawer<'static> {
Drawer {
name: self.name.into_owned().into(),
pre_blank: self.pre_blank,
post_blank: self.post_blank,
}
}
}
#[inline]
pub fn parse_drawer(input: &str) -> IResult<&str, (Drawer, &str), ()> {
let (input, (mut drawer, content)) = parse_drawer_without_blank(input)?;
let (content, blank) = blank_lines_count(content)?;
drawer.pre_blank = blank;
let (input, blank) = blank_lines_count(input)?;
drawer.post_blank = blank;
Ok((input, (drawer, content)))
}
pub fn parse_drawer_without_blank(input: &str) -> IResult<&str, (Drawer, &str), ()> {
let (input, _) = space0(input)?;
let (input, name) = delimited(
tag(":"),
take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'),
tag(":"),
)(input)?;
let (input, _) = eol(input)?;
let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case(":END:"))(input)?;
Ok((
input,
(
Drawer {
name: name.into(),
pre_blank: 0,
post_blank: 0,
},
contents,
),
))
}
// Checks `parse_drawer` on a drawer with contents, on a drawer surrounded by
// blank lines, and on a drawer that is never closed.
#[test]
fn parse() {
assert_eq!(
parse_drawer(
r#":PROPERTIES:
:CUSTOM_ID: id
:END:"#
),
Ok((
"",
(
Drawer {
name: "PROPERTIES".into(),
pre_blank: 0,
post_blank: 0
},
" :CUSTOM_ID: id\n"
)
))
);
assert_eq!(
parse_drawer(
r#":PROPERTIES:
:END:
"#
),
Ok((
"",
(
Drawer {
name: "PROPERTIES".into(),
pre_blank: 2,
post_blank: 1,
},
""
)
))
);
// A drawer without an `:END:` line must be rejected.
// https://github.com/PoiScript/orgize/issues/9
assert!(parse_drawer(":SPAGHETTI:\n").is_err());
}

99
src/elements/dyn_block.rs Normal file
View file

@ -0,0 +1,99 @@
use std::borrow::Cow;
use nom::{
bytes::complete::tag_no_case,
character::complete::{alpha1, space0, space1},
IResult,
};
use crate::parse::combinators::{blank_lines_count, line, lines_till};
/// Dynamic block element, delimited by `#+BEGIN: name` and `#+END:` lines
#[derive(Debug, Default, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct DynBlock<'a> {
    /// Name of the block
    pub block_name: Cow<'a, str>,
    /// Trimmed text following the block name, or `None` when it is empty
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub arguments: Option<Cow<'a, str>>,
    /// Number of blank lines between the block's first line and the next
    /// non-blank line
    pub pre_blank: usize,
    /// Number of blank lines between the block's last line and the next
    /// non-blank line or the end of the buffer
    pub post_blank: usize,
}
impl DynBlock<'_> {
    /// Parses a dynamic block, returning the unparsed remainder together with
    /// the block and its contents, or `None` when parsing fails.
    pub(crate) fn parse(input: &str) -> Option<(&str, (DynBlock, &str))> {
        parse_internal(input).ok()
    }

    /// Turns every borrowed field into an owned one.
    pub fn into_owned(self) -> DynBlock<'static> {
        let DynBlock {
            block_name,
            arguments,
            pre_blank,
            post_blank,
        } = self;
        DynBlock {
            block_name: Cow::Owned(block_name.into_owned()),
            arguments: arguments.map(|args| Cow::Owned(args.into_owned())),
            pre_blank,
            post_blank,
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, (DynBlock, &str), ()> {
let (input, _) = space0(input)?;
let (input, _) = tag_no_case("#+BEGIN:")(input)?;
let (input, _) = space1(input)?;
let (input, name) = alpha1(input)?;
let (input, args) = line(input)?;
let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case("#+END:"))(input)?;
let (contents, pre_blank) = blank_lines_count(contents)?;
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
(
DynBlock {
block_name: name.into(),
arguments: if args.trim().is_empty() {
None
} else {
Some(args.trim().into())
},
pre_blank,
post_blank,
},
contents,
),
))
}
// Checks `DynBlock::parse` on a `clocktable` block with arguments and a
// single line of contents.
#[test]
fn parse() {
// TODO: testing
assert_eq!(
DynBlock::parse(
r#"#+BEGIN: clocktable :scope file
CONTENTS
#+END:
"#
),
Some((
"",
(
DynBlock {
block_name: "clocktable".into(),
arguments: Some(":scope file".into()),
pre_blank: 2,
post_blank: 1,
},
"CONTENTS\n"
)
))
);
}

113
src/elements/emphasis.rs Normal file
View file

@ -0,0 +1,113 @@
use bytecount::count;
use memchr::memchr_iter;
use crate::elements::Element;
/// Raw emphasis span: the marker byte that delimits it and the text found
/// between the two markers.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct Emphasis<'a> {
    /// Delimiting marker byte
    marker: u8,
    /// Text between the opening and closing marker
    contents: &'a str,
}
impl<'a> Emphasis<'a> {
    /// Looks for an emphasis span at the start of `text`, delimited by `marker`.
    ///
    /// The first occurrence of `marker` is skipped as the opening marker. The
    /// span must hold at least one byte, must not begin with whitespace, and
    /// may contain at most one newline. The closing marker is the first later
    /// occurrence accepted by `validate_marker`.
    ///
    /// Returns the text after the closing marker together with the span.
    pub fn parse(text: &str, marker: u8) -> Option<(&str, Emphasis)> {
        if text.len() < 3 {
            return None;
        }

        let bytes = text.as_bytes();
        if bytes[1].is_ascii_whitespace() {
            return None;
        }

        for end in memchr_iter(marker, bytes).skip(1) {
            // contains at least one character
            if end == 1 {
                continue;
            }
            // Give up once the candidate span holds two or more newlines.
            if count(&bytes[1..end], b'\n') >= 2 {
                return None;
            }
            if validate_marker(end, text) {
                let emphasis = Emphasis {
                    marker,
                    contents: &text[1..end],
                };
                return Some((&text[end + 1..], emphasis));
            }
        }

        None
    }

    /// Maps the marker byte to its element and returns it with the span's
    /// contents. Verbatim (`=`) and code (`~`) elements carry the contents
    /// as their value.
    pub fn into_element(self) -> (Element<'a>, &'a str) {
        let contents = self.contents;
        let element = match self.marker {
            b'*' => Element::Bold,
            b'+' => Element::Strike,
            b'/' => Element::Italic,
            b'_' => Element::Underline,
            b'=' => Element::Verbatim {
                value: contents.into(),
            },
            b'~' => Element::Code {
                value: contents.into(),
            },
            _ => unreachable!(),
        };
        (element, contents)
    }
}
fn validate_marker(pos: usize, text: &str) -> bool {
if text.as_bytes()[pos - 1].is_ascii_whitespace() {
false
} else if let Some(&post) = text.as_bytes().get(pos + 1) {
match post {
b' ' | b'-' | b'.' | b',' | b':' | b'!' | b'?' | b'\'' | b'\n' | b')' | b'}' => true,
_ => false,
}
} else {
true
}
}
// Checks which spans `Emphasis::parse` accepts and rejects.
#[test]
fn parse() {
    // Expected result for a bold span that consumes the whole input.
    let bold = |contents: &'static str| {
        Some((
            "",
            Emphasis {
                contents,
                marker: b'*',
            },
        ))
    };

    assert_eq!(Emphasis::parse("*bold*", b'*'), bold("bold"));
    assert_eq!(Emphasis::parse("*bo*ld*", b'*'), bold("bo*ld"));
    assert_eq!(Emphasis::parse("*bo\nld*", b'*'), bold("bo\nld"));

    let rejected = [
        ("*bold*a", b'*'),
        ("*bold*", b'/'),
        ("*bold *", b'*'),
        ("* bold*", b'*'),
        ("*b\nol\nd*", b'*'),
    ];
    for &(text, marker) in rejected.iter() {
        assert_eq!(Emphasis::parse(text, marker), None);
    }
}

View file

@ -0,0 +1,80 @@
use std::borrow::Cow;
use nom::{
error::{make_error, ErrorKind},
Err, IResult,
};
use crate::parse::combinators::{blank_lines_count, lines_while};
/// Fixed width element: consecutive lines starting with a colon
#[derive(Debug, Default, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct FixedWidth<'a> {
    /// The fixed width lines as written, leading colons included
    pub value: Cow<'a, str>,
    /// Number of blank lines between the last fixed width line and the next
    /// non-blank line or the end of the buffer
    pub post_blank: usize,
}
impl FixedWidth<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth)> {
parse_internal(input).ok()
}
pub fn into_owned(self) -> FixedWidth<'static> {
FixedWidth {
value: self.value.into_owned().into(),
post_blank: self.post_blank,
}
}
}
fn parse_internal(input: &str) -> IResult<&str, FixedWidth, ()> {
let (input, value) = lines_while(|line| {
let line = line.trim_start();
line == ":" || line.starts_with(": ")
})(input)?;
if value.is_empty() {
// TODO: better error kind
return Err(Err::Error(make_error(input, ErrorKind::Many0)));
}
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
FixedWidth {
value: value.into(),
post_blank,
},
))
}
// Checks that `FixedWidth::parse` keeps every fixed width line verbatim in
// `value` and reports the blank-line count in `post_blank`.
#[test]
fn parse() {
assert_eq!(
FixedWidth::parse(
r#": A
:
: B
: C
"#
),
Some((
"",
FixedWidth {
value: r#": A
:
: B
: C
"#
.into(),
post_blank: 1
}
))
);
}

117
src/elements/fn_def.rs Normal file
View file

@ -0,0 +1,117 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_while1},
sequence::delimited,
IResult,
};
use crate::parse::combinators::{blank_lines_count, line};
/// Footnote definition element, introduced by `[fn:LABEL]`
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Default, Clone)]
pub struct FnDef<'a> {
    /// Label of the footnote, used to reference it
    pub label: Cow<'a, str>,
    /// Number of blank lines between the last line of the definition and the
    /// next non-blank line or the end of the buffer
    pub post_blank: usize,
}
impl FnDef<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, (FnDef, &str))> {
parse_internal(input).ok()
}
pub fn into_owned(self) -> FnDef<'static> {
FnDef {
label: self.label.into_owned().into(),
post_blank: self.post_blank,
}
}
}
fn parse_internal(input: &str) -> IResult<&str, (FnDef, &str), ()> {
let (input, label) = delimited(
tag("[fn:"),
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
tag("]"),
)(input)?;
let (input, content) = line(input)?;
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
(
FnDef {
label: label.into(),
post_blank,
},
content,
),
))
}
// Checks which footnote definitions `FnDef::parse` accepts and rejects.
#[test]
fn parse() {
    // Expected result when the whole input is consumed.
    let parsed = |label: &'static str, content: &'static str| {
        Some((
            "",
            (
                FnDef {
                    label: label.into(),
                    post_blank: 0,
                },
                content,
            ),
        ))
    };

    assert_eq!(
        FnDef::parse("[fn:1] https://orgmode.org"),
        parsed("1", " https://orgmode.org")
    );
    assert_eq!(
        FnDef::parse("[fn:word_1] https://orgmode.org"),
        parsed("word_1", " https://orgmode.org")
    );
    assert_eq!(
        FnDef::parse("[fn:WORD-1] https://orgmode.org"),
        parsed("WORD-1", " https://orgmode.org")
    );
    assert_eq!(FnDef::parse("[fn:WORD]"), parsed("WORD", ""));

    let rejected = [
        "[fn:] https://orgmode.org",
        "[fn:wor d] https://orgmode.org",
        "[fn:WORD https://orgmode.org",
    ];
    for &input in rejected.iter() {
        assert!(FnDef::parse(input).is_none());
    }
}

111
src/elements/fn_ref.rs Normal file
View file

@ -0,0 +1,111 @@
use std::borrow::Cow;
use memchr::memchr2_iter;
use nom::{
bytes::complete::{tag, take_while},
combinator::opt,
error::{make_error, ErrorKind},
sequence::preceded,
Err, IResult,
};
/// Footnote reference element, written `[fn:LABEL]` or `[fn:LABEL:DEFINITION]`
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct FnRef<'a> {
    /// Label of the referenced footnote; may be empty
    pub label: Cow<'a, str>,
    /// Inline definition following the second colon, if present
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub definition: Option<Cow<'a, str>>,
}
impl FnRef<'_> {
    /// Parses a footnote reference, returning the unparsed remainder and the
    /// reference, or `None` when `input` does not start with one.
    pub(crate) fn parse(input: &str) -> Option<(&str, FnRef)> {
        parse_internal(input).ok()
    }

    /// Turns every borrowed field into an owned one.
    pub fn into_owned(self) -> FnRef<'static> {
        let FnRef { label, definition } = self;
        FnRef {
            label: Cow::Owned(label.into_owned()),
            definition: definition.map(|text| Cow::Owned(text.into_owned())),
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, FnRef, ()> {
let (input, _) = tag("[fn:")(input)?;
let (input, label) =
take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?;
let (input, definition) = opt(preceded(tag(":"), balanced_brackets))(input)?;
let (input, _) = tag("]")(input)?;
Ok((
input,
FnRef {
label: label.into(),
definition: definition.map(Into::into),
},
))
}
/// Consumes text up to (not including) the first `]` that is not matched by
/// an earlier `[` within `input`.
///
/// Returns the remainder starting at that bracket together with the text
/// before it, or an error when no such bracket exists.
fn balanced_brackets(input: &str) -> IResult<&str, &str, ()> {
    let bytes = input.as_bytes();
    // Number of `[` seen so far that still wait for their `]`.
    let mut open = 0usize;

    for pos in memchr2_iter(b'[', b']', bytes) {
        match bytes[pos] {
            b'[' => open += 1,
            _ if open > 0 => open -= 1,
            _ => return Ok((&input[pos..], &input[..pos])),
        }
    }

    Err(Err::Error(make_error(input, ErrorKind::Tag)))
}
// Checks `FnRef::parse` on labelled, inline and anonymous references.
#[test]
fn parse() {
    // Expected result when the whole input is consumed.
    let parsed = |label: &'static str, definition: Option<&'static str>| {
        Some((
            "",
            FnRef {
                label: label.into(),
                definition: definition.map(Into::into),
            },
        ))
    };

    assert_eq!(FnRef::parse("[fn:1]"), parsed("1", None));
    assert_eq!(FnRef::parse("[fn:1:2]"), parsed("1", Some("2")));
    assert_eq!(FnRef::parse("[fn::2]"), parsed("", Some("2")));
    assert_eq!(FnRef::parse("[fn::[]]"), parsed("", Some("[]")));

    // The bracket closing the reference itself is missing.
    assert!(FnRef::parse("[fn::[]").is_none());
}

122
src/elements/inline_call.rs Normal file
View file

@ -0,0 +1,122 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_till},
combinator::opt,
sequence::{delimited, preceded},
IResult,
};
/// Inline Babel call object, written `call_NAME[HEADER](ARGUMENTS)[HEADER]`
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Default, Clone)]
pub struct InlineCall<'a> {
    /// Name of the called code block
    pub name: Cow<'a, str>,
    /// Header arguments applied to the code block (bracketed text before the
    /// parentheses)
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub inside_header: Option<Cow<'a, str>>,
    /// Arguments passed to the code block (text between the parentheses)
    pub arguments: Cow<'a, str>,
    /// Header arguments applied to the calling instance (bracketed text
    /// after the parentheses)
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub end_header: Option<Cow<'a, str>>,
}
impl InlineCall<'_> {
    /// Parses an inline call, returning the unparsed remainder and the call,
    /// or `None` when `input` does not start with one.
    pub(crate) fn parse(input: &str) -> Option<(&str, InlineCall)> {
        parse_internal(input).ok()
    }

    /// Turns every borrowed field into an owned one.
    pub fn into_owned(self) -> InlineCall<'static> {
        let InlineCall {
            name,
            inside_header,
            arguments,
            end_header,
        } = self;
        InlineCall {
            name: Cow::Owned(name.into_owned()),
            arguments: Cow::Owned(arguments.into_owned()),
            inside_header: inside_header.map(|header| Cow::Owned(header.into_owned())),
            end_header: end_header.map(|header| Cow::Owned(header.into_owned())),
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, InlineCall, ()> {
let (input, name) = preceded(
tag("call_"),
take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')'),
)(input)?;
let (input, inside_header) = opt(delimited(
tag("["),
take_till(|c| c == ']' || c == '\n'),
tag("]"),
))(input)?;
let (input, arguments) =
delimited(tag("("), take_till(|c| c == ')' || c == '\n'), tag(")"))(input)?;
let (input, end_header) = opt(delimited(
tag("["),
take_till(|c| c == ']' || c == '\n'),
tag("]"),
))(input)?;
Ok((
input,
InlineCall {
name: name.into(),
arguments: arguments.into(),
inside_header: inside_header.map(Into::into),
end_header: end_header.map(Into::into),
},
))
}
// Checks `InlineCall::parse` with every combination of the two optional
// headers.
#[test]
fn parse() {
    // Every sample calls `square` with the argument `4`.
    let square = |inside_header: Option<&'static str>, end_header: Option<&'static str>| {
        Some((
            "",
            InlineCall {
                name: "square".into(),
                arguments: "4".into(),
                inside_header: inside_header.map(Into::into),
                end_header: end_header.map(Into::into),
            },
        ))
    };

    assert_eq!(InlineCall::parse("call_square(4)"), square(None, None));
    assert_eq!(
        InlineCall::parse("call_square[:results output](4)"),
        square(Some(":results output"), None)
    );
    assert_eq!(
        InlineCall::parse("call_square(4)[:results html]"),
        square(None, Some(":results html"))
    );
    assert_eq!(
        InlineCall::parse("call_square[:results output](4)[:results html]"),
        square(Some(":results output"), Some(":results html"))
    );
}

View file

@ -0,0 +1,88 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_till, take_while1},
combinator::opt,
sequence::delimited,
IResult,
};
/// Inline source block object, written `src_LANG[OPTIONS]{BODY}`
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct InlineSrc<'a> {
    /// Language of the code
    pub lang: Cow<'a, str>,
    /// Header arguments given in brackets, if any
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub options: Option<Cow<'a, str>>,
    /// Source code found between the braces
    pub body: Cow<'a, str>,
}
impl InlineSrc<'_> {
    /// Parses an inline source block, returning the unparsed remainder and
    /// the block, or `None` when `input` does not start with one.
    pub(crate) fn parse(input: &str) -> Option<(&str, InlineSrc)> {
        parse_internal(input).ok()
    }

    /// Turns every borrowed field into an owned one.
    pub fn into_owned(self) -> InlineSrc<'static> {
        let InlineSrc {
            lang,
            options,
            body,
        } = self;
        InlineSrc {
            lang: Cow::Owned(lang.into_owned()),
            options: options.map(|opts| Cow::Owned(opts.into_owned())),
            body: Cow::Owned(body.into_owned()),
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, InlineSrc, ()> {
let (input, _) = tag("src_")(input)?;
let (input, lang) =
take_while1(|c: char| !c.is_ascii_whitespace() && c != '[' && c != '{')(input)?;
let (input, options) = opt(delimited(
tag("["),
take_till(|c| c == '\n' || c == ']'),
tag("]"),
))(input)?;
let (input, body) = delimited(tag("{"), take_till(|c| c == '\n' || c == '}'), tag("}"))(input)?;
Ok((
input,
InlineSrc {
lang: lang.into(),
options: options.map(Into::into),
body: body.into(),
},
))
}
// Checks `InlineSrc::parse` with and without options, and on malformed input.
#[test]
fn parse() {
    // Expected result when the whole input is consumed.
    let parsed = |lang: &'static str, options: Option<&'static str>, body: &'static str| {
        Some((
            "",
            InlineSrc {
                lang: lang.into(),
                options: options.map(Into::into),
                body: body.into(),
            },
        ))
    };

    assert_eq!(
        InlineSrc::parse("src_C{int a = 0;}"),
        parsed("C", None, "int a = 0;")
    );
    assert_eq!(
        InlineSrc::parse("src_xml[:exports code]{<tag>text</tag>}"),
        parsed("xml", Some(":exports code"), "<tag>text</tag>")
    );

    let rejected = [
        "src_xml[:exports code]{<tag>text</tag>",
        "src_[:exports code]{<tag>text</tag>}",
        "src_xml[:exports code]",
    ];
    for &input in rejected.iter() {
        assert!(InlineSrc::parse(input).is_none());
    }
}

230
src/elements/keyword.rs Normal file
View file

@ -0,0 +1,230 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_till},
character::complete::space0,
combinator::opt,
sequence::delimited,
IResult,
};
use crate::elements::Element;
use crate::parse::combinators::{blank_lines_count, line};
/// Keyword element, written `#+KEY[OPTIONAL]: VALUE`
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Keyword<'a> {
    /// Name of the keyword
    pub key: Cow<'a, str>,
    /// Bracketed text between the key and the colon, if any
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub optional: Option<Cow<'a, str>>,
    /// Value of the keyword
    pub value: Cow<'a, str>,
    /// Number of blank lines between the keyword line and the next non-blank
    /// line or the end of the buffer
    pub post_blank: usize,
}
impl Keyword<'_> {
    /// Turns every borrowed field into an owned one.
    pub fn into_owned(self) -> Keyword<'static> {
        let Keyword {
            key,
            optional,
            value,
            post_blank,
        } = self;
        Keyword {
            key: Cow::Owned(key.into_owned()),
            optional: optional.map(|text| Cow::Owned(text.into_owned())),
            value: Cow::Owned(value.into_owned()),
            post_blank,
        }
    }
}
/// Babel call element, produced from a `#+CALL:` keyword line
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct BabelCall<'a> {
    /// Value of the call, i.e. the keyword's value
    pub value: Cow<'a, str>,
    /// Number of blank lines between the babel call line and the next
    /// non-blank line or the end of the buffer
    pub post_blank: usize,
}
impl BabelCall<'_> {
pub fn into_owned(self) -> BabelCall<'static> {
BabelCall {
value: self.value.into_owned().into(),
post_blank: self.post_blank,
}
}
}
/// Keyword line as parsed, before it is turned into a `Keyword` or a
/// `BabelCall` element.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct RawKeyword<'a> {
    /// Name of the keyword
    pub key: &'a str,
    /// Trimmed value of the keyword
    pub value: &'a str,
    /// Bracketed text between the key and the colon, if any
    pub optional: Option<&'a str>,
    /// Number of blank lines following the keyword line
    pub post_blank: usize,
}
impl<'a> RawKeyword<'a> {
    /// Parses a keyword line, returning the unparsed remainder and the raw
    /// keyword, or `None` when `input` does not start with one.
    pub fn parse(input: &str) -> Option<(&str, RawKeyword)> {
        parse_internal(input).ok()
    }

    /// Converts the raw keyword into an element: a `BabelCall` when the key
    /// is `CALL` (compared ASCII-case-insensitively), a `Keyword` otherwise.
    pub fn into_element(self) -> Element<'a> {
        let RawKeyword {
            key,
            value,
            optional,
            post_blank,
        } = self;

        if !key.eq_ignore_ascii_case("CALL") {
            return Keyword {
                key: key.into(),
                optional: optional.map(Into::into),
                value: value.into(),
                post_blank,
            }
            .into();
        }

        // A babel call keeps only the value; key and optional are dropped.
        BabelCall {
            value: value.into(),
            post_blank,
        }
        .into()
    }
}
/// Parses `#+KEY[OPTIONAL]: VALUE` and counts the blank lines after it.
///
/// The key ends at ASCII whitespace, `:` or `[`; the optional part may not
/// span a newline; the value is the trimmed rest of the line.
fn parse_internal(input: &str) -> IResult<&str, RawKeyword, ()> {
    let (input, _) = space0(input)?;
    let (input, _) = tag("#+")(input)?;
    let (input, key) = take_till(|c: char| c == ':' || c == '[' || c.is_ascii_whitespace())(input)?;
    let (input, optional) = opt(delimited(
        tag("["),
        take_till(|c| c == '\n' || c == ']'),
        tag("]"),
    ))(input)?;
    let (input, _) = tag(":")(input)?;
    let (input, rest_of_line) = line(input)?;
    let (input, post_blank) = blank_lines_count(input)?;

    let keyword = RawKeyword {
        key,
        optional,
        value: rest_of_line.trim(),
        post_blank,
    };
    Ok((input, keyword))
}
// Checks `RawKeyword::parse` on plain keywords, on keys containing spaces and
// on a keyword with an optional part.
#[test]
fn parse() {
    // Every accepted sample consumes the whole input and has no blank lines
    // after it.
    let parsed = |key: &'static str, optional: Option<&'static str>, value: &'static str| {
        Some((
            "",
            RawKeyword {
                key,
                optional,
                value,
                post_blank: 0,
            },
        ))
    };

    assert_eq!(RawKeyword::parse("#+KEY:"), parsed("KEY", None, ""));
    assert_eq!(
        RawKeyword::parse("#+KEY: VALUE"),
        parsed("KEY", None, "VALUE")
    );
    assert_eq!(
        RawKeyword::parse("#+K_E_Y: VALUE"),
        parsed("K_E_Y", None, "VALUE")
    );
    assert_eq!(
        RawKeyword::parse("#+KEY:VALUE\n"),
        parsed("KEY", None, "VALUE")
    );

    assert!(RawKeyword::parse("#+KE Y: VALUE").is_none());
    assert!(RawKeyword::parse("#+ KEY: VALUE").is_none());

    assert_eq!(
        RawKeyword::parse("#+RESULTS:"),
        parsed("RESULTS", None, "")
    );
    assert_eq!(
        RawKeyword::parse("#+ATTR_LATEX: :width 5cm\n"),
        parsed("ATTR_LATEX", None, ":width 5cm")
    );
    assert_eq!(
        RawKeyword::parse("#+CALL: double(n=4)"),
        parsed("CALL", None, "double(n=4)")
    );
    assert_eq!(
        RawKeyword::parse("#+CAPTION[Short caption]: Longer caption."),
        parsed("CAPTION", Some("Short caption"), "Longer caption.")
    );
}

80
src/elements/link.rs Normal file
View file

@ -0,0 +1,80 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_while},
combinator::opt,
sequence::delimited,
IResult,
};
/// Link object, written `[[PATH]]` or `[[PATH][DESC]]`
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Link<'a> {
    /// Destination of the link
    pub path: Cow<'a, str>,
    /// Description of the link, if one is given
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub desc: Option<Cow<'a, str>>,
}
impl Link<'_> {
    /// Parses a link, returning the unparsed remainder and the link, or
    /// `None` when `input` does not start with one.
    #[inline]
    pub(crate) fn parse(input: &str) -> Option<(&str, Link)> {
        parse_internal(input).ok()
    }

    /// Turns every borrowed field into an owned one.
    pub fn into_owned(self) -> Link<'static> {
        let Link { path, desc } = self;
        Link {
            path: Cow::Owned(path.into_owned()),
            desc: desc.map(|text| Cow::Owned(text.into_owned())),
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, Link, ()> {
let (input, path) = delimited(
tag("[["),
take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']'),
tag("]"),
)(input)?;
let (input, desc) = opt(delimited(
tag("["),
take_while(|c: char| c != '[' && c != ']'),
tag("]"),
))(input)?;
let (input, _) = tag("]")(input)?;
Ok((
input,
Link {
path: path.into(),
desc: desc.map(Into::into),
},
))
}
// Checks `Link::parse` with and without a description, and on a link whose
// final bracket is missing.
#[test]
fn parse() {
    // Expected result when the whole input is consumed.
    let parsed = |path: &'static str, desc: Option<&'static str>| {
        Some((
            "",
            Link {
                path: path.into(),
                desc: desc.map(Into::into),
            },
        ))
    };

    assert_eq!(Link::parse("[[#id]]"), parsed("#id", None));
    assert_eq!(Link::parse("[[#id][desc]]"), parsed("#id", Some("desc")));
    assert!(Link::parse("[[#id][desc]").is_none());
}

316
src/elements/list.rs Normal file
View file

@ -0,0 +1,316 @@
use std::borrow::Cow;
use std::iter::once;
use memchr::{memchr, memchr_iter};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1, space0},
combinator::{map, recognize},
sequence::terminated,
IResult,
};
/// Plain list element
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct List {
    /// Indentation of the list, as a number of whitespaces
    pub indent: usize,
    /// Whether the list is ordered; decided by the list's first item
    pub ordered: bool,
    /// Number of blank lines between the last line of the list and the next
    /// non-blank line or the end of the buffer
    pub post_blank: usize,
}
/// List item element
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct ListItem<'a> {
    /// Bullet of the item, including its trailing space
    pub bullet: Cow<'a, str>,
    /// Indentation of the item, as a number of whitespaces
    pub indent: usize,
    /// Whether the item belongs to an ordered list
    pub ordered: bool,
    // TODO checkbox
    // TODO counter
    // TODO tag
}
impl ListItem<'_> {
#[inline]
pub(crate) fn parse(input: &str) -> Option<(&str, (ListItem, &str))> {
list_item(input).ok()
}
pub fn into_owned(self) -> ListItem<'static> {
ListItem {
bullet: self.bullet.into_owned().into(),
indent: self.indent,
ordered: self.ordered,
}
}
}
/// Parses a list item: optional leading whitespace, a bullet (`+ `, `* `,
/// `- ` or digits followed by `. `), then the item's contents.
///
/// Returns the remaining input and a pair of the parsed [`ListItem`] and the
/// contents slice as computed by `list_item_contents`.
fn list_item(input: &str) -> IResult<&str, (ListItem, &str), ()> {
    // The indent is the byte length of the leading spaces/tabs.
    let (after_indent, indent) = map(space0, |spaces: &str| spaces.len())(input)?;

    let unordered = alt((tag("+ "), tag("* "), tag("- ")));
    let numbered = terminated(digit1, tag(". "));
    let (after_bullet, bullet) = recognize(alt((unordered, numbered)))(after_indent)?;

    let (rest, contents) = list_item_contents(after_bullet, indent);

    // A bullet that begins with a digit marks an ordered item.
    let ordered = bullet.chars().next().map_or(false, |c| c.is_ascii_digit());
    let item = ListItem {
        bullet: bullet.into(),
        indent,
        ordered,
    };

    Ok((rest, (item, contents)))
}
/// Splits `input` (the text right after a bullet) into `(rest, contents)`.
///
/// The first line always belongs to the item. Each following line is then
/// examined in order, and the contents end:
///
/// * after two consecutive whitespace-only lines (both are kept in the
///   contents), or
/// * before the first line that has a non-whitespace byte within its first
///   `indent + 1` bytes, i.e. a line indented no deeper than the item itself.
///
/// If neither happens, the whole input is the contents and the rest is empty.
fn list_item_contents(input: &str, indent: usize) -> (&str, &str) {
    let bytes = input.as_bytes();
    let is_blank = |text: &str| text.as_bytes().iter().all(u8::is_ascii_whitespace);

    // Offset just past the first line; that line is never inspected.
    let mut line_start = memchr(b'\n', bytes).map_or(input.len(), |i| i + 1);

    // Every subsequent line end, plus the end of the input as a final "line end".
    let line_ends = memchr_iter(b'\n', bytes)
        .map(|i| i + 1)
        .chain(once(input.len()))
        .skip(1);

    for line_end in line_ends {
        let current = &input[line_start..line_end];

        if is_blank(current) {
            let next_end = memchr(b'\n', input[line_end..].as_bytes())
                .map_or(input.len(), |offset| line_end + offset + 1);
            // two consecutive empty lines
            if is_blank(&input[line_end..next_end]) {
                return (&input[next_end..], &input[..next_end]);
            }
        }

        // line less or equally indented than the starting line
        let dedented = current
            .as_bytes()
            .iter()
            .take(indent + 1)
            .any(|byte| !byte.is_ascii_whitespace());
        if dedented {
            return (&input[line_start..], &input[..line_start]);
        }

        line_start = line_end;
    }

    ("", input)
}
// Exercises `list_item` on unordered and ordered bullets, checking the
// returned remainder, the parsed `ListItem`, and the captured contents.
// NOTE(review): several inputs below are textually identical to each other,
// and some expectations only make sense for indented or blank lines; leading
// whitespace and blank lines inside the raw strings may have been lost in
// this view — confirm against the checked-in file.
#[test]
fn parse() {
assert_eq!(
list_item(
r#"+ item1
+ item2"#
),
Ok((
"+ item2",
(
ListItem {
bullet: "+ ".into(),
indent: 0,
ordered: false,
},
r#"item1
"#
)
))
);
assert_eq!(
list_item(
r#"* item1
* item2"#
),
Ok((
"* item2",
(
ListItem {
bullet: "* ".into(),
indent: 0,
ordered: false,
},
r#"item1
"#
)
))
);
assert_eq!(
list_item(
r#"* item1
* item2"#
),
Ok((
"* item2",
(
ListItem {
bullet: "* ".into(),
indent: 0,
ordered: false,
},
r#"item1
"#
)
))
);
assert_eq!(
list_item(
r#"* item1
"#
),
Ok((
"",
(
ListItem {
bullet: "* ".into(),
indent: 0,
ordered: false,
},
r#"item1
"#
)
))
);
assert_eq!(
list_item(
r#"+ item1
+ item2
"#
),
Ok((
"",
(
ListItem {
bullet: "+ ".into(),
indent: 0,
ordered: false,
},
r#"item1
+ item2
"#
)
))
);
assert_eq!(
list_item(
r#"+ item1
+ item2
+ item 3"#
),
Ok((
"+ item 3",
(
ListItem {
bullet: "+ ".into(),
indent: 0,
ordered: false,
},
r#"item1
+ item2
"#
)
))
);
assert_eq!(
list_item(
r#" + item1
+ item2"#
),
Ok((
" + item2",
(
ListItem {
bullet: "+ ".into(),
indent: 2,
ordered: false,
},
r#"item1
"#
)
))
);
// Ordered bullet: digits followed by ". ".
assert_eq!(
list_item(
r#" 1. item1
2. item2
3. item3"#
),
Ok((
r#"2. item2
3. item3"#,
(
ListItem {
bullet: "1. ".into(),
indent: 2,
ordered: true,
},
r#"item1
"#
)
))
);
assert_eq!(
list_item(
r#"+ 1
- 2
- 3
+ 4"#
),
Ok((
"+ 4",
(
ListItem {
bullet: "+ ".into(),
indent: 0,
ordered: false,
},
r#"1
- 2
- 3
"#
)
))
);
}

91
src/elements/macros.rs Normal file
View file

@ -0,0 +1,91 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take, take_until, take_while1},
combinator::{opt, verify},
sequence::delimited,
IResult,
};
/// Macro Object
///
/// Written as `{{{name}}}` or `{{{name(arguments)}}}` in the source text.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Macros<'a> {
/// Macro name
pub name: Cow<'a, str>,
/// Arguments passed to the macro: the raw text between the parentheses,
/// or `None` when the macro was written without parentheses
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
pub arguments: Option<Cow<'a, str>>,
}
impl Macros<'_> {
    /// Parses a macro object at the start of `input`.
    ///
    /// Returns the remaining input and the macro, or `None` on any parse error.
    pub(crate) fn parse(input: &str) -> Option<(&str, Macros)> {
        match parse_internal(input) {
            Ok(parsed) => Some(parsed),
            Err(_) => None,
        }
    }

    /// Converts this macro into one that owns its name and arguments.
    pub fn into_owned(self) -> Macros<'static> {
        let Macros { name, arguments } = self;

        Macros {
            name: Cow::Owned(name.into_owned()),
            arguments: arguments.map(|args| Cow::Owned(args.into_owned())),
        }
    }
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, Macros, ()> {
let (input, _) = tag("{{{")(input)?;
let (input, name) = verify(
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
|s: &str| s.starts_with(|c: char| c.is_ascii_alphabetic()),
)(input)?;
let (input, arguments) = opt(delimited(tag("("), take_until(")}}}"), take(1usize)))(input)?;
let (input, _) = tag("}}}")(input)?;
Ok((
input,
Macros {
name: name.into(),
arguments: arguments.map(Into::into),
},
))
}
// Covers macros with arguments, with a `)` inside the arguments, without
// arguments, and several malformed inputs that must be rejected.
#[test]
fn test() {
assert_eq!(
Macros::parse("{{{poem(red,blue)}}}"),
Some((
"",
Macros {
name: "poem".into(),
arguments: Some("red,blue".into())
}
))
);
// Arguments run up to the first `)}}}`, so an inner `)` is kept.
assert_eq!(
Macros::parse("{{{poem())}}}"),
Some((
"",
Macros {
name: "poem".into(),
arguments: Some(")".into())
}
))
);
assert_eq!(
Macros::parse("{{{author}}}"),
Some((
"",
Macros {
name: "author".into(),
arguments: None
}
))
);
// Name must start with a letter.
assert!(Macros::parse("{{{0uthor}}}").is_none());
assert!(Macros::parse("{{{author}}").is_none());
assert!(Macros::parse("{{{poem(}}}").is_none());
assert!(Macros::parse("{{{poem)}}}").is_none());
}

245
src/elements/mod.rs Normal file
View file

@ -0,0 +1,245 @@
//! Org-mode elements
pub(crate) mod block;
pub(crate) mod clock;
pub(crate) mod comment;
pub(crate) mod cookie;
pub(crate) mod drawer;
pub(crate) mod dyn_block;
pub(crate) mod emphasis;
pub(crate) mod fixed_width;
pub(crate) mod fn_def;
pub(crate) mod fn_ref;
pub(crate) mod inline_call;
pub(crate) mod inline_src;
pub(crate) mod keyword;
pub(crate) mod link;
pub(crate) mod list;
pub(crate) mod macros;
pub(crate) mod planning;
pub(crate) mod radio_target;
pub(crate) mod rule;
pub(crate) mod snippet;
pub(crate) mod table;
pub(crate) mod target;
pub(crate) mod timestamp;
pub(crate) mod title;
pub use self::{
block::{
CenterBlock, CommentBlock, ExampleBlock, ExportBlock, QuoteBlock, SourceBlock,
SpecialBlock, VerseBlock,
},
clock::Clock,
comment::Comment,
cookie::Cookie,
drawer::Drawer,
dyn_block::DynBlock,
fixed_width::FixedWidth,
fn_def::FnDef,
fn_ref::FnRef,
inline_call::InlineCall,
inline_src::InlineSrc,
keyword::{BabelCall, Keyword},
link::Link,
list::{List, ListItem},
macros::Macros,
planning::Planning,
rule::Rule,
snippet::Snippet,
table::{Table, TableCell, TableRow},
target::Target,
timestamp::{Datetime, Timestamp},
title::Title,
};
use std::borrow::Cow;
/// Element Enum
///
/// One variant per kind of node. Variants either wrap the corresponding
/// element struct, carry a few inline fields, or are plain markers with no
/// data (e.g. `Section`, `Bold`).
///
/// With the `ser` feature enabled, values serialize with a `type` tag whose
/// value is the kebab-case variant name.
#[derive(Debug)]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "type", rename_all = "kebab-case"))]
pub enum Element<'a> {
SpecialBlock(SpecialBlock<'a>),
QuoteBlock(QuoteBlock<'a>),
CenterBlock(CenterBlock<'a>),
VerseBlock(VerseBlock<'a>),
CommentBlock(CommentBlock<'a>),
ExampleBlock(ExampleBlock<'a>),
ExportBlock(ExportBlock<'a>),
SourceBlock(SourceBlock<'a>),
BabelCall(BabelCall<'a>),
Section,
Clock(Clock<'a>),
Cookie(Cookie<'a>),
RadioTarget,
Drawer(Drawer<'a>),
Document { pre_blank: usize },
DynBlock(DynBlock<'a>),
FnDef(FnDef<'a>),
FnRef(FnRef<'a>),
Headline { level: usize },
InlineCall(InlineCall<'a>),
InlineSrc(InlineSrc<'a>),
Keyword(Keyword<'a>),
Link(Link<'a>),
List(List),
ListItem(ListItem<'a>),
Macros(Macros<'a>),
Snippet(Snippet<'a>),
Text { value: Cow<'a, str> },
Paragraph { post_blank: usize },
Rule(Rule),
Timestamp(Timestamp<'a>),
Target(Target<'a>),
Bold,
Strike,
Italic,
Underline,
Verbatim { value: Cow<'a, str> },
Code { value: Cow<'a, str> },
Comment(Comment<'a>),
FixedWidth(FixedWidth<'a>),
Title(Title<'a>),
Table(Table<'a>),
TableRow(TableRow),
TableCell(TableCell),
}
impl Element<'_> {
pub fn is_container(&self) -> bool {
match self {
Element::SpecialBlock(_)
| Element::QuoteBlock(_)
| Element::CenterBlock(_)
| Element::VerseBlock(_)
| Element::Bold
| Element::Document { .. }
| Element::DynBlock(_)
| Element::Headline { .. }
| Element::Italic
| Element::List(_)
| Element::ListItem(_)
| Element::Paragraph { .. }
| Element::Section
| Element::Strike
| Element::Underline
| Element::Title(_)
| Element::Table(_)
| Element::TableRow(TableRow::Header)
| Element::TableRow(TableRow::Body)
| Element::TableCell(_) => true,
_ => false,
}
}
pub fn into_owned(self) -> Element<'static> {
use Element::*;
match self {
SpecialBlock(e) => SpecialBlock(e.into_owned()),
QuoteBlock(e) => QuoteBlock(e.into_owned()),
CenterBlock(e) => CenterBlock(e.into_owned()),
VerseBlock(e) => VerseBlock(e.into_owned()),
CommentBlock(e) => CommentBlock(e.into_owned()),
ExampleBlock(e) => ExampleBlock(e.into_owned()),
ExportBlock(e) => ExportBlock(e.into_owned()),
SourceBlock(e) => SourceBlock(e.into_owned()),
BabelCall(e) => BabelCall(e.into_owned()),
Section => Section,
Clock(e) => Clock(e.into_onwed()),
Cookie(e) => Cookie(e.into_owned()),
RadioTarget => RadioTarget,
Drawer(e) => Drawer(e.into_owned()),
Document { pre_blank } => Document { pre_blank },
DynBlock(e) => DynBlock(e.into_owned()),
FnDef(e) => FnDef(e.into_owned()),
FnRef(e) => FnRef(e.into_owned()),
Headline { level } => Headline { level },
InlineCall(e) => InlineCall(e.into_owned()),
InlineSrc(e) => InlineSrc(e.into_owned()),
Keyword(e) => Keyword(e.into_owned()),
Link(e) => Link(e.into_owned()),
List(e) => List(e),
ListItem(e) => ListItem(e.into_owned()),
Macros(e) => Macros(e.into_owned()),
Snippet(e) => Snippet(e.into_owned()),
Text { value } => Text {
value: value.into_owned().into(),
},
Paragraph { post_blank } => Paragraph { post_blank },
Rule(e) => Rule(e),
Timestamp(e) => Timestamp(e.into_owned()),
Target(e) => Target(e.into_owned()),
Bold => Bold,
Strike => Strike,
Italic => Italic,
Underline => Underline,
Verbatim { value } => Verbatim {
value: value.into_owned().into(),
},
Code { value } => Code {
value: value.into_owned().into(),
},
Comment(e) => Comment(e.into_owned()),
FixedWidth(e) => FixedWidth(e.into_owned()),
Title(e) => Title(e.into_owned()),
Table(e) => Table(e.into_owned()),
TableRow(e) => TableRow(e),
TableCell(e) => TableCell(e),
}
}
}
// Generates `From<T> for Element<'a>` impls that wrap `T` in the variant of
// the same name. The first identifier list (before `;`) is for types that
// take a lifetime parameter, the second list is for types without one.
macro_rules! impl_from {
($($ele0:ident),*; $($ele1:ident),*) => {
$(
impl<'a> From<$ele0<'a>> for Element<'a> {
fn from(ele: $ele0<'a>) -> Element<'a> {
Element::$ele0(ele)
}
}
)*
$(
impl<'a> From<$ele1> for Element<'a> {
fn from(ele: $ele1) -> Element<'a> {
Element::$ele1(ele)
}
}
)*
};
}
// Types with a lifetime parameter first, then (after `;`) the ones without.
impl_from!(
BabelCall,
CenterBlock,
Clock,
Comment,
CommentBlock,
Cookie,
Drawer,
DynBlock,
ExampleBlock,
ExportBlock,
FixedWidth,
FnDef,
FnRef,
InlineCall,
InlineSrc,
Keyword,
Link,
ListItem,
Macros,
QuoteBlock,
Snippet,
SourceBlock,
SpecialBlock,
Table,
Target,
Timestamp,
Title,
VerseBlock;
List,
Rule,
TableRow
);

98
src/elements/planning.rs Normal file
View file

@ -0,0 +1,98 @@
use memchr::memchr;
use crate::elements::Timestamp;
/// Planning element
///
/// Holds up to one timestamp for each of the `DEADLINE:`, `SCHEDULED:` and
/// `CLOSED:` keywords found on a planning line.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Planning<'a> {
/// Timestamp associated with the deadline keyword
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
pub deadline: Option<Timestamp<'a>>,
/// Timestamp associated with the scheduled keyword
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
pub scheduled: Option<Timestamp<'a>>,
/// Timestamp associated with the closed keyword
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
pub closed: Option<Timestamp<'a>>,
}
impl Planning<'_> {
    /// Parses a planning line from the first line of `text`.
    ///
    /// The line is a sequence of `KEYWORD: timestamp` pairs, where the keyword
    /// is `DEADLINE:`, `SCHEDULED:` or `CLOSED:` and the timestamp is either
    /// active or inactive. Each keyword may appear at most once.
    ///
    /// Returns the text following the first line together with the parsed
    /// planning, or `None` when a keyword is unknown or repeated, a timestamp
    /// fails to parse, or no keyword was found at all.
    #[inline]
    pub(crate) fn parse(text: &str) -> Option<(&str, Planning)> {
        let (mut deadline, mut scheduled, mut closed) = (None, None, None);

        // `tail` is the trimmed first line; `off` is where the next line starts.
        let (mut tail, off) = memchr(b'\n', text.as_bytes())
            .map(|i| (text[..i].trim(), i + 1))
            .unwrap_or_else(|| (text.trim(), text.len()));

        while let Some(i) = memchr(b' ', tail.as_bytes()) {
            let next = tail[i + 1..].trim_start();

            macro_rules! set_timestamp {
                ($timestamp:expr) => {{
                    // Only try the inactive form when the active one failed.
                    let (new_tail, timestamp) = Timestamp::parse_active(next)
                        .or_else(|| Timestamp::parse_inactive(next))?;
                    $timestamp = Some(timestamp);
                    tail = new_tail.trim_start();
                }};
            }

            match &tail[..i] {
                "DEADLINE:" if deadline.is_none() => set_timestamp!(deadline),
                "SCHEDULED:" if scheduled.is_none() => set_timestamp!(scheduled),
                "CLOSED:" if closed.is_none() => set_timestamp!(closed),
                _ => return None,
            }
        }

        if deadline.is_none() && scheduled.is_none() && closed.is_none() {
            None
        } else {
            Some((
                &text[off..],
                Planning {
                    deadline,
                    scheduled,
                    closed,
                },
            ))
        }
    }

    /// Converts this planning into one that owns all of its timestamps.
    pub fn into_owned(self) -> Planning<'static> {
        Planning {
            deadline: self.deadline.map(|x| x.into_owned()),
            scheduled: self.scheduled.map(|x| x.into_owned()),
            closed: self.closed.map(|x| x.into_owned()),
        }
    }
}
// Checks that a single `SCHEDULED:` entry with an active timestamp is parsed
// and that the whole line, including the newline, is consumed.
// NOTE(review): the function name looks like a typo for `parse`.
#[test]
fn prase() {
use crate::elements::Datetime;
assert_eq!(
Planning::parse("SCHEDULED: <2019-04-08 Mon>\n"),
Some((
"",
Planning {
scheduled: Some(Timestamp::Active {
start: Datetime {
year: 2019,
month: 4,
day: 8,
dayname: "Mon".into(),
hour: None,
minute: None
},
repeater: None,
delay: None
}),
deadline: None,
closed: None,
}
))
)
}

View file

@ -0,0 +1,40 @@
use nom::{
bytes::complete::{tag, take_while},
combinator::verify,
sequence::delimited,
IResult,
};
// TODO: text-markup, entities, latex-fragments, subscript and superscript
/// Parses a radio target of the form `<<<contents>>>` at the start of `input`.
///
/// Returns the remaining input and the text between the delimiters, or `None`
/// when the input does not start with a valid radio target.
#[inline]
pub fn parse_radio_target(input: &str) -> Option<(&str, &str)> {
parse_internal(input).ok()
}
/// Parses `<<<contents>>>`.
///
/// The contents may not contain `<`, `>` or a newline, must not be empty, and
/// must neither start nor end with a space.
#[inline]
fn parse_internal(input: &str) -> IResult<&str, &str, ()> {
    let is_content_char = |c: char| !(c == '<' || c == '>' || c == '\n');
    let is_well_formed =
        |s: &str| !s.is_empty() && !s.starts_with(' ') && !s.ends_with(' ');

    let (rest, contents) = delimited(
        tag("<<<"),
        verify(take_while(is_content_char), is_well_formed),
        tag(">>>"),
    )(input)?;

    Ok((rest, contents))
}
// Valid radio targets, then inputs rejected for edge spaces, forbidden
// characters (`<`, `>`, newline) or a truncated closing delimiter.
#[test]
fn parse() {
assert_eq!(parse_radio_target("<<<target>>>"), Some(("", "target")));
assert_eq!(parse_radio_target("<<<tar get>>>"), Some(("", "tar get")));
assert!(parse_radio_target("<<<target >>>").is_none());
assert!(parse_radio_target("<<< target>>>").is_none());
assert!(parse_radio_target("<<<ta<get>>>").is_none());
assert!(parse_radio_target("<<<ta>get>>>").is_none());
assert!(parse_radio_target("<<<ta\nget>>>").is_none());
assert!(parse_radio_target("<<<target>>").is_none());
}

48
src/elements/rule.rs Normal file
View file

@ -0,0 +1,48 @@
use nom::{bytes::complete::take_while_m_n, character::complete::space0, IResult};
use crate::parse::combinators::{blank_lines_count, eol};
/// Horizontal rule element: a line made of at least five dashes.
#[derive(Debug, Default, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
pub struct Rule {
/// Number of blank lines between the rule line and the next non-blank line
/// or the buffer's end
pub post_blank: usize,
}
impl Rule {
/// Parses a rule line at the start of `input`.
///
/// Returns the remaining input and the rule, or `None` on any parse error.
pub(crate) fn parse(input: &str) -> Option<(&str, Rule)> {
parse_internal(input).ok()
}
}
/// Parses optional leading whitespace, five or more `-` characters and the end
/// of the line, then counts the blank lines that follow.
fn parse_internal(input: &str) -> IResult<&str, Rule, ()> {
    let is_dash = |c| c == '-';

    let (rest, _) = space0(input)?;
    let (rest, _) = take_while_m_n(5, usize::max_value(), is_dash)(rest)?;
    let (rest, _) = eol(rest)?;
    let (rest, post_blank) = blank_lines_count(rest)?;

    let rule = Rule { post_blank };
    Ok((rest, rule))
}
// Accepts lines of five or more dashes (optionally followed by blank lines)
// and rejects empty input, too-short runs, and lines with other text.
// NOTE(review): two of the rejection cases below are textually identical;
// leading whitespace in one of them may have been lost in this view.
#[test]
fn parse() {
assert_eq!(Rule::parse("-----"), Some(("", Rule { post_blank: 0 })));
assert_eq!(Rule::parse("--------"), Some(("", Rule { post_blank: 0 })));
assert_eq!(
Rule::parse("-----\n\n\n"),
Some(("", Rule { post_blank: 2 }))
);
assert_eq!(Rule::parse("----- \n"), Some(("", Rule { post_blank: 0 })));
assert!(Rule::parse("").is_none());
assert!(Rule::parse("----").is_none());
assert!(Rule::parse("----").is_none());
assert!(Rule::parse("None----").is_none());
assert!(Rule::parse("None ----").is_none());
assert!(Rule::parse("None------").is_none());
assert!(Rule::parse("----None----").is_none());
assert!(Rule::parse("\t\t----").is_none());
assert!(Rule::parse("------None").is_none());
assert!(Rule::parse("----- None").is_none());
}

100
src/elements/snippet.rs Normal file
View file

@ -0,0 +1,100 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take, take_until, take_while1},
sequence::{delimited, separated_pair},
IResult,
};
/// Export Snippet Object
///
/// Written as `@@name:value@@` in the source text.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Snippet<'a> {
/// Back-end name (the part before the colon)
pub name: Cow<'a, str>,
/// Export code (the part between the colon and the closing `@@`)
pub value: Cow<'a, str>,
}
impl Snippet<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, Snippet)> {
parse_internal(input).ok()
}
pub fn into_owned(self) -> Snippet<'static> {
Snippet {
name: self.name.into_owned().into(),
value: self.value.into_owned().into(),
}
}
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, Snippet, ()> {
let (input, (name, value)) = delimited(
tag("@@"),
separated_pair(
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-'),
tag(":"),
take_until("@@"),
),
take(2usize),
)(input)?;
Ok((
input,
Snippet {
name: name.into(),
value: value.into(),
},
))
}
// Covers ordinary snippets, an empty value, a value containing a single `@`,
// and malformed inputs (missing closing `@@`, missing colon, empty name).
#[test]
fn parse() {
assert_eq!(
Snippet::parse("@@html:<b>@@"),
Some((
"",
Snippet {
name: "html".into(),
value: "<b>".into()
}
))
);
assert_eq!(
Snippet::parse("@@latex:any arbitrary LaTeX code@@"),
Some((
"",
Snippet {
name: "latex".into(),
value: "any arbitrary LaTeX code".into(),
}
))
);
assert_eq!(
Snippet::parse("@@html:@@"),
Some((
"",
Snippet {
name: "html".into(),
value: "".into(),
}
))
);
assert_eq!(
Snippet::parse("@@html:<p>@</p>@@"),
Some((
"",
Snippet {
name: "html".into(),
value: "<p>@</p>".into(),
}
))
);
assert!(Snippet::parse("@@html:<b>@").is_none());
assert!(Snippet::parse("@@html<b>@@").is_none());
assert!(Snippet::parse("@@:<b>@@").is_none());
}

169
src/elements/table.rs Normal file
View file

@ -0,0 +1,169 @@
use std::borrow::Cow;
use nom::{
error::{make_error, ErrorKind},
Err, IResult,
};
use crate::parse::combinators::{blank_lines_count, line, lines_while};
/// Table Element
///
/// With the `ser` feature enabled, the variant is recorded in a `table_type`
/// tag (`"org"` or `"table.el"`).
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "table_type"))]
pub enum Table<'a> {
/// "org" type table
#[cfg_attr(feature = "ser", serde(rename = "org"))]
Org {
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
tblfm: Option<Cow<'a, str>>,
/// Number of blank lines between the table's last line and the next
/// non-blank line or the buffer's end
post_blank: usize,
has_header: bool,
},
/// "table.el" type table
#[cfg_attr(feature = "ser", serde(rename = "table.el"))]
TableEl {
/// Raw text of the table lines
value: Cow<'a, str>,
/// Number of blank lines between the table's last line and the next
/// non-blank line or the buffer's end
post_blank: usize,
},
}
impl Table<'_> {
    /// Parses a "table.el" style table at the start of `input`.
    ///
    /// Returns the remaining input and a `Table::TableEl`, or `None` when the
    /// input does not begin with such a table.
    pub fn parse_table_el(input: &str) -> Option<(&str, Table)> {
        Self::parse_table_el_internal(input).ok()
    }

    fn parse_table_el_internal(input: &str) -> IResult<&str, Table, ()> {
        let (_, first_line) = line(input)?;
        let border = first_line.trim();

        // Table.el tables start at lines beginning with "+-" string and followed by plus or minus signs
        let only_border_chars = border.bytes().all(|c| c == b'+' || c == b'-');
        if !(border.starts_with("+-") && only_border_chars) {
            // TODO: better error kind
            return Err(Err::Error(make_error(input, ErrorKind::Many0)));
        }

        // Table.el tables end at the first line not starting with either a vertical line or a plus sign.
        let is_table_line = |text: &str| {
            let text = text.trim_start();
            text.starts_with('|') || text.starts_with('+')
        };
        let (rest, content) = lines_while(is_table_line)(input)?;
        let (rest, post_blank) = blank_lines_count(rest)?;

        let table = Table::TableEl {
            value: content.into(),
            post_blank,
        };
        Ok((rest, table))
    }

    /// Converts this table into one that owns its text.
    pub fn into_owned(self) -> Table<'static> {
        match self {
            Table::TableEl { value, post_blank } => Table::TableEl {
                value: Cow::Owned(value.into_owned()),
                post_blank,
            },
            Table::Org {
                tblfm,
                post_blank,
                has_header,
            } => Table::Org {
                tblfm: tblfm.map(|formula| Cow::Owned(formula.into_owned())),
                post_blank,
                has_header,
            },
        }
    }
}
/// Table Row Element
///
/// # Syntax
///
/// ```text
/// | 0 | 1 | 2 | <- TableRow::Body
/// | 0 | 1 | 2 | <- TableRow::Body
/// ```
///
/// ```text
/// |-----+-----+-----| <- ignored
/// | 0 | 1 | 2 | <- TableRow::Header
/// | 0 | 1 | 2 | <- TableRow::Header
/// |-----+-----+-----| <- TableRow::HeaderRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- TableRow::BodyRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- TableRow::BodyRule
/// |-----+-----+-----| <- TableRow::BodyRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- ignored
/// ```
///
/// With the `ser` feature enabled, the variant is recorded in a
/// `table_row_type` tag using the kebab-case variant name.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "table_row_type"))]
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
pub enum TableRow {
/// This row is part of the table header
Header,
/// This row is part of the table body
Body,
/// This row is the rule between the table header and the body
HeaderRule,
/// This row is the rule between one table body and the next
BodyRule,
}
/// Table Cell Element
///
/// With the `ser` feature enabled, the variant is recorded in a
/// `table_cell_type` tag using the kebab-case variant name.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "table_cell_type"))]
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
pub enum TableCell {
/// Header cell
Header,
/// Body cell, or standard cell
Body,
}
// Parses a small table.el table and rejects empty input and a first line
// containing characters other than `+` and `-`.
// NOTE(review): the expected `post_blank: 1` implies a blank line after the
// table that is not visible in the raw string here; whitespace inside the raw
// strings may have been lost in this view — confirm against the real file.
#[test]
fn parse_table_el_() {
assert_eq!(
Table::parse_table_el(
r#" +---+
| |
+---+
"#
),
Some((
"",
Table::TableEl {
value: r#" +---+
| |
+---+
"#
.into(),
post_blank: 1
}
))
);
assert!(Table::parse_table_el("").is_none());
assert!(Table::parse_table_el("+----|---").is_none());
}

78
src/elements/target.rs Normal file
View file

@ -0,0 +1,78 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_while},
combinator::verify,
sequence::delimited,
IResult,
};
/// Target Object
///
/// Written as `<<target>>` in the source text.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Target<'a> {
/// Target ID: the text between `<<` and `>>`
pub target: Cow<'a, str>,
}
impl Target<'_> {
#[inline]
pub(crate) fn parse(input: &str) -> Option<(&str, Target)> {
parse_internal(input).ok()
}
pub fn into_owned(self) -> Target<'static> {
Target {
target: self.target.into_owned().into(),
}
}
}
#[inline]
fn parse_internal(input: &str) -> IResult<&str, Target, ()> {
let (input, target) = delimited(
tag("<<"),
verify(
take_while(|c: char| c != '<' && c != '\n' && c != '>'),
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '),
),
tag(">>"),
)(input)?;
Ok((
input,
Target {
target: target.into(),
},
))
}
// Valid targets, then inputs rejected for edge spaces, forbidden characters
// (`<`, `>`, newline) or a truncated closing delimiter.
#[test]
fn parse() {
assert_eq!(
Target::parse("<<target>>"),
Some((
"",
Target {
target: "target".into()
}
))
);
assert_eq!(
Target::parse("<<tar get>>"),
Some((
"",
Target {
target: "tar get".into()
}
))
);
assert!(Target::parse("<<target >>").is_none());
assert!(Target::parse("<< target>>").is_none());
assert!(Target::parse("<<ta<get>>").is_none());
assert!(Target::parse("<<ta>get>>").is_none());
assert!(Target::parse("<<ta\nget>>").is_none());
assert!(Target::parse("<<target>").is_none());
}

482
src/elements/timestamp.rs Normal file
View file

@ -0,0 +1,482 @@
use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take, take_till, take_while, take_while_m_n},
character::complete::{space0, space1},
combinator::{map, map_res, opt},
sequence::preceded,
IResult,
};
/// Datetime Struct
///
/// A calendar date with a day name and an optional clock time, as it appears
/// inside a timestamp.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Datetime<'a> {
    /// Year, parsed from four characters
    pub year: u16,
    /// Month, parsed from two characters
    pub month: u8,
    /// Day of month, parsed from two characters
    pub day: u8,
    /// Day name as written in the timestamp (e.g. `Tue`)
    pub dayname: Cow<'a, str>,
    /// Hour, when the timestamp carries a time
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub hour: Option<u8>,
    /// Minute, when the timestamp carries a time
    #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
    pub minute: Option<u8>,
}

impl Datetime<'_> {
    /// Converts this datetime into one that owns its day name.
    pub fn into_owned(self) -> Datetime<'static> {
        let Datetime {
            year,
            month,
            day,
            dayname,
            hour,
            minute,
        } = self;

        Datetime {
            year,
            month,
            day,
            dayname: Cow::Owned(dayname.into_owned()),
            hour,
            minute,
        }
    }
}
/// Conversions from [`Datetime`] into `chrono` date and time types.
///
/// The conversions are written as `From` impls, so the matching `Into` impls
/// come from the standard library's blanket implementation and existing
/// `.into()` calls keep working.
///
/// NOTE(review): `year`, `month`, `day`, `hour` and `minute` are passed to
/// `NaiveDate::from_ymd` / `NaiveTime::from_hms` without range checks here;
/// confirm how those constructors treat out-of-range values.
#[cfg(feature = "chrono")]
mod chrono {
    use super::Datetime;
    use chrono::*;

    impl From<&Datetime<'_>> for NaiveDate {
        fn from(dt: &Datetime<'_>) -> NaiveDate {
            NaiveDate::from_ymd(dt.year.into(), dt.month.into(), dt.day.into())
        }
    }

    impl From<&Datetime<'_>> for NaiveTime {
        /// A missing hour or minute is treated as zero; seconds are always zero.
        fn from(dt: &Datetime<'_>) -> NaiveTime {
            NaiveTime::from_hms(
                dt.hour.unwrap_or_default().into(),
                dt.minute.unwrap_or_default().into(),
                0,
            )
        }
    }

    impl From<&Datetime<'_>> for NaiveDateTime {
        fn from(dt: &Datetime<'_>) -> NaiveDateTime {
            NaiveDateTime::new(NaiveDate::from(dt), NaiveTime::from(dt))
        }
    }

    impl From<&Datetime<'_>> for DateTime<Utc> {
        /// The naive date and time are interpreted as UTC.
        fn from(dt: &Datetime<'_>) -> DateTime<Utc> {
            DateTime::from_utc(NaiveDateTime::from(dt), Utc)
        }
    }

    // By-value conversions delegate to the by-reference ones.

    impl From<Datetime<'_>> for NaiveDate {
        fn from(dt: Datetime<'_>) -> NaiveDate {
            NaiveDate::from(&dt)
        }
    }

    impl From<Datetime<'_>> for NaiveTime {
        fn from(dt: Datetime<'_>) -> NaiveTime {
            NaiveTime::from(&dt)
        }
    }

    impl From<Datetime<'_>> for NaiveDateTime {
        fn from(dt: Datetime<'_>) -> NaiveDateTime {
            NaiveDateTime::from(&dt)
        }
    }

    impl From<Datetime<'_>> for DateTime<Utc> {
        fn from(dt: Datetime<'_>) -> DateTime<Utc> {
            DateTime::<Utc>::from(&dt)
        }
    }
}
/// Timestamp Object
///
/// With the `ser` feature enabled, the variant is recorded in a
/// `timestamp_type` tag using the kebab-case variant name.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
#[cfg_attr(feature = "ser", serde(tag = "timestamp_type"))]
#[derive(Debug, Clone)]
pub enum Timestamp<'a> {
/// Single timestamp delimited by `<` and `>`
Active {
start: Datetime<'a>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
repeater: Option<Cow<'a, str>>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
delay: Option<Cow<'a, str>>,
},
/// Single timestamp delimited by `[` and `]`
Inactive {
start: Datetime<'a>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
repeater: Option<Cow<'a, str>>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
delay: Option<Cow<'a, str>>,
},
/// Range written as `<start>--<end>` or `<date time-time>`
ActiveRange {
start: Datetime<'a>,
end: Datetime<'a>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
repeater: Option<Cow<'a, str>>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
delay: Option<Cow<'a, str>>,
},
/// Range written as `[start]--[end]` or `[date time-time]`
InactiveRange {
start: Datetime<'a>,
end: Datetime<'a>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
repeater: Option<Cow<'a, str>>,
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
delay: Option<Cow<'a, str>>,
},
/// Diary timestamp written as `<%%(value)>`
Diary {
value: Cow<'a, str>,
},
}
impl Timestamp<'_> {
    /// Parses an active timestamp or range; `None` on any parse error.
    pub(crate) fn parse_active(input: &str) -> Option<(&str, Timestamp)> {
        parse_active(input).ok()
    }

    /// Parses an inactive timestamp or range; `None` on any parse error.
    pub(crate) fn parse_inactive(input: &str) -> Option<(&str, Timestamp)> {
        parse_inactive(input).ok()
    }

    /// Parses a diary timestamp; `None` on any parse error.
    pub(crate) fn parse_diary(input: &str) -> Option<(&str, Timestamp)> {
        parse_diary(input).ok()
    }

    /// Converts this timestamp into one that owns all of its text.
    pub fn into_owned(self) -> Timestamp<'static> {
        // Turns an optional borrowed-or-owned string into an owned one.
        fn owned(text: Option<Cow<'_, str>>) -> Option<Cow<'static, str>> {
            text.map(|s| Cow::Owned(s.into_owned()))
        }

        match self {
            Timestamp::Diary { value } => Timestamp::Diary {
                value: Cow::Owned(value.into_owned()),
            },
            Timestamp::Active {
                start,
                repeater,
                delay,
            } => Timestamp::Active {
                start: start.into_owned(),
                repeater: owned(repeater),
                delay: owned(delay),
            },
            Timestamp::Inactive {
                start,
                repeater,
                delay,
            } => Timestamp::Inactive {
                start: start.into_owned(),
                repeater: owned(repeater),
                delay: owned(delay),
            },
            Timestamp::ActiveRange {
                start,
                end,
                repeater,
                delay,
            } => Timestamp::ActiveRange {
                start: start.into_owned(),
                end: end.into_owned(),
                repeater: owned(repeater),
                delay: owned(delay),
            },
            Timestamp::InactiveRange {
                start,
                end,
                repeater,
                delay,
            } => Timestamp::InactiveRange {
                start: start.into_owned(),
                end: end.into_owned(),
                repeater: owned(repeater),
                delay: owned(delay),
            },
        }
    }
}
pub fn parse_active(input: &str) -> IResult<&str, Timestamp, ()> {
let (input, _) = tag("<")(input)?;
let (input, start) = parse_datetime(input)?;
if input.starts_with('-') {
let (input, (hour, minute)) = parse_time(&input[1..])?;
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag(">")(input)?;
let mut end = start.clone();
end.hour = Some(hour);
end.minute = Some(minute);
return Ok((
input,
Timestamp::ActiveRange {
start,
end,
repeater: None,
delay: None,
},
));
}
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag(">")(input)?;
if input.starts_with("--<") {
let (input, end) = parse_datetime(&input["--<".len()..])?;
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag(">")(input)?;
Ok((
input,
Timestamp::ActiveRange {
start,
end,
repeater: None,
delay: None,
},
))
} else {
Ok((
input,
Timestamp::Active {
start,
repeater: None,
delay: None,
},
))
}
}
pub fn parse_inactive(input: &str) -> IResult<&str, Timestamp, ()> {
let (input, _) = tag("[")(input)?;
let (input, start) = parse_datetime(input)?;
if input.starts_with('-') {
let (input, (hour, minute)) = parse_time(&input[1..])?;
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag("]")(input)?;
let mut end = start.clone();
end.hour = Some(hour);
end.minute = Some(minute);
return Ok((
input,
Timestamp::InactiveRange {
start,
end,
repeater: None,
delay: None,
},
));
}
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag("]")(input)?;
if input.starts_with("--[") {
let (input, end) = parse_datetime(&input["--[".len()..])?;
let (input, _) = space0(input)?;
// TODO: delay-or-repeater
let (input, _) = tag("]")(input)?;
Ok((
input,
Timestamp::InactiveRange {
start,
end,
repeater: None,
delay: None,
},
))
} else {
Ok((
input,
Timestamp::Inactive {
start,
repeater: None,
delay: None,
},
))
}
}
/// Parses a diary timestamp of the form `<%%(value)>`.
///
/// The value is everything up to the first `)`, `>` or newline, and it must be
/// followed immediately by `)>`.
pub fn parse_diary(input: &str) -> IResult<&str, Timestamp, ()> {
    let ends_value = |c| c == ')' || c == '>' || c == '\n';

    let (rest, _) = tag("<%%(")(input)?;
    let (rest, value) = take_till(ends_value)(rest)?;
    let (rest, _) = tag(")>")(rest)?;

    let diary = Timestamp::Diary {
        value: value.into(),
    };
    Ok((rest, diary))
}
/// Parses a clock time written as `H:MM` or `HH:MM`.
///
/// Returns the remaining input and the `(hour, minute)` pair. Both parts must
/// consist of ASCII digits only; no range check is applied to either value.
fn parse_time(input: &str) -> IResult<&str, (u8, u8), ()> {
    let (input, hour) = map_res(
        take_while_m_n(1, 2, |c: char| c.is_ascii_digit()),
        |num: &str| num.parse::<u8>(),
    )(input)?;
    let (input, _) = tag(":")(input)?;
    // Require exactly two ASCII digits. Taking any two characters would let
    // `+5` through, because integer parsing accepts a leading plus sign.
    let (input, minute) = map_res(
        take_while_m_n(2, 2, |c: char| c.is_ascii_digit()),
        |num: &str| num.parse::<u8>(),
    )(input)?;
    Ok((input, (hour, minute)))
}
/// Parses `YYYY-MM-DD DAYNAME [HH:MM]`.
///
/// The year, month and day must be exactly four, two and two ASCII digits.
/// The day name is every following character up to whitespace, a digit, `+`,
/// `-`, `]` or `>`, and may be empty. A time is parsed only when it is
/// separated from the day name by at least one space or tab.
///
/// No range check is applied to the month, day, hour or minute.
fn parse_datetime(input: &str) -> IResult<&str, Datetime, ()> {
    // Integer parsing alone also accepts a leading `+` (so `+123` would pass
    // as a year); only plain ASCII digits are valid in a timestamp.
    fn digits<T: std::str::FromStr>(num: &str) -> Result<T, ()> {
        if num.bytes().all(|b| b.is_ascii_digit()) {
            num.parse().map_err(|_| ())
        } else {
            Err(())
        }
    }

    let (input, year) = map_res(take(4usize), digits::<u16>)(input)?;
    let (input, _) = tag("-")(input)?;
    let (input, month) = map_res(take(2usize), digits::<u8>)(input)?;
    let (input, _) = tag("-")(input)?;
    let (input, day) = map_res(take(2usize), digits::<u8>)(input)?;
    let (input, _) = space1(input)?;
    let (input, dayname) = take_while(|c: char| {
        !c.is_ascii_whitespace()
            && !c.is_ascii_digit()
            && c != '+'
            && c != '-'
            && c != ']'
            && c != '>'
    })(input)?;
    // Hour and minute are either both present or both absent.
    let (input, (hour, minute)) = map(opt(preceded(space1, parse_time)), |time| {
        (time.map(|t| t.0), time.map(|t| t.1))
    })(input)?;

    Ok((
        input,
        Datetime {
            year,
            month,
            day,
            dayname: dayname.into(),
            hour,
            minute,
        },
    ))
}
// TODO
// #[cfg_attr(test, derive(PartialEq))]
// #[cfg_attr(feature = "ser", derive(serde::Serialize))]
// #[derive(Debug, Copy, Clone)]
// pub enum RepeaterType {
// Cumulate,
// CatchUp,
// Restart,
// }
// #[cfg_attr(test, derive(PartialEq))]
// #[cfg_attr(feature = "ser", derive(serde::Serialize))]
// #[derive(Debug, Copy, Clone)]
// pub enum DelayType {
// All,
// First,
// }
// #[cfg_attr(test, derive(PartialEq))]
// #[cfg_attr(feature = "ser", derive(serde::Serialize))]
// #[derive(Debug, Copy, Clone)]
// pub enum TimeUnit {
// Hour,
// Day,
// Week,
// Month,
// Year,
// }
// #[cfg_attr(test, derive(PartialEq))]
// #[cfg_attr(feature = "ser", derive(serde::Serialize))]
// #[derive(Debug, Copy, Clone)]
// pub struct Repeater {
// pub ty: RepeaterType,
// pub value: usize,
// pub unit: TimeUnit,
// }
// #[cfg_attr(test, derive(PartialEq))]
// #[cfg_attr(feature = "ser", derive(serde::Serialize))]
// #[derive(Debug, Copy, Clone)]
// pub struct Delay {
// pub ty: DelayType,
// pub value: usize,
// pub unit: TimeUnit,
// }
// Covers a single inactive timestamp, an inactive `[..]--[..]` range, and an
// active same-day `time-time` range.
#[test]
fn parse() {
assert_eq!(
parse_inactive("[2003-09-16 Tue]"),
Ok((
"",
Timestamp::Inactive {
start: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: None,
minute: None
},
repeater: None,
delay: None,
},
))
);
assert_eq!(
parse_inactive("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]"),
Ok((
"",
Timestamp::InactiveRange {
start: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: Some(9),
minute: Some(39)
},
end: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: Some(10),
minute: Some(39),
},
repeater: None,
delay: None
},
))
);
// The end of a `time-time` range reuses the start's date and day name.
assert_eq!(
parse_active("<2003-09-16 Tue 09:39-10:39>"),
Ok((
"",
Timestamp::ActiveRange {
start: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: Some(9),
minute: Some(39),
},
end: Datetime {
year: 2003,
month: 9,
day: 16,
dayname: "Tue".into(),
hour: Some(10),
minute: Some(39),
},
repeater: None,
delay: None
},
))
);
}

510
src/elements/title.rs Normal file
View file

@ -0,0 +1,510 @@
//! Headline Title
/// Map type used for headline properties: a standard `HashMap` by default.
#[cfg(not(feature = "indexmap"))]
pub type PropertiesMap<K, V> = std::collections::HashMap<K, V>;
/// Map type used for headline properties: an `IndexMap` when the `indexmap`
/// feature is enabled.
#[cfg(feature = "indexmap")]
pub type PropertiesMap<K, V> = indexmap::IndexMap<K, V>;
use std::borrow::Cow;
use memchr::memrchr2;
use nom::{
branch::alt,
bytes::complete::{tag, take_until, take_while},
character::complete::{anychar, line_ending, space1},
combinator::{map, opt, verify},
error::{make_error, ErrorKind},
multi::fold_many0,
sequence::{delimited, preceded},
Err, IResult,
};
use crate::{
config::ParseConfig,
elements::{drawer::parse_drawer_without_blank, Planning, Timestamp},
parse::combinators::{blank_lines_count, line, one_word},
};
/// Title Element
///
/// Everything `parse_title` extracts for one headline: the parts of the
/// headline line itself, plus the planning line and the property drawer that
/// may follow it.
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Title<'a> {
/// Headline level: the number of leading `*` characters
pub level: usize,
/// Headline priority cookie: the uppercase ASCII letter of a `[#A]`-style
/// cookie, or `None` when the headline has no such cookie
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
pub priority: Option<char>,
/// Headline tags, taken from a trailing `:tag1:tag2:` group and stored
/// without the surrounding colons
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Vec::is_empty"))]
pub tags: Vec<Cow<'a, str>>,
/// Headline todo keyword; only words listed in the parse configuration's
/// `todo_keywords` are recognized
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
pub keyword: Option<Cow<'a, str>>,
/// Raw headline's text, without the stars, the recognized todo keyword and
/// priority cookie, and the tags
pub raw: Cow<'a, str>,
/// Planning element associated to this headline
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
pub planning: Option<Box<Planning<'a>>>,
/// Property drawer associated to this headline; empty when the headline has
/// no `PROPERTIES` drawer
#[cfg_attr(
feature = "ser",
serde(skip_serializing_if = "PropertiesMap::is_empty")
)]
pub properties: PropertiesMap<Cow<'a, str>, Cow<'a, str>>,
/// Numbers of blank lines between last title's line and next non-blank line
/// or buffer's end
pub post_blank: usize,
}
impl Title<'_> {
    /// Parses a headline from the start of `input`.
    ///
    /// Returns the unconsumed input together with the parsed title and its raw
    /// text, or `None` when `parse_title` reports an error.
    pub(crate) fn parse<'a>(
        input: &'a str,
        config: &ParseConfig,
    ) -> Option<(&'a str, (Title<'a>, &'a str))> {
        parse_title(input, config).ok()
    }

    // TODO: fn is_quoted(&self) -> bool { }
    // TODO: fn is_footnote_section(&self) -> bool { }

    /// Returns this headline's closed timestamp, or `None` if not set.
    pub fn closed(&self) -> Option<&Timestamp> {
        let planning = self.planning.as_deref()?;
        planning.closed.as_ref()
    }

    /// Returns this headline's scheduled timestamp, or `None` if not set.
    pub fn scheduled(&self) -> Option<&Timestamp> {
        let planning = self.planning.as_deref()?;
        planning.scheduled.as_ref()
    }

    /// Returns this headline's deadline timestamp, or `None` if not set.
    pub fn deadline(&self) -> Option<&Timestamp> {
        let planning = self.planning.as_deref()?;
        planning.deadline.as_ref()
    }

    /// Returns `true` if this headline carries the `ARCHIVE` tag.
    pub fn is_archived(&self) -> bool {
        self.tags.iter().any(|tag| &**tag == "ARCHIVE")
    }

    /// Returns `true` if this headline is commented, i.e. its raw text is
    /// exactly `COMMENT` or starts with `COMMENT` followed by whitespace.
    pub fn is_commented(&self) -> bool {
        match self.raw.strip_prefix("COMMENT") {
            Some(rest) => rest.is_empty() || rest.starts_with(char::is_whitespace),
            None => false,
        }
    }

    /// Converts this title into one that owns all of its text, so it no longer
    /// borrows from the parsed input.
    pub fn into_owned(self) -> Title<'static> {
        let Title {
            level,
            priority,
            tags,
            keyword,
            raw,
            planning,
            properties,
            post_blank,
        } = self;

        Title {
            level,
            priority,
            tags: tags
                .into_iter()
                .map(|tag| Cow::Owned(tag.into_owned()))
                .collect(),
            keyword: keyword.map(|word| Cow::Owned(word.into_owned())),
            raw: Cow::Owned(raw.into_owned()),
            planning: planning.map(|p| Box::new(p.into_owned())),
            properties: properties
                .into_iter()
                .map(|(key, value)| {
                    (
                        Cow::Owned(key.into_owned()),
                        Cow::Owned(value.into_owned()),
                    )
                })
                .collect(),
            post_blank,
        }
    }
}
impl Default for Title<'_> {
fn default() -> Title<'static> {
Title {
level: 1,
priority: None,
tags: Vec::new(),
keyword: None,
raw: Cow::Borrowed(""),
planning: None,
properties: PropertiesMap::new(),
post_blank: 0,
}
}
}
/// Matches what `space1` accepts or, failing that, what `line_ending` accepts.
fn white_spaces_or_eol(input: &str) -> IResult<&str, &str, ()> {
    // `alt` tries the alternatives in this order.
    let horizontal = space1;
    let newline = line_ending;
    alt((horizontal, newline))(input)
}
#[inline]
fn parse_title<'a>(
input: &'a str,
config: &ParseConfig,
) -> IResult<&'a str, (Title<'a>, &'a str), ()> {
let (input, level) = map(take_while(|c: char| c == '*'), |s: &str| s.len())(input)?;
debug_assert!(level > 0);
let (input, keyword) = opt(preceded(
space1,
verify(one_word, |s: &str| {
config.todo_keywords.0.iter().any(|x| x == s)
|| config.todo_keywords.1.iter().any(|x| x == s)
}),
))(input)?;
let (input, priority) = opt(delimited(
space1,
delimited(
tag("[#"),
verify(anychar, |c: &char| c.is_ascii_uppercase()),
tag("]"),
),
white_spaces_or_eol,
))(input)?;
let (input, tail) = line(input)?;
let tail = tail.trim();
// tags can be separated by space or \t
let (raw, tags) = memrchr2(b' ', b'\t', tail.as_bytes())
.map(|i| (tail[0..i].trim(), &tail[i + 1..]))
.filter(|(_, x)| is_tag_line(x))
.unwrap_or((tail, ""));
let tags = tags
.split(':')
.filter(|s| !s.is_empty())
.map(Into::into)
.collect();
let (input, planning) = Planning::parse(input)
.map(|(input, planning)| (input, Some(Box::new(planning))))
.unwrap_or((input, None));
let (input, properties) = opt(parse_properties_drawer)(input)?;
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
(
Title {
properties: properties.unwrap_or_default(),
level,
keyword: keyword.map(Into::into),
priority,
tags,
raw: raw.into(),
planning,
post_blank,
},
raw,
),
))
}
/// Returns `true` when `input` looks like a tag group: longer than two bytes,
/// starting and ending with `:`, and made up only of alphanumeric characters,
/// `_`, `@`, `#`, `%` and `:`.
fn is_tag_line(input: &str) -> bool {
    fn is_tag_char(ch: char) -> bool {
        ch.is_alphanumeric() || matches!(ch, '_' | '@' | '#' | '%' | ':')
    }

    if input.len() <= 2 {
        return false;
    }
    if !(input.starts_with(':') && input.ends_with(':')) {
        return false;
    }
    input.chars().all(is_tag_char)
}
#[inline]
fn parse_properties_drawer(
input: &str,
) -> IResult<&str, PropertiesMap<Cow<'_, str>, Cow<'_, str>>, ()> {
let (input, (drawer, content)) = parse_drawer_without_blank(input.trim_start())?;
if drawer.name != "PROPERTIES" {
return Err(Err::Error(make_error(input, ErrorKind::Tag)));
}
let (_, map) = fold_many0(
parse_node_property,
PropertiesMap::new(),
|mut acc: PropertiesMap<_, _>, (name, value)| {
acc.insert(name.into(), value.into());
acc
},
)(content)?;
Ok((input, map))
}
/// Parses one `:NAME: value` line of a property drawer, skipping blank lines
/// and leading whitespace in front of it.
///
/// Trailing `+` characters are removed from the name and the value is trimmed.
#[inline]
fn parse_node_property(input: &str) -> IResult<&str, (&str, &str), ()> {
    let (rest, _) = blank_lines_count(input)?;
    let (rest, key) = delimited(tag(":"), take_until(":"), tag(":"))(rest.trim_start())?;
    let (rest, value) = line(rest)?;
    Ok((rest, (key.trim_end_matches('+'), value.trim())))
}
#[test]
fn parse_title_() {
    use crate::config::DEFAULT_CONFIG;

    /// Builds the result expected for a headline that is consumed completely
    /// and has no planning line, property drawer or trailing blank lines.
    fn parsed(
        level: usize,
        keyword: Option<&'static str>,
        priority: Option<char>,
        raw: &'static str,
        tags: &[&'static str],
    ) -> IResult<&'static str, (Title<'static>, &'static str), ()> {
        let title = Title {
            level,
            keyword: keyword.map(Cow::Borrowed),
            priority,
            raw: Cow::Borrowed(raw),
            tags: tags.iter().copied().map(Cow::Borrowed).collect(),
            planning: None,
            properties: PropertiesMap::new(),
            post_blank: 0,
        };
        Ok(("", (title, raw)))
    }

    // Keyword, priority cookie and tags are all recognized.
    assert_eq!(
        parse_title("**** DONE [#A] COMMENT Title :tag:a2%:", &DEFAULT_CONFIG),
        parsed(4, Some("DONE"), Some('A'), "COMMENT Title", &["tag", "a2%"])
    );

    // Words that are not configured keywords stay in the raw text, and the
    // cookie behind them is then not parsed as a priority either.
    assert_eq!(
        parse_title("**** ToDO [#A] COMMENT Title", &DEFAULT_CONFIG),
        parsed(4, None, None, "ToDO [#A] COMMENT Title", &[])
    );
    assert_eq!(
        parse_title("**** T0DO [#A] COMMENT Title", &DEFAULT_CONFIG),
        parsed(4, None, None, "T0DO [#A] COMMENT Title", &[])
    );

    // Only an uppercase ASCII letter forms a priority cookie.
    assert_eq!(
        parse_title("**** DONE [#1] COMMENT Title", &DEFAULT_CONFIG),
        parsed(4, Some("DONE"), None, "[#1] COMMENT Title", &[])
    );
    assert_eq!(
        parse_title("**** DONE [#a] COMMENT Title", &DEFAULT_CONFIG),
        parsed(4, Some("DONE"), None, "[#a] COMMENT Title", &[])
    );

    // https://github.com/PoiScript/orgize/issues/20
    assert_eq!(
        parse_title("** DONE [#B]::", &DEFAULT_CONFIG),
        parsed(2, Some("DONE"), None, "[#B]::", &[])
    );

    // A tag group must both start and end with a colon.
    assert_eq!(
        parse_title("**** Title :tag:a2%", &DEFAULT_CONFIG),
        parsed(4, None, None, "Title :tag:a2%", &[])
    );
    assert_eq!(
        parse_title("**** Title tag:a2%:", &DEFAULT_CONFIG),
        parsed(4, None, None, "Title tag:a2%:", &[])
    );

    // Keyword recognition follows the supplied configuration.
    assert_eq!(
        parse_title(
            "**** DONE Title",
            &ParseConfig {
                todo_keywords: (vec![], vec![]),
                ..Default::default()
            }
        ),
        parsed(4, None, None, "DONE Title", &[])
    );
    assert_eq!(
        parse_title(
            "**** TASK [#A] Title",
            &ParseConfig {
                todo_keywords: (vec!["TASK".to_string()], vec![]),
                ..Default::default()
            }
        ),
        parsed(4, Some("TASK"), Some('A'), "Title", &[])
    );
}
#[test]
fn parse_properties_drawer_() {
    // A drawer with a single property yields a single-entry map and leaves no
    // input behind.
    let mut expected: PropertiesMap<Cow<'_, str>, Cow<'_, str>> = PropertiesMap::new();
    expected.insert("CUSTOM_ID".into(), "id".into());

    assert_eq!(
        parse_properties_drawer(" :PROPERTIES:\n :CUSTOM_ID: id\n :END:"),
        Ok(("", expected))
    )
}
#[test]
fn preserve_properties_drawer_order() {
    // Use a large number of properties to reduce false pass rate, since HashMap
    // is non-deterministic. There are roughly 10^18 possible derangements of this sequence.
    let mut properties: Vec<(Cow<'_, str>, Cow<'_, str>)> = (0..20)
        .map(|i| {
            let prefix = match i % 3 {
                0 => "FOO",
                1 => "QUX",
                _ => "BAR",
            };
            // Avoid alphabetic or numeric order.
            let j = (i + 7) % 20;
            (
                Cow::Owned(format!("{}{}", prefix, j)),
                Cow::Owned(i.to_string()),
            )
        })
        .collect();

    // Render the properties as the body of a drawer, one line per property.
    let mut body = String::new();
    for (key, value) in &properties {
        body.push_str(&format!(" :{}: {}\n", key, value));
    }
    let drawer = format!(" :PROPERTIES:\n{}:END:\n", &body);

    let mut parsed: Vec<(_, _)> = parse_properties_drawer(&drawer)
        .unwrap()
        .1
        .into_iter()
        .collect();

    // Without the `indexmap` feature both sides are sorted before comparing,
    // so only the content is checked, not the order.
    #[cfg(not(feature = "indexmap"))]
    parsed.sort();
    #[cfg(not(feature = "indexmap"))]
    properties.sort();

    assert_eq!(parsed, properties);
}

View file

@ -1,468 +0,0 @@
// https://git.sr.ht/~bzg/org-mode/tree/bfa4f9d5aa3e5c94974cae7a459cb5e5b4b15f52/item/lisp/org-entities.el#L85
// nil -> false
// t -> true
// \x00A0 -> \\x00A0
#[rustfmt::skip]
pub const ENTITIES: &[(&str, &str, bool, &str, &str, &str, &str)] = &[
// ("* Letters"
// Latin
("Agrave", "\\`{A}", false, "&Agrave;", "A", "À", "À"),
("agrave", "\\`{a}", false, "&agrave;", "a", "à", "à"),
("Aacute", "\\'{A}", false, "&Aacute;", "A", "Á", "Á"),
("aacute", "\\'{a}", false, "&aacute;", "a", "á", "á"),
("Acirc", "\\^{A}", false, "&Acirc;", "A", "Â", "Â"),
("acirc", "\\^{a}", false, "&acirc;", "a", "â", "â"),
("Amacr", "\\={A}", false, "&Amacr;", "A", "Ã", "Ã"),
("amacr", "\\={a}", false, "&amacr;", "a", "ã", "ã"),
("Atilde", "\\~{A}", false, "&Atilde;", "A", "Ã", "Ã"),
("atilde", "\\~{a}", false, "&atilde;", "a", "ã", "ã"),
("Auml", "\\\"{A}", false, "&Auml;", "Ae", "Ä", "Ä"),
("auml", "\\\"{a}", false, "&auml;", "ae", "ä", "ä"),
("Aring", "\\AA{}", false, "&Aring;", "A", "Å", "Å"),
("AA", "\\AA{}", false, "&Aring;", "A", "Å", "Å"),
("aring", "\\aa{}", false, "&aring;", "a", "å", "å"),
("AElig", "\\AE{}", false, "&AElig;", "AE", "Æ", "Æ"),
("aelig", "\\ae{}", false, "&aelig;", "ae", "æ", "æ"),
("Ccedil", "\\c{C}", false, "&Ccedil;", "C", "Ç", "Ç"),
("ccedil", "\\c{c}", false, "&ccedil;", "c", "ç", "ç"),
("Egrave", "\\`{E}", false, "&Egrave;", "E", "È", "È"),
("egrave", "\\`{e}", false, "&egrave;", "e", "è", "è"),
("Eacute", "\\'{E}", false, "&Eacute;", "E", "É", "É"),
("eacute", "\\'{e}", false, "&eacute;", "e", "é", "é"),
("Ecirc", "\\^{E}", false, "&Ecirc;", "E", "Ê", "Ê"),
("ecirc", "\\^{e}", false, "&ecirc;", "e", "ê", "ê"),
("Euml", "\\\"{E}", false, "&Euml;", "E", "Ë", "Ë"),
("euml", "\\\"{e}", false, "&euml;", "e", "ë", "ë"),
("Igrave", "\\`{I}", false, "&Igrave;", "I", "Ì", "Ì"),
("igrave", "\\`{i}", false, "&igrave;", "i", "ì", "ì"),
("Iacute", "\\'{I}", false, "&Iacute;", "I", "Í", "Í"),
("iacute", "\\'{i}", false, "&iacute;", "i", "í", "í"),
("Idot", "\\.{I}", false, "&idot;", "I", "İ", "İ"),
("inodot", "\\i", false, "&inodot;", "i", "ı", "ı"),
("Icirc", "\\^{I}", false, "&Icirc;", "I", "Î", "Î"),
("icirc", "\\^{i}", false, "&icirc;", "i", "î", "î"),
("Iuml", "\\\"{I}", false, "&Iuml;", "I", "Ï", "Ï"),
("iuml", "\\\"{i}", false, "&iuml;", "i", "ï", "ï"),
("Ntilde", "\\~{N}", false, "&Ntilde;", "N", "Ñ", "Ñ"),
("ntilde", "\\~{n}", false, "&ntilde;", "n", "ñ", "ñ"),
("Ograve", "\\`{O}", false, "&Ograve;", "O", "Ò", "Ò"),
("ograve", "\\`{o}", false, "&ograve;", "o", "ò", "ò"),
("Oacute", "\\'{O}", false, "&Oacute;", "O", "Ó", "Ó"),
("oacute", "\\'{o}", false, "&oacute;", "o", "ó", "ó"),
("Ocirc", "\\^{O}", false, "&Ocirc;", "O", "Ô", "Ô"),
("ocirc", "\\^{o}", false, "&ocirc;", "o", "ô", "ô"),
("Otilde", "\\~{O}", false, "&Otilde;", "O", "Õ", "Õ"),
("otilde", "\\~{o}", false, "&otilde;", "o", "õ", "õ"),
("Ouml", "\\\"{O}", false, "&Ouml;", "Oe", "Ö", "Ö"),
("ouml", "\\\"{o}", false, "&ouml;", "oe", "ö", "ö"),
("Oslash", "\\O", false, "&Oslash;", "O", "Ø", "Ø"),
("oslash", "\\o{}", false, "&oslash;", "o", "ø", "ø"),
("OElig", "\\OE{}", false, "&OElig;", "OE", "OE", "Œ"),
("oelig", "\\oe{}", false, "&oelig;", "oe", "oe", "œ"),
("Scaron", "\\v{S}", false, "&Scaron;", "S", "S", "Š"),
("scaron", "\\v{s}", false, "&scaron;", "s", "s", "š"),
("szlig", "\\ss{}", false, "&szlig;", "ss", "ß", "ß"),
("Ugrave", "\\`{U}", false, "&Ugrave;", "U", "Ù", "Ù"),
("ugrave", "\\`{u}", false, "&ugrave;", "u", "ù", "ù"),
("Uacute", "\\'{U}", false, "&Uacute;", "U", "Ú", "Ú"),
("uacute", "\\'{u}", false, "&uacute;", "u", "ú", "ú"),
("Ucirc", "\\^{U}", false, "&Ucirc;", "U", "Û", "Û"),
("ucirc", "\\^{u}", false, "&ucirc;", "u", "û", "û"),
("Uuml", "\\\"{U}", false, "&Uuml;", "Ue", "Ü", "Ü"),
("uuml", "\\\"{u}", false, "&uuml;", "ue", "ü", "ü"),
("Yacute", "\\'{Y}", false, "&Yacute;", "Y", "Ý", "Ý"),
("yacute", "\\'{y}", false, "&yacute;", "y", "ý", "ý"),
("Yuml", "\\\"{Y}", false, "&Yuml;", "Y", "Y", "Ÿ"),
("yuml", "\\\"{y}", false, "&yuml;", "y", "ÿ", "ÿ"),
// Latin (special face)
("fnof", "\\textit{f}", false, "&fnof;", "f", "f", "ƒ"),
("real", "\\Re", true, "&real;", "R", "R", ""),
("image", "\\Im", true, "&image;", "I", "I", ""),
("weierp", "\\wp", true, "&weierp;", "P", "P", ""),
("ell", "\\ell", true, "&ell;", "ell", "ell", ""),
("imath", "\\imath", true, "&imath;", "[dotless i]", "dotless i", "ı"),
("jmath", "\\jmath", true, "&jmath;", "[dotless j]", "dotless j", "ȷ"),
// Greek
("Alpha", "A", false, "&Alpha;", "Alpha", "Alpha", "Α"),
("alpha", "\\alpha", true, "&alpha;", "alpha", "alpha", "α"),
("Beta", "B", false, "&Beta;", "Beta", "Beta", "Β"),
("beta", "\\beta", true, "&beta;", "beta", "beta", "β"),
("Gamma", "\\Gamma", true, "&Gamma;", "Gamma", "Gamma", "Γ"),
("gamma", "\\gamma", true, "&gamma;", "gamma", "gamma", "γ"),
("Delta", "\\Delta", true, "&Delta;", "Delta", "Delta", "Δ"),
("delta", "\\delta", true, "&delta;", "delta", "delta", "δ"),
("Epsilon", "E", false, "&Epsilon;", "Epsilon", "Epsilon", "Ε"),
("epsilon", "\\epsilon", true, "&epsilon;", "epsilon", "epsilon", "ε"),
("varepsilon", "\\varepsilon", true, "&epsilon;", "varepsilon", "varepsilon", "ε"),
("Zeta", "Z", false, "&Zeta;", "Zeta", "Zeta", "Ζ"),
("zeta", "\\zeta", true, "&zeta;", "zeta", "zeta", "ζ"),
("Eta", "H", false, "&Eta;", "Eta", "Eta", "Η"),
("eta", "\\eta", true, "&eta;", "eta", "eta", "η"),
("Theta", "\\Theta", true, "&Theta;", "Theta", "Theta", "Θ"),
("theta", "\\theta", true, "&theta;", "theta", "theta", "θ"),
("thetasym", "\\vartheta", true, "&thetasym;", "theta", "theta", "ϑ"),
("vartheta", "\\vartheta", true, "&thetasym;", "theta", "theta", "ϑ"),
("Iota", "I", false, "&Iota;", "Iota", "Iota", "Ι"),
("iota", "\\iota", true, "&iota;", "iota", "iota", "ι"),
("Kappa", "K", false, "&Kappa;", "Kappa", "Kappa", "Κ"),
("kappa", "\\kappa", true, "&kappa;", "kappa", "kappa", "κ"),
("Lambda", "\\Lambda", true, "&Lambda;", "Lambda", "Lambda", "Λ"),
("lambda", "\\lambda", true, "&lambda;", "lambda", "lambda", "λ"),
("Mu", "M", false, "&Mu;", "Mu", "Mu", "Μ"),
("mu", "\\mu", true, "&mu;", "mu", "mu", "μ"),
("nu", "\\nu", true, "&nu;", "nu", "nu", "ν"),
("Nu", "N", false, "&Nu;", "Nu", "Nu", "Ν"),
("Xi", "\\Xi", true, "&Xi;", "Xi", "Xi", "Ξ"),
("xi", "\\xi", true, "&xi;", "xi", "xi", "ξ"),
("Omicron", "O", false, "&Omicron;", "Omicron", "Omicron", "Ο"),
("omicron", "\\textit{o}", false, "&omicron;", "omicron", "omicron", "ο"),
("Pi", "\\Pi", true, "&Pi;", "Pi", "Pi", "Π"),
("pi", "\\pi", true, "&pi;", "pi", "pi", "π"),
("Rho", "P", false, "&Rho;", "Rho", "Rho", "Ρ"),
("rho", "\\rho", true, "&rho;", "rho", "rho", "ρ"),
("Sigma", "\\Sigma", true, "&Sigma;", "Sigma", "Sigma", "Σ"),
("sigma", "\\sigma", true, "&sigma;", "sigma", "sigma", "σ"),
("sigmaf", "\\varsigma", true, "&sigmaf;", "sigmaf", "sigmaf", "ς"),
("varsigma", "\\varsigma", true, "&sigmaf;", "varsigma", "varsigma", "ς"),
("Tau", "T", false, "&Tau;", "Tau", "Tau", "Τ"),
("Upsilon", "\\Upsilon", true, "&Upsilon;", "Upsilon", "Upsilon", "Υ"),
("upsih", "\\Upsilon", true, "&upsih;", "upsilon", "upsilon", "ϒ"),
("upsilon", "\\upsilon", true, "&upsilon;", "upsilon", "upsilon", "υ"),
("Phi", "\\Phi", true, "&Phi;", "Phi", "Phi", "Φ"),
("phi", "\\phi", true, "&phi;", "phi", "phi", "ɸ"),
("varphi", "\\varphi", true, "&varphi;", "varphi", "varphi", "φ"),
("Chi", "X", false, "&Chi;", "Chi", "Chi", "Χ"),
("chi", "\\chi", true, "&chi;", "chi", "chi", "χ"),
("acutex", "\\acute x", true, "&acute;x", "'x", "'x", "𝑥́"),
("Psi", "\\Psi", true, "&Psi;", "Psi", "Psi", "Ψ"),
("psi", "\\psi", true, "&psi;", "psi", "psi", "ψ"),
("tau", "\\tau", true, "&tau;", "tau", "tau", "τ"),
("Omega", "\\Omega", true, "&Omega;", "Omega", "Omega", "Ω"),
("omega", "\\omega", true, "&omega;", "omega", "omega", "ω"),
("piv", "\\varpi", true, "&piv;", "omega-pi", "omega-pi", "ϖ"),
("varpi", "\\varpi", true, "&piv;", "omega-pi", "omega-pi", "ϖ"),
("partial", "\\partial", true, "&part;", "[partial differential]", "[partial differential]", ""),
// Hebrew
("alefsym", "\\aleph", true, "&alefsym;", "aleph", "aleph", ""),
("aleph", "\\aleph", true, "&aleph;", "aleph", "aleph", ""),
("gimel", "\\gimel", true, "&gimel;", "gimel", "gimel", ""),
("beth", "\\beth", true, "&beth;", "beth", "beth", "ב"),
("dalet", "\\daleth", true, "&daleth;", "dalet", "dalet", "ד"),
// Icelandic
("ETH", "\\DH{}", false, "&ETH;", "D", "Ð", "Ð"),
("eth", "\\dh{}", false, "&eth;", "dh", "ð", "ð"),
("THORN", "\\TH{}", false, "&THORN;", "TH", "Þ", "Þ"),
("thorn", "\\th{}", false, "&thorn;", "th", "þ", "þ"),
//, "* Punctuation",
// Dots and Marks
("dots", "\\dots{}", false, "&hellip;", "...", "...", ""),
("cdots", "\\cdots{}", true, "&ctdot;", "...", "...", ""),
("hellip", "\\dots{}", false, "&hellip;", "...", "...", ""),
("middot", "\\textperiodcentered{}", false, "&middot;", ".", "·", "·"),
("iexcl", "!`", false, "&iexcl;", "!", "¡", "¡"),
("iquest", "?`", false, "&iquest;", "?", "¿", "¿"),
// Dash-like
("shy", "\\-", false, "&shy;", "", "", ""),
("ndash", "--", false, "&ndash;", "-", "-", ""),
("mdash", "---", false, "&mdash;", "--", "--", ""),
// Quotations
("quot", "\\textquotedbl{}", false, "&quot;", "\"", "\"", "\""),
("acute", "\\textasciiacute{}", false, "&acute;", "'", "´", "´"),
("ldquo", "\\textquotedblleft{}", false, "&ldquo;", "\"", "\"", ""),
("rdquo", "\\textquotedblright{}", false, "&rdquo;", "\"", "\"", ""),
("bdquo", "\\quotedblbase{}", false, "&bdquo;", "\"", "\"", ""),
("lsquo", "\\textquoteleft{}", false, "&lsquo;", "`", "`", ""),
("rsquo", "\\textquoteright{}", false, "&rsquo;", "'", "'", ""),
("sbquo", "\\quotesinglbase{}", false, "&sbquo;", ", ", ", ", ""),
("laquo", "\\guillemotleft{}", false, "&laquo;", "<<", "«", "«"),
("raquo", "\\guillemotright{}", false, "&raquo;", ">>", "»", "»"),
("lsaquo", "\\guilsinglleft{}", false, "&lsaquo;", "<", "<", ""),
("rsaquo", "\\guilsinglright{}", false, "&rsaquo;", ">", ">", ""),
//, "* Other",
// Misc. (often used)
("circ", "\\^{}", false, "&circ;", "^", "^", ""),
("vert", "\\vert{}", true, "&vert;", "|", "|", "|"),
("vbar", "|", false, "|", "|", "|", "|"),
("brvbar", "\\textbrokenbar{}", false, "&brvbar;", "|", "¦", "¦"),
("S", "\\S", false, "&sect;", "section", "§", "§"),
("sect", "\\S", false, "&sect;", "section", "§", "§"),
("P", "\\P{}", false, "&para;", "paragraph", "", ""),
("para", "\\P{}", false, "&para;", "paragraph", "", ""),
("amp", "\\&", false, "&amp;", "&", "&", "&"),
("lt", "\\textless{}", false, "&lt;", "<", "<", "<"),
("gt", "\\textgreater{}", false, "&gt;", ">", ">", ">"),
("tilde", "\\textasciitilde{}", false, "~", "~", "~", "~"),
("slash", "/", false, "/", "/", "/", "/"),
("plus", "+", false, "+", "+", "+", "+"),
("under", "\\_", false, "_", "_", "_", "_"),
("equal", "=", false, "=", "=", "=", "="),
("asciicirc", "\\textasciicircum{}", false, "^", "^", "^", "^"),
("dagger", "\\textdagger{}", false, "&dagger;", "[dagger]", "[dagger]", ""),
("dag", "\\dag{}", false, "&dagger;", "[dagger]", "[dagger]", ""),
("Dagger", "\\textdaggerdbl{}", false, "&Dagger;", "[doubledagger]", "[doubledagger]", ""),
("ddag", "\\ddag{}", false, "&Dagger;", "[doubledagger]", "[doubledagger]", ""),
// Whitespace
("nbsp", "~", false, "&nbsp;", ", ", "\\x00A0", "\\x00A0"),
("ensp", "\\hspace*{.5em}", false, "&ensp;", ", ", ", ", ""),
("emsp", "\\hspace*{1em}", false, "&emsp;", ", ", ", ", ""),
("thinsp", "\\hspace*{.2em}", false, "&thinsp;", ", ", ", ", ""),
// Currency
("curren", "\\textcurrency{}", false, "&curren;", "curr.", "¤", "¤"),
("cent", "\\textcent{}", false, "&cent;", "cent", "¢", "¢"),
("pound", "\\pounds{}", false, "&pound;", "pound", "£", "£"),
("yen", "\\textyen{}", false, "&yen;", "yen", "¥", "¥"),
("euro", "\\texteuro{}", false, "&euro;", "EUR", "EUR", ""),
("EUR", "\\texteuro{}", false, "&euro;", "EUR", "EUR", ""),
("dollar", "\\$", false, "$", "$", "$", "$"),
("USD", "\\$", false, "$", "$", "$", "$"),
// Property Marks
("copy", "\\textcopyright{}", false, "&copy;", "(c)", "©", "©"),
("reg", "\\textregistered{}", false, "&reg;", "(r)", "®", "®"),
("trade", "\\texttrademark{}", false, "&trade;", "TM", "TM", ""),
// Science, etrueal.
("minus", "-", true, "&minus;", "-", "-", ""),
("pm", "\\textpm{}", false, "&plusmn;", "+-", "±", "±"),
("plusmn", "\\textpm{}", false, "&plusmn;", "+-", "±", "±"),
("times", "\\texttimes{}", false, "&times;", "*", "×", "×"),
("frasl", "/", false, "&frasl;", "/", "/", ""),
("colon", "\\colon", true, ":", ":", ":", ":"),
("div", "\\textdiv{}", false, "&divide;", "/", "÷", "÷"),
("frac12", "\\textonehalf{}", false, "&frac12;", "1/2", "½", "½"),
("frac14", "\\textonequarter{}", false, "&frac14;", "1/4", "¼", "¼"),
("frac34", "\\textthreequarters{}", false, "&frac34;", "3/4", "¾", "¾"),
("permil", "\\textperthousand{}", false, "&permil;", "per thousand", "per thousand", ""),
("sup1", "\\textonesuperior{}", false, "&sup1;", "^1", "¹", "¹"),
("sup2", "\\texttwosuperior{}", false, "&sup2;", "^2", "²", "²"),
("sup3", "\\textthreesuperior{}", false, "&sup3;", "^3", "³", "³"),
("radic", "\\sqrt{\\,}", true, "&radic;", "[square root]", "[square root]", ""),
("sum", "\\sum", true, "&sum;", "[sum]", "[sum]", ""),
("prod", "\\prod", true, "&prod;", "[product]", "[n-ary product]", ""),
("micro", "\\textmu{}", false, "&micro;", "micro", "µ", "µ"),
("macr", "\\textasciimacron{}", false, "&macr;", "[macron]", "¯", "¯"),
("deg", "\\textdegree{}", false, "&deg;", "degree", "°", "°"),
("prime", "\\prime", true, "&prime;", "'", "'", ""),
("Prime", "\\prime{}\\prime", true, "&Prime;", "''", "''", ""),
("infin", "\\infty", true, "&infin;", "[infinity]", "[infinity]", ""),
("infty", "\\infty", true, "&infin;", "[infinity]", "[infinity]", ""),
("prop", "\\propto", true, "&prop;", "[proportional to]", "[proportional to]", ""),
("propto", "\\propto", true, "&prop;", "[proportional to]", "[proportional to]", ""),
("not", "\\textlnot{}", false, "&not;", "[angled dash]", "¬", "¬"),
("neg", "\\neg{}", true, "&not;", "[angled dash]", "¬", "¬"),
("land", "\\land", true, "&and;", "[logical and]", "[logical and]", ""),
("wedge", "\\wedge", true, "&and;", "[logical and]", "[logical and]", ""),
("lor", "\\lor", true, "&or;", "[logical or]", "[logical or]", ""),
("vee", "\\vee", true, "&or;", "[logical or]", "[logical or]", ""),
("cap", "\\cap", true, "&cap;", "[intersection]", "[intersection]", ""),
("cup", "\\cup", true, "&cup;", "[union]", "[union]", ""),
("smile", "\\smile", true, "&smile;", "[cup product]", "[cup product]", ""),
("frown", "\\frown", true, "&frown;", "[Cap product]", "[cap product]", ""),
("int", "\\int", true, "&int;", "[integral]", "[integral]", ""),
("therefore", "\\therefore", true, "&there4;", "[therefore]", "[therefore]", ""),
("there4", "\\therefore", true, "&there4;", "[therefore]", "[therefore]", ""),
("because", "\\because", true, "&because;", "[because]", "[because]", ""),
("sim", "\\sim", true, "&sim;", "~", "~", ""),
("cong", "\\cong", true, "&cong;", "[approx. equal to]", "[approx. equal to]", ""),
("simeq", "\\simeq", true, "&cong;", "[approx. equal to]", "[approx. equal to]", ""),
("asymp", "\\asymp", true, "&asymp;", "[, almostrueequal to]", "[, almostrueequal to]", ""),
("approx", "\\approx", true, "&asymp;", "[, almostrueequal to]", "[, almostrueequal to]", ""),
("ne", "\\ne", true, "&ne;", "[, notrueequal to]", "[, notrueequal to]", ""),
("neq", "\\neq", true, "&ne;", "[, notrueequal to]", "[, notrueequal to]", ""),
("equiv", "\\equiv", true, "&equiv;", "[identical to]", "[identical to]", ""),
("triangleq", "\\triangleq", true, "&triangleq;", "[defined to]", "[defined to]", ""),
("le", "\\le", true, "&le;", "<=", "<=", ""),
("leq", "\\le", true, "&le;", "<=", "<=", ""),
("ge", "\\ge", true, "&ge;", ">=", ">=", ""),
("geq", "\\ge", true, "&ge;", ">=", ">=", ""),
("lessgtr", "\\lessgtr", true, "&lessgtr;", "[less than or greater than]", "[less than or greater than]", ""),
("lesseqgtr", "\\lesseqgtr", true, "&lesseqgtr;", "[less than or equal or greater than or equal]", "[less than or equal or greater than or equal]", ""),
("ll", "\\ll", true, "&Lt;", "<<", "<<", ""),
("Ll", "\\lll", true, "&Ll;", "<<<", "<<<", ""),
("lll", "\\lll", true, "&Ll;", "<<<", "<<<", ""),
("gg", "\\gg", true, "&Gt;", ">>", ">>", ""),
("Gg", "\\ggg", true, "&Gg;", ">>>", ">>>", ""),
("ggg", "\\ggg", true, "&Gg;", ">>>", ">>>", ""),
("prec", "\\prec", true, "&pr;", "[precedes]", "[precedes]", ""),
("preceq", "\\preceq", true, "&prcue;", "[precedes or equal]", "[precedes or equal]", ""),
("preccurlyeq", "\\preccurlyeq", true, "&prcue;", "[precedes or equal]", "[precedes or equal]", ""),
("succ", "\\succ", true, "&sc;", "[succeeds]", "[succeeds]", ""),
("succeq", "\\succeq", true, "&sccue;", "[succeeds or equal]", "[succeeds or equal]", ""),
("succcurlyeq", "\\succcurlyeq", true, "&sccue;", "[succeeds or equal]", "[succeeds or equal]", ""),
("sub", "\\subset", true, "&sub;", "[, subsetrueof]", "[, subsetrueof]", ""),
("subset", "\\subset", true, "&sub;", "[, subsetrueof]", "[, subsetrueof]", ""),
("sup", "\\supset", true, "&sup;", "[, supersetrueof]", "[, supersetrueof]", ""),
("supset", "\\supset", true, "&sup;", "[, supersetrueof]", "[, supersetrueof]", ""),
("nsub", "\\not\\subset", true, "&nsub;", "[, notruea, subsetrueof]", "[, notruea, subsetrueof", ""),
("sube", "\\subseteq", true, "&sube;", "[, subsetrueof or equal to]", "[, subsetrueof or equal to]", ""),
("nsup", "\\not\\supset", true, "&nsup;", "[, notruea, supersetrueof]", "[, notruea, supersetrueof]", ""),
("supe", "\\supseteq", true, "&supe;", "[, supersetrueof or equal to]", "[, supersetrueof or equal to]", ""),
("setminus", "\\setminus", true, "&setminus;", "\\", "\\", ""),
("forall", "\\forall", true, "&forall;", "[for all]", "[for all]", ""),
("exist", "\\exists", true, "&exist;", "[there exists]", "[there exists]", ""),
("exists", "\\exists", true, "&exist;", "[there exists]", "[there exists]", ""),
("nexist", "\\nexists", true, "&exist;", "[there does, notrueexists]", "[there does, notrue exists]", ""),
("nexists", "\\nexists", true, "&exist;", "[there does, notrueexists]", "[there does, notrue exists]", ""),
("empty", "\\emptyset", true, "&empty;", "[empty set]", "[empty set]", ""),
("emptyset", "\\emptyset", true, "&empty;", "[empty set]", "[empty set]", ""),
("isin", "\\in", true, "&isin;", "[, elementrueof]", "[, elementrueof]", ""),
("in", "\\in", true, "&isin;", "[, elementrueof]", "[, elementrueof]", ""),
("notin", "\\notin", true, "&notin;", "[, notruean, elementrueof]", "[, notruean, elementrueof]", ""),
("ni", "\\ni", true, "&ni;", "[contains as member]", "[contains as member]", ""),
("nabla", "\\nabla", true, "&nabla;", "[nabla]", "[nabla]", ""),
("ang", "\\angle", true, "&ang;", "[angle]", "[angle]", ""),
("angle", "\\angle", true, "&ang;", "[angle]", "[angle]", ""),
("perp", "\\perp", true, "&perp;", "[up tack]", "[up tack]", ""),
("parallel", "\\parallel", true, "&parallel;", "||", "||", ""),
("sdot", "\\cdot", true, "&sdot;", "[dot]", "[dot]", ""),
("cdot", "\\cdot", true, "&sdot;", "[dot]", "[dot]", ""),
("lceil", "\\lceil", true, "&lceil;", "[, leftrueceiling]", "[, leftrueceiling]", ""),
("rceil", "\\rceil", true, "&rceil;", "[, rightrueceiling]", "[, rightrueceiling]", ""),
("lfloor", "\\lfloor", true, "&lfloor;", "[, leftruefloor]", "[, leftruefloor]", ""),
("rfloor", "\\rfloor", true, "&rfloor;", "[, rightruefloor]", "[, rightruefloor]", ""),
("lang", "\\langle", true, "&lang;", "<", "<", ""),
("rang", "\\rangle", true, "&rang;", ">", ">", ""),
("langle", "\\langle", true, "&lang;", "<", "<", ""),
("rangle", "\\rangle", true, "&rang;", ">", ">", ""),
("hbar", "\\hbar", true, "&hbar;", "hbar", "hbar", ""),
("mho", "\\mho", true, "&mho;", "mho", "mho", ""),
// Arrows
("larr", "\\leftarrow", true, "&larr;", "<-", "<-", ""),
("leftarrow", "\\leftarrow", true, "&larr;", "<-", "<-", ""),
("gets", "\\gets", true, "&larr;", "<-", "<-", ""),
("lArr", "\\Leftarrow", true, "&lArr;", "<=", "<=", ""),
("Leftarrow", "\\Leftarrow", true, "&lArr;", "<=", "<=", ""),
("uarr", "\\uparrow", true, "&uarr;", "[uparrow]", "[uparrow]", ""),
("uparrow", "\\uparrow", true, "&uarr;", "[uparrow]", "[uparrow]", ""),
("uArr", "\\Uparrow", true, "&uArr;", "[dbluparrow]", "[dbluparrow]", ""),
("Uparrow", "\\Uparrow", true, "&uArr;", "[dbluparrow]", "[dbluparrow]", ""),
("rarr", "\\rightarrow", true, "&rarr;", "->", "->", ""),
("to", "\\to", true, "&rarr;", "->", "->", ""),
("rightarrow", "\\rightarrow", true, "&rarr;", "->", "->", ""),
("rArr", "\\Rightarrow", true, "&rArr;", "=>", "=>", ""),
("Rightarrow", "\\Rightarrow", true, "&rArr;", "=>", "=>", ""),
("darr", "\\downarrow", true, "&darr;", "[downarrow]", "[downarrow]", ""),
("downarrow", "\\downarrow", true, "&darr;", "[downarrow]", "[downarrow]", ""),
("dArr", "\\Downarrow", true, "&dArr;", "[dbldownarrow]", "[dbldownarrow]", ""),
("Downarrow", "\\Downarrow", true, "&dArr;", "[dbldownarrow]", "[dbldownarrow]", ""),
("harr", "\\leftrightarrow", true, "&harr;", "<->", "<->", ""),
("leftrightarrow", "\\leftrightarrow", true, "&harr;", "<->", "<->", ""),
("hArr", "\\Leftrightarrow", true, "&hArr;", "<=>", "<=>", ""),
("Leftrightarrow", "\\Leftrightarrow", true, "&hArr;", "<=>", "<=>", ""),
("crarr", "\\hookleftarrow", true, "&crarr;", "<-'", "<-'", ""),
("hookleftarrow", "\\hookleftarrow", true, "&crarr;", "<-'", "<-'", ""),
// Function names
("arccos", "\\arccos", true, "arccos", "arccos", "arccos", "arccos"),
("arcsin", "\\arcsin", true, "arcsin", "arcsin", "arcsin", "arcsin"),
("arctan", "\\arctan", true, "arctan", "arctan", "arctan", "arctan"),
("arg", "\\arg", true, "arg", "arg", "arg", "arg"),
("cos", "\\cos", true, "cos", "cos", "cos", "cos"),
("cosh", "\\cosh", true, "cosh", "cosh", "cosh", "cosh"),
("cot", "\\cot", true, "cot", "cot", "cot", "cot"),
("coth", "\\coth", true, "coth", "coth", "coth", "coth"),
("csc", "\\csc", true, "csc", "csc", "csc", "csc"),
("deg", "\\deg", true, "&deg;", "deg", "deg", "deg"),
("det", "\\det", true, "det", "det", "det", "det"),
("dim", "\\dim", true, "dim", "dim", "dim", "dim"),
("exp", "\\exp", true, "exp", "exp", "exp", "exp"),
("gcd", "\\gcd", true, "gcd", "gcd", "gcd", "gcd"),
("hom", "\\hom", true, "hom", "hom", "hom", "hom"),
("inf", "\\inf", true, "inf", "inf", "inf", "inf"),
("ker", "\\ker", true, "ker", "ker", "ker", "ker"),
("lg", "\\lg", true, "lg", "lg", "lg", "lg"),
("lim", "\\lim", true, "lim", "lim", "lim", "lim"),
("liminf", "\\liminf", true, "liminf", "liminf", "liminf", "liminf"),
("limsup", "\\limsup", true, "limsup", "limsup", "limsup", "limsup"),
("ln", "\\ln", true, "ln", "ln", "ln", "ln"),
("log", "\\log", true, "log", "log", "log", "log"),
("max", "\\max", true, "max", "max", "max", "max"),
("min", "\\min", true, "min", "min", "min", "min"),
("Pr", "\\Pr", true, "Pr", "Pr", "Pr", "Pr"),
("sec", "\\sec", true, "sec", "sec", "sec", "sec"),
("sin", "\\sin", true, "sin", "sin", "sin", "sin"),
("sinh", "\\sinh", true, "sinh", "sinh", "sinh", "sinh"),
("sup", "\\sup", true, "&sup;", "sup", "sup", "sup"),
("tan", "\\tan", true, "tan", "tan", "tan", "tan"),
("tanh", "\\tanh", true, "tanh", "tanh", "tanh", "tanh"),
// Signs & Symbols
("bull", "\\textbullet{}", false, "&bull;", "*", "*", ""),
("bullet", "\\textbullet{}", false, "&bull;", "*", "*", ""),
("star", "\\star", true, "*", "*", "*", ""),
("lowast", "\\ast", true, "&lowast;", "*", "*", ""),
("ast", "\\ast", true, "&lowast;", "*", "*", "*"),
("odot", "\\odot", true, "o", "[circled dot]", "[circled dot]", "ʘ"),
("oplus", "\\oplus", true, "&oplus;", "[circled plus]", "[circled plus]", ""),
("otimes", "\\otimes", true, "&otimes;", "[circled times]", "[circled times]", ""),
("check", "\\checkmark", true, "&checkmark;", "[checkmark]", "[checkmark]", ""),
("checkmark", "\\checkmark", true, "&check;", "[checkmark]", "[checkmark]", ""),
// Miscellaneous (seldom used)
("ordf", "\\textordfeminine{}", false, "&ordf;", "_a_", "ª", "ª"),
("ordm", "\\textordmasculine{}", false, "&ordm;", "_o_", "º", "º"),
("cedil", "\\c{}", false, "&cedil;", "[cedilla]", "¸", "¸"),
("oline", "\\overline{~}", true, "&oline;", "[overline]", "¯", ""),
("uml", "\\textasciidieresis{}", false, "&uml;", "[diaeresis]", "¨", "¨"),
("zwnj", "\\/{}", false, "&zwnj;", "", "", ""),
("zwj", "", false, "&zwj;", "", "", ""),
("lrm", "", false, "&lrm;", "", "", "\u{200E}"),
("rlm", "", false, "&rlm;", "", "", "\u{200F}"),
// Smilies
("smiley", "\\ddot\\smile", true, "&#9786;", ":-)", ":-)", ""),
("blacksmile", "\\ddot\\smile", true, "&#9787;", ":-)", ":-)", ""),
("sad", "\\ddot\\frown", true, "&#9785;", ":-(", ":-(", ""),
("frowny", "\\ddot\\frown", true, "&#9785;", ":-(", ":-(", ""),
// Suits
("clubs", "\\clubsuit", true, "&clubs;", "[clubs]", "[clubs]", ""),
("clubsuit", "\\clubsuit", true, "&clubs;", "[clubs]", "[clubs]", ""),
("spades", "\\spadesuit", true, "&spades;", "[spades]", "[spades]", ""),
("spadesuit", "\\spadesuit", true, "&spades;", "[spades]", "[spades]", ""),
("hearts", "\\heartsuit", true, "&hearts;", "[hearts]", "[hearts]", ""),
("heartsuit", "\\heartsuit", true, "&heartsuit;", "[hearts]", "[hearts]", ""),
("diams", "\\diamondsuit", true, "&diams;", "[diamonds]", "[diamonds]", ""),
("diamondsuit", "\\diamondsuit", true, "&diams;", "[diamonds]", "[diamonds]", ""),
("diamond", "\\diamondsuit", true, "&diamond;", "[diamond]", "[diamond]", ""),
("Diamond", "\\diamondsuit", true, "&diamond;", "[diamond]", "[diamond]", ""),
("loz", "\\lozenge", true, "&loz;", "[lozenge]", "[lozenge]", ""),
// spaces
// fish shell:
// for i in (seq 1 20)
// echo '("'(string repeat -n $i ' ')'", "\\\\hspace*{'(math '0.5*'$i)'em}", true, "'(string repeat -n $i '&ensp;')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i ' ')'", "'(string repeat -n $i '\\\\x2002')'")'
// end
(" ", "\\hspace*{0.5em}", true, "&ensp;", " ", " ", "\\x2002"),
(" ", "\\hspace*{1em}", true, "&ensp;&ensp;", " ", " ", "\\x2002\\x2002"),
(" ", "\\hspace*{1.5em}", true, "&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{2em}", true, "&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{2.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{3em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{3.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{4em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{4.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{5.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{6em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{6.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{7em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{7.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{8em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{8.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{9em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{9.5em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
(" ", "\\hspace*{10em}", true, "&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;", " ", " ", "\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002\\x2002"),
];

View file

@ -1,73 +0,0 @@
use crate::ast::*;
/// A syntax element that can have children, carried by `Event::Enter` and
/// `Event::Leave`.
///
/// Each variant wraps the node type of the same name (these names are
/// presumably the AST types brought in by `use crate::ast::*` — confirm
/// against that module).
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[non_exhaustive]
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Container {
// Document structure
Document(Document),
Section(Section),
Paragraph(Paragraph),
Headline(Headline),
// Tables
OrgTable(OrgTable),
OrgTableRow(OrgTableRow),
OrgTableCell(OrgTableCell),
TableEl(TableEl),
// Lists
List(List),
ListItem(ListItem),
// Other greater elements and elements
Drawer(Drawer),
DynBlock(DynBlock),
FnDef(FnDef),
Comment(Comment),
FixedWidth(FixedWidth),
// Blocks
SpecialBlock(SpecialBlock),
QuoteBlock(QuoteBlock),
CenterBlock(CenterBlock),
VerseBlock(VerseBlock),
CommentBlock(CommentBlock),
ExampleBlock(ExampleBlock),
ExportBlock(ExportBlock),
SourceBlock(SourceBlock),
// Inline objects
Link(Link),
RadioTarget(RadioTarget),
FnRef(FnRef),
Target(Target),
Bold(Bold),
Strike(Strike),
Italic(Italic),
Underline(Underline),
Verbatim(Verbatim),
Code(Code),
Superscript(Superscript),
Subscript(Subscript),
// Keywords and related elements
BabelCall(BabelCall),
PropertyDrawer(PropertyDrawer),
AffiliatedKeyword(AffiliatedKeyword),
Keyword(Keyword),
}
/// A single step of a syntax-tree traversal, as received by
/// `Traverser::event`.
///
/// Elements with children are reported as an `Enter`/`Leave` pair around a
/// [`Container`]; every other variant carries one node directly.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[non_exhaustive]
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Event {
/// A container is being entered.
Enter(Container),
/// A container is being left.
Leave(Container),
/// A text token.
Text(Token),
Macros(Macros),
Cookie(Cookie),
InlineCall(InlineCall),
InlineSrc(InlineSrc),
Clock(Clock),
LineBreak(LineBreak),
Snippet(Snippet),
Rule(Rule),
Timestamp(Timestamp),
LatexFragment(LatexFragment),
LatexEnvironment(LatexEnvironment),
Entity(Entity),
/// Only available when the `syntax-org-fc` feature is enabled.
#[cfg(feature = "syntax-org-fc")]
Cloze(Cloze),
}

View file

@ -1,12 +1,10 @@
use rowan::NodeOrToken;
use std::cmp::min;
use std::fmt;
use std::fmt::Write as _;
use std::io::{Error, Result as IOResult, Write};
use super::event::{Container, Event};
use super::TraversalContext;
use super::Traverser;
use crate::{SyntaxElement, SyntaxKind, SyntaxNode};
use jetscii::{bytes, BytesConst};
use crate::elements::{Element, Table, TableCell, TableRow, Timestamp};
use crate::export::write_datetime;
/// A wrapper for escaping sensitive characters in html.
///
@ -28,7 +26,11 @@ impl<S: AsRef<str>> fmt::Display for HtmlEscape<S> {
let content = self.0.as_ref();
let bytes = content.as_bytes();
while let Some(off) = jetscii::bytes!(b'<', b'>', b'&', b'\'', b'"').find(&bytes[pos..]) {
lazy_static::lazy_static! {
static ref ESCAPE_BYTES: BytesConst = bytes!(b'<', b'>', b'&', b'\'', b'"');
}
while let Some(off) = ESCAPE_BYTES.find(&bytes[pos..]) {
write!(f, "{}", &content[pos..pos + off])?;
pos += off + 1;
@ -39,7 +41,7 @@ impl<S: AsRef<str>> fmt::Display for HtmlEscape<S> {
b'&' => write!(f, "&amp;")?,
b'\'' => write!(f, "&apos;")?,
b'"' => write!(f, "&quot;")?,
_ => {}
_ => unreachable!(),
}
}
@ -47,295 +49,349 @@ impl<S: AsRef<str>> fmt::Display for HtmlEscape<S> {
}
}
/// Callbacks that turn an `Element` into HTML written to `w`.
///
/// `E` is the handler's error type; it must be constructible from
/// `std::io::Error` so that write failures can be propagated with `?`.
/// Implementors must also implement `Default`.
pub trait HtmlHandler<E: From<Error>>: Default {
/// Writes the markup that opens `element` (for elements without children,
/// their whole rendering).
// NOTE(review): presumably called when the exporter enters an element; the
// call site is not visible here — confirm.
fn start<W: Write>(&mut self, w: W, element: &Element) -> Result<(), E>;
/// Writes the markup that closes `element`.
// NOTE(review): presumably called when the exporter leaves an element; the
// call site is not visible here — confirm.
fn end<W: Write>(&mut self, w: W, element: &Element) -> Result<(), E>;
}
/// Default Html Handler
#[derive(Default)]
pub struct HtmlExport {
output: String,
pub struct DefaultHtmlHandler;
in_descriptive_list: Vec<bool>,
impl HtmlHandler<Error> for DefaultHtmlHandler {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> IOResult<()> {
match element {
// container elements
Element::SpecialBlock(_) => (),
Element::QuoteBlock(_) => write!(w, "<blockquote>")?,
Element::CenterBlock(_) => write!(w, "<div class=\"center\">")?,
Element::VerseBlock(_) => write!(w, "<p class=\"verse\">")?,
Element::Bold => write!(w, "<b>")?,
Element::Document { .. } => write!(w, "<main>")?,
Element::DynBlock(_dyn_block) => (),
Element::Headline { .. } => (),
Element::List(list) => {
if list.ordered {
write!(w, "<ol>")?;
} else {
write!(w, "<ul>")?;
}
}
Element::Italic => write!(w, "<i>")?,
Element::ListItem(_) => write!(w, "<li>")?,
Element::Paragraph { .. } => write!(w, "<p>")?,
Element::Section => write!(w, "<section>")?,
Element::Strike => write!(w, "<s>")?,
Element::Underline => write!(w, "<u>")?,
// non-container elements
Element::CommentBlock(_) => (),
Element::ExampleBlock(block) => write!(
w,
"<pre class=\"example\">{}</pre>",
HtmlEscape(&block.contents)
)?,
Element::ExportBlock(block) => {
if block.data.eq_ignore_ascii_case("HTML") {
write!(w, "{}", block.contents)?
}
}
Element::SourceBlock(block) => {
if block.language.is_empty() {
write!(
w,
"<pre class=\"example\">{}</pre>",
HtmlEscape(&block.contents)
)?;
} else {
write!(
w,
"<div class=\"org-src-container\"><pre class=\"src src-{}\">{}</pre></div>",
block.language,
HtmlEscape(&block.contents)
)?;
}
}
Element::BabelCall(_) => (),
Element::InlineSrc(inline_src) => write!(
w,
"<code class=\"src src-{}\">{}</code>",
inline_src.lang,
HtmlEscape(&inline_src.body)
)?,
Element::Code { value } => write!(w, "<code>{}</code>", HtmlEscape(value))?,
Element::FnRef(_fn_ref) => (),
Element::InlineCall(_) => (),
Element::Link(link) => write!(
w,
"<a href=\"{}\">{}</a>",
HtmlEscape(&link.path),
HtmlEscape(link.desc.as_ref().unwrap_or(&link.path)),
)?,
Element::Macros(_macros) => (),
Element::RadioTarget => (),
Element::Snippet(snippet) => {
if snippet.name.eq_ignore_ascii_case("HTML") {
write!(w, "{}", snippet.value)?;
}
}
Element::Target(_target) => (),
Element::Text { value } => write!(w, "{}", HtmlEscape(value))?,
Element::Timestamp(timestamp) => {
write!(
&mut w,
"<span class=\"timestamp-wrapper\"><span class=\"timestamp\">"
)?;
table_row: TableRow,
}
match timestamp {
Timestamp::Active { start, .. } => {
write_datetime(&mut w, "&lt;", start, "&gt;")?;
}
Timestamp::Inactive { start, .. } => {
write_datetime(&mut w, "[", start, "]")?;
}
Timestamp::ActiveRange { start, end, .. } => {
write_datetime(&mut w, "&lt;", start, "&gt;&#x2013;")?;
write_datetime(&mut w, "&lt;", end, "&gt;")?;
}
Timestamp::InactiveRange { start, end, .. } => {
write_datetime(&mut w, "[", start, "]&#x2013;")?;
write_datetime(&mut w, "[", end, "]")?;
}
Timestamp::Diary { value } => {
write!(&mut w, "&lt;%%({})&gt;", HtmlEscape(value))?
}
}
/// Position of the HTML exporter inside the org table currently being
/// rendered; decides when `<thead>`/`<tbody>` are opened and closed.
#[derive(Default, PartialEq, Eq)]
enum TableRow {
/// No section open yet; the next non-rule row opens `<thead>`.
#[default]
HeaderRule,
/// Inside an open `<thead>`.
Header,
/// No section open; the next non-rule row opens `<tbody>`.
BodyRule,
/// Inside an open `<tbody>`.
Body,
}
write!(&mut w, "</span></span>")?;
}
Element::Verbatim { value } => write!(&mut w, "<code>{}</code>", HtmlEscape(value))?,
Element::FnDef(_fn_def) => (),
Element::Clock(_clock) => (),
Element::Comment(_) => (),
Element::FixedWidth(fixed_width) => write!(
w,
"<pre class=\"example\">{}</pre>",
HtmlEscape(&fixed_width.value)
)?,
Element::Keyword(_keyword) => (),
Element::Drawer(_drawer) => (),
Element::Rule(_) => write!(w, "<hr>")?,
Element::Cookie(cookie) => write!(w, "<code>{}</code>", cookie.value)?,
Element::Title(title) => {
write!(w, "<h{}>", if title.level <= 6 { title.level } else { 6 })?;
}
Element::Table(Table::TableEl { .. }) => (),
Element::Table(Table::Org { has_header, .. }) => {
write!(w, "<table>")?;
if *has_header {
write!(w, "<thead>")?;
} else {
write!(w, "<tbody>")?;
}
}
Element::TableRow(row) => match row {
TableRow::Body => write!(w, "<tr>")?,
TableRow::BodyRule => write!(w, "</tbody><tbody>")?,
TableRow::Header => write!(w, "<tr>")?,
TableRow::HeaderRule => write!(w, "</thead><tbody>")?,
},
Element::TableCell(cell) => match cell {
TableCell::Body => write!(w, "<td>")?,
TableCell::Header => write!(w, "<th>")?,
},
}
impl HtmlExport {
pub fn push_str(&mut self, s: impl AsRef<str>) {
self.output += s.as_ref();
Ok(())
}
pub fn finish(self) -> String {
self.output
}
fn end<W: Write>(&mut self, mut w: W, element: &Element) -> IOResult<()> {
match element {
// container elements
Element::SpecialBlock(_) => (),
Element::QuoteBlock(_) => write!(w, "</blockquote>")?,
Element::CenterBlock(_) => write!(w, "</div>")?,
Element::VerseBlock(_) => write!(w, "</p>")?,
Element::Bold => write!(w, "</b>")?,
Element::Document { .. } => write!(w, "</main>")?,
Element::DynBlock(_dyn_block) => (),
Element::Headline { .. } => (),
Element::List(list) => {
if list.ordered {
write!(w, "</ol>")?;
} else {
write!(w, "</ul>")?;
}
}
Element::Italic => write!(w, "</i>")?,
Element::ListItem(_) => write!(w, "</li>")?,
Element::Paragraph { .. } => write!(w, "</p>")?,
Element::Section => write!(w, "</section>")?,
Element::Strike => write!(w, "</s>")?,
Element::Underline => write!(w, "</u>")?,
Element::Title(title) => {
write!(w, "</h{}>", if title.level <= 6 { title.level } else { 6 })?
}
Element::Table(Table::TableEl { .. }) => (),
Element::Table(Table::Org { .. }) => {
write!(w, "</tbody></table>")?;
}
Element::TableRow(TableRow::Body) | Element::TableRow(TableRow::Header) => {
write!(w, "</tr>")?;
}
Element::TableCell(cell) => match cell {
TableCell::Body => write!(w, "</td>")?,
TableCell::Header => write!(w, "</th>")?,
},
// non-container elements
_ => debug_assert!(!element.is_container()),
}
/// Render syntax node to html string
Ok(())
}
}
#[cfg(feature = "syntect")]
mod syntect_handler {
use super::*;
use std::marker::PhantomData;
use syntect::{
easy::HighlightLines,
highlighting::ThemeSet,
html::{styled_line_to_highlighted_html, IncludeBackground},
parsing::SyntaxSet,
};
/// Syntect Html Handler
///
/// Simple Usage:
///
/// ```rust
/// use orgize::{Org, ast::Bold, export::HtmlExport, rowan::ast::AstNode};
/// use orgize::Org;
/// use orgize::export::{DefaultHtmlHandler, SyntectHtmlHandler};
///
/// let org = Org::parse("* /hello/ *world*");
/// let bold = org.first_node::<Bold>().unwrap();
/// let mut html = HtmlExport::default();
/// html.render(bold.syntax());
/// assert_eq!(html.finish(), "<b>world</b>");
/// let mut handler = SyntectHtmlHandler::new(DefaultHtmlHandler);
/// let org = Org::parse("src_rust{println!(\"Hello\")}");
///
/// let mut vec = vec![];
///
/// org.write_html_custom(&mut vec, &mut handler).unwrap();
/// ```
pub fn render(&mut self, node: &SyntaxNode) {
let mut ctx = TraversalContext::default();
self.element(SyntaxElement::Node(node.clone()), &mut ctx);
///
/// Customize:
///
/// ```rust,no_run
/// // orgize has re-exported the whole syntect crate
/// use orgize::syntect::parsing::SyntaxSet;
/// use orgize::export::{DefaultHtmlHandler, SyntectHtmlHandler};
///
/// let mut handler = SyntectHtmlHandler {
/// syntax_set: {
/// let set = SyntaxSet::load_defaults_newlines();
/// let mut builder = set.into_builder();
/// // add extra language syntax
/// builder.add_from_folder("path/to/syntax/dir", true).unwrap();
/// builder.build()
/// },
/// // specify theme
/// theme: String::from("Solarized (dark)"),
/// inner: DefaultHtmlHandler,
/// ..Default::default()
/// };
///
/// // Make sure the chosen theme exists in `theme_set`; otherwise highlighting panics at runtime
/// if handler.theme_set.themes.contains_key("dont-exists") {
///
/// }
/// ```
pub struct SyntectHtmlHandler<E: From<Error>, H: HtmlHandler<E>> {
/// syntax set, default is `SyntaxSet::load_defaults_newlines()`
pub syntax_set: SyntaxSet,
/// theme set, default is `ThemeSet::load_defaults()`
pub theme_set: ThemeSet,
/// theme used for highlighting, default is `"InspiredGitHub"`
pub theme: String,
/// inner html handler
pub inner: H,
/// background color, default is `IncludeBackground::No`
pub background: IncludeBackground,
/// handler error type
pub error_type: PhantomData<E>,
}
}
impl Traverser for HtmlExport {
fn event(&mut self, event: Event, ctx: &mut TraversalContext) {
match event {
Event::Enter(Container::Document(_)) => self.output += "<main>",
Event::Leave(Container::Document(_)) => self.output += "</main>",
Event::Enter(Container::Headline(headline)) => {
let level = min(headline.level(), 6);
let _ = write!(&mut self.output, "<h{level}>");
for elem in headline.title() {
self.element(elem, ctx);
}
let _ = write!(&mut self.output, "</h{level}>");
impl<E: From<Error>, H: HtmlHandler<E>> SyntectHtmlHandler<E, H> {
pub fn new(inner: H) -> Self {
SyntectHtmlHandler {
inner,
..Default::default()
}
Event::Leave(Container::Headline(_)) => {}
}
Event::Enter(Container::Paragraph(_)) => self.output += "<p>",
Event::Leave(Container::Paragraph(_)) => self.output += "</p>",
fn highlight(&self, language: Option<&str>, content: &str) -> String {
let mut highlighter = HighlightLines::new(
language
.and_then(|lang| self.syntax_set.find_syntax_by_token(lang))
.unwrap_or_else(|| self.syntax_set.find_syntax_plain_text()),
&self.theme_set.themes[&self.theme],
);
let regions = highlighter.highlight(content, &self.syntax_set);
styled_line_to_highlighted_html(&regions[..], self.background)
}
}
Event::Enter(Container::Section(_)) => self.output += "<section>",
Event::Leave(Container::Section(_)) => self.output += "</section>",
Event::Enter(Container::Italic(_)) => self.output += "<i>",
Event::Leave(Container::Italic(_)) => self.output += "</i>",
Event::Enter(Container::Bold(_)) => self.output += "<b>",
Event::Leave(Container::Bold(_)) => self.output += "</b>",
Event::Enter(Container::Strike(_)) => self.output += "<s>",
Event::Leave(Container::Strike(_)) => self.output += "</s>",
Event::Enter(Container::Underline(_)) => self.output += "<u>",
Event::Leave(Container::Underline(_)) => self.output += "</u>",
Event::Enter(Container::Verbatim(_)) => self.output += "<code>",
Event::Leave(Container::Verbatim(_)) => self.output += "</code>",
Event::Enter(Container::Code(_)) => self.output += "<code>",
Event::Leave(Container::Code(_)) => self.output += "</code>",
Event::Enter(Container::SourceBlock(block)) => {
if let Some(language) = block.language() {
let _ = write!(
&mut self.output,
r#"<pre><code class="language-{}">"#,
HtmlEscape(&language)
);
} else {
self.output += r#"<pre><code>"#
}
impl<E: From<Error>, H: HtmlHandler<E>> Default for SyntectHtmlHandler<E, H> {
fn default() -> Self {
SyntectHtmlHandler {
syntax_set: SyntaxSet::load_defaults_newlines(),
theme_set: ThemeSet::load_defaults(),
theme: String::from("InspiredGitHub"),
inner: H::default(),
background: IncludeBackground::No,
error_type: PhantomData,
}
Event::Leave(Container::SourceBlock(_)) => self.output += "</code></pre>",
}
}
Event::Enter(Container::QuoteBlock(_)) => self.output += "<blockquote>",
Event::Leave(Container::QuoteBlock(_)) => self.output += "</blockquote>",
Event::Enter(Container::VerseBlock(_)) => self.output += "<p class=\"verse\">",
Event::Leave(Container::VerseBlock(_)) => self.output += "</p>",
Event::Enter(Container::ExampleBlock(_)) => self.output += "<pre class=\"example\">",
Event::Leave(Container::ExampleBlock(_)) => self.output += "</pre>",
Event::Enter(Container::CenterBlock(_)) => self.output += "<div class=\"center\">",
Event::Leave(Container::CenterBlock(_)) => self.output += "</div>",
Event::Enter(Container::CommentBlock(_)) => self.output += "<!--",
Event::Leave(Container::CommentBlock(_)) => self.output += "-->",
Event::Enter(Container::Comment(_)) => self.output += "<!--",
Event::Leave(Container::Comment(_)) => self.output += "-->",
Event::Enter(Container::Subscript(_)) => self.output += "<sub>",
Event::Leave(Container::Subscript(_)) => self.output += "</sub>",
Event::Enter(Container::Superscript(_)) => self.output += "<sup>",
Event::Leave(Container::Superscript(_)) => self.output += "</sup>",
Event::Enter(Container::List(list)) => {
self.output += if list.is_ordered() {
self.in_descriptive_list.push(false);
"<ol>"
} else if list.is_descriptive() {
self.in_descriptive_list.push(true);
"<dl>"
} else {
self.in_descriptive_list.push(false);
"<ul>"
};
}
Event::Leave(Container::List(list)) => {
self.output += if list.is_ordered() {
"</ol>"
} else if let Some(true) = self.in_descriptive_list.last() {
"</dl>"
} else {
"</ul>"
};
self.in_descriptive_list.pop();
}
Event::Enter(Container::ListItem(list_item)) => {
if let Some(&true) = self.in_descriptive_list.last() {
self.output += "<dt>";
for elem in list_item.tag() {
self.element(elem, ctx);
}
self.output += "</dt><dd>";
} else {
self.output += "<li>";
}
}
Event::Leave(Container::ListItem(_)) => {
if let Some(&true) = self.in_descriptive_list.last() {
self.output += "</dd>";
} else {
self.output += "</li>";
}
}
Event::Enter(Container::OrgTable(table)) => {
self.output += "<table>";
self.table_row = if table.has_header() {
TableRow::HeaderRule
} else {
TableRow::BodyRule
}
}
Event::Leave(Container::OrgTable(_)) => {
match self.table_row {
TableRow::Body => self.output += "</tbody>",
TableRow::Header => self.output += "</thead>",
_ => {}
}
self.output += "</table>";
}
Event::Enter(Container::OrgTableRow(row)) => {
if row.is_rule() {
match self.table_row {
TableRow::Body => {
self.output += "</tbody>";
self.table_row = TableRow::BodyRule;
}
TableRow::Header => {
self.output += "</thead>";
self.table_row = TableRow::BodyRule;
}
_ => {}
}
ctx.skip();
} else {
match self.table_row {
TableRow::HeaderRule => {
self.table_row = TableRow::Header;
self.output += "<thead>";
}
TableRow::BodyRule => {
self.table_row = TableRow::Body;
self.output += "<tbody>";
}
_ => {}
}
self.output += "<tr>";
}
}
Event::Leave(Container::OrgTableRow(row)) => {
if row.is_rule() {
match self.table_row {
TableRow::Body => {
self.output += "</tbody>";
self.table_row = TableRow::BodyRule;
}
TableRow::Header => {
self.output += "</thead>";
self.table_row = TableRow::BodyRule;
}
_ => {}
}
ctx.skip();
} else {
self.output += "</tr>";
}
}
Event::Enter(Container::OrgTableCell(_)) => self.output += "<td>",
Event::Leave(Container::OrgTableCell(_)) => self.output += "</td>",
Event::Enter(Container::Link(link)) => {
let path = link.path();
let path = path.trim_start_matches("file:");
if link.is_image() {
let _ = write!(&mut self.output, r#"<img src="{}">"#, HtmlEscape(&path));
return ctx.skip();
}
let _ = write!(&mut self.output, r#"<a href="{}">"#, HtmlEscape(&path));
if !link.has_description() {
let _ = write!(&mut self.output, "{}</a>", HtmlEscape(&path));
ctx.skip();
}
}
Event::Leave(Container::Link(_)) => self.output += "</a>",
Event::Text(text) => {
let _ = write!(&mut self.output, "{}", HtmlEscape(text));
}
Event::LineBreak(_) => self.output += "<br/>",
Event::Snippet(snippet) => {
if snippet.backend().eq_ignore_ascii_case("html") {
self.output += &snippet.value();
}
}
Event::Rule(_) => self.output += "<hr/>",
Event::Timestamp(timestamp) => {
self.output += r#"<span class="timestamp-wrapper"><span class="timestamp">"#;
for e in timestamp.syntax.children_with_tokens() {
match e {
NodeOrToken::Token(t) if t.kind() == SyntaxKind::MINUS2 => {
self.output += "&#x2013;";
}
NodeOrToken::Token(t) => {
self.output += t.text();
}
_ => {}
impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for SyntectHtmlHandler<E, H> {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), E> {
match element {
Element::InlineSrc(inline_src) => write!(
w,
"<code>{}</code>",
self.highlight(Some(&inline_src.lang), &inline_src.body)
)?,
Element::SourceBlock(block) => {
if block.language.is_empty() {
write!(w, "<pre class=\"example\">{}</pre>", block.contents)?;
} else {
write!(
w,
"<div class=\"org-src-container\"><pre class=\"src src-{}\">{}</pre></div>",
block.language,
self.highlight(Some(&block.language), &block.contents)
)?;
}
}
self.output += r#"</span></span>"#;
Element::FixedWidth(fixed_width) => write!(
w,
"<pre class=\"example\">{}</pre>",
self.highlight(None, &fixed_width.value)
)?,
Element::ExampleBlock(block) => write!(
w,
"<pre class=\"example\">{}</pre>",
self.highlight(None, &block.contents)
)?,
_ => self.inner.start(w, element)?,
}
Ok(())
}
Event::LatexFragment(latex) => {
let _ = write!(&mut self.output, "{}", &latex.syntax);
}
Event::LatexEnvironment(latex) => {
let _ = write!(&mut self.output, "{}", &latex.syntax);
}
// ignores keyword
Event::Enter(Container::Keyword(_)) => ctx.skip(),
Event::Entity(entity) => self.output += entity.html(),
_ => {}
fn end<W: Write>(&mut self, w: W, element: &Element) -> Result<(), E> {
self.inner.end(w, element)
}
}
}
#[cfg(feature = "syntect")]
pub use syntect_handler::SyntectHtmlHandler;

View file

@ -1,186 +0,0 @@
use std::cmp::min;
use std::fmt::Write as _;
use crate::{SyntaxElement, SyntaxNode};
use super::event::{Container, Event};
use super::TraversalContext;
use super::Traverser;
/// Exporter that renders a syntax tree as Markdown text.
///
/// Output is accumulated in an internal buffer and retrieved with `finish`.
#[derive(Default)]
pub struct MarkdownExport {
/// Markdown produced so far.
output: String,
/// Set while a quote block is being rendered, so that every text line
/// after the first gets a `> ` prefix.
inside_blockquote: bool,
}
impl MarkdownExport {
    /// Appends `s` verbatim to the output buffer (no escaping is applied).
    pub fn push_str(&mut self, s: impl AsRef<str>) {
        self.output.push_str(s.as_ref());
    }

    /// Renders `node` and everything below it, appending the resulting
    /// Markdown to the output buffer.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Bold, export::MarkdownExport, rowan::ast::AstNode};
    ///
    /// let org = Org::parse("* /hello/ *world*");
    /// let bold = org.first_node::<Bold>().unwrap();
    /// let mut markdown = MarkdownExport::default();
    /// markdown.render(bold.syntax());
    /// assert_eq!(markdown.finish(), "**world**");
    /// ```
    pub fn render(&mut self, node: &SyntaxNode) {
        let root = SyntaxElement::Node(node.clone());
        let mut ctx = TraversalContext::default();
        self.element(root, &mut ctx);
    }

    /// Consumes the exporter and returns the accumulated Markdown.
    pub fn finish(self) -> String {
        let Self { output, .. } = self;
        output
    }

    /// Ensures that whatever is written next starts on a fresh line: adds a
    /// `\n` unless the buffer is empty or already ends with a line break.
    fn follows_newline(&mut self) {
        let at_line_start = self.output.is_empty() || self.output.ends_with(['\n', '\r']);
        if !at_line_start {
            self.output.push('\n');
        }
    }
}
impl Traverser for MarkdownExport {
    /// Translates one traversal event into Markdown appended to `self.output`.
    ///
    /// Events that have no Markdown rendering here (document, underline,
    /// lists, tables, line breaks, snippets, timestamps and anything not
    /// listed) emit no markup of their own.
    fn event(&mut self, event: Event, ctx: &mut TraversalContext) {
        match event {
            Event::Enter(Container::Document(_)) => {}
            Event::Leave(Container::Document(_)) => {}

            Event::Enter(Container::Headline(headline)) => {
                self.follows_newline();
                // Markdown only has six heading levels; deeper headlines are clamped.
                let level = min(headline.level(), 6);
                let _ = write!(&mut self.output, "{} ", "#".repeat(level));
                // The title is rendered here, right after the `#` prefix.
                for elem in headline.title() {
                    self.element(elem, ctx);
                }
            }
            Event::Leave(Container::Headline(_)) => {}

            Event::Enter(Container::Paragraph(_)) => {}
            Event::Leave(Container::Paragraph(_)) => self.output += "\n",

            Event::Enter(Container::Section(_)) => self.follows_newline(),
            Event::Leave(Container::Section(_)) => {}

            // Inline emphasis.
            Event::Enter(Container::Italic(_)) => self.output += "*",
            Event::Leave(Container::Italic(_)) => self.output += "*",

            Event::Enter(Container::Bold(_)) => self.output += "**",
            Event::Leave(Container::Bold(_)) => self.output += "**",

            Event::Enter(Container::Strike(_)) => self.output += "~~",
            Event::Leave(Container::Strike(_)) => self.output += "~~",

            // No underline markup is emitted; only the content is kept.
            Event::Enter(Container::Underline(_)) => {}
            Event::Leave(Container::Underline(_)) => {}

            Event::Enter(Container::Verbatim(_))
            | Event::Leave(Container::Verbatim(_))
            | Event::Enter(Container::Code(_))
            | Event::Leave(Container::Code(_)) => self.output += "`",

            // Fenced code block, with the language (if any) right after the fence.
            Event::Enter(Container::SourceBlock(block)) => {
                self.follows_newline();
                self.output += "```";
                if let Some(language) = block.language() {
                    self.output += &language;
                }
            }
            Event::Leave(Container::SourceBlock(_)) => self.output += "```\n",

            Event::Enter(Container::QuoteBlock(_)) => {
                // The flag makes `Event::Text` prefix continuation lines with `> `.
                self.inside_blockquote = true;
                self.follows_newline();
                self.output += "> ";
            }
            Event::Leave(Container::QuoteBlock(_)) => self.inside_blockquote = false,

            // Comments are emitted as HTML comments.
            Event::Enter(Container::CommentBlock(_)) => self.output += "<!--",
            Event::Leave(Container::CommentBlock(_)) => self.output += "-->",

            Event::Enter(Container::Comment(_)) => self.output += "<!--",
            Event::Leave(Container::Comment(_)) => self.output += "-->",

            // Sub/superscript are emitted as inline HTML tags.
            Event::Enter(Container::Subscript(_)) => self.output += "<sub>",
            Event::Leave(Container::Subscript(_)) => self.output += "</sub>",

            Event::Enter(Container::Superscript(_)) => self.output += "<sup>",
            Event::Leave(Container::Superscript(_)) => self.output += "</sup>",

            Event::Enter(Container::List(_list)) => {}
            Event::Leave(Container::List(_list)) => {}

            Event::Enter(Container::ListItem(list_item)) => {
                // Reproduce the item's original indentation and bullet.
                self.follows_newline();
                self.output += &" ".repeat(list_item.indent());
                self.output += &list_item.bullet();
            }
            Event::Leave(Container::ListItem(_)) => {}

            // Tables have no dedicated markup here.
            Event::Enter(Container::OrgTable(_table)) => {}
            Event::Leave(Container::OrgTable(_)) => {}

            Event::Enter(Container::OrgTableRow(_row)) => {}
            Event::Leave(Container::OrgTableRow(_row)) => {}

            Event::Enter(Container::OrgTableCell(_)) => {}
            Event::Leave(Container::OrgTableCell(_)) => {}

            Event::Enter(Container::Link(link)) => {
                let path = link.path();
                let path = path.trim_start_matches("file:");

                // Images and description-less links are written in one go;
                // `ctx.skip()` is called so the rest of the link is not
                // processed (presumably children and the leave event — confirm).
                if link.is_image() {
                    let _ = write!(&mut self.output, "![]({path})");
                    return ctx.skip();
                }

                if !link.has_description() {
                    let _ = write!(&mut self.output, r#"[{}]({})"#, &path, &path);
                    return ctx.skip();
                }

                // The description is rendered between `[` here and `](…)` on leave.
                self.output += "[";
            }
            Event::Leave(Container::Link(link)) => {
                // Strip the `file:` prefix exactly as on enter, so links with a
                // description get the same target as links without one.
                let path = link.path();
                let path = path.trim_start_matches("file:");
                let _ = write!(&mut self.output, r#"]({})"#, path);
            }

            Event::Text(text) => {
                if self.inside_blockquote {
                    // Keep every line of a multi-line text inside the quote.
                    for (idx, line) in text.split('\n').enumerate() {
                        if idx != 0 {
                            self.output += "\n> ";
                        }
                        self.output += line;
                    }
                } else {
                    self.output += &*text;
                }
            }

            Event::LineBreak(_) => {}

            Event::Snippet(_snippet) => {}

            Event::Rule(_) => self.output += "\n-----\n",

            Event::Timestamp(_timestamp) => {}

            // LaTeX is passed through as its source text.
            Event::LatexFragment(latex) => {
                let _ = write!(&mut self.output, "{}", &latex.syntax);
            }
            Event::LatexEnvironment(latex) => {
                let _ = write!(&mut self.output, "{}", &latex.syntax);
            }

            Event::Entity(entity) => self.output += entity.utf8(),

            _ => {}
        }
    }
}

View file

@ -1,11 +1,31 @@
//! Export `Org` struct to various formats.
mod event;
mod html;
mod markdown;
mod traverse;
mod org;
pub use event::{Container, Event};
pub use html::{HtmlEscape, HtmlExport};
pub use markdown::MarkdownExport;
pub use traverse::{from_fn, from_fn_with_ctx, FromFn, FromFnWithCtx, TraversalContext, Traverser};
#[cfg(feature = "syntect")]
pub use html::SyntectHtmlHandler;
pub use html::{DefaultHtmlHandler, HtmlEscape, HtmlHandler};
pub use org::{DefaultOrgHandler, OrgHandler};
use std::io::{Error, Write};
use crate::elements::Datetime;
/// Writes `datetime` in org timestamp notation, wrapped in the `start` and `end` delimiters.
///
/// The date is written as `YYYY-MM-DD dayname` (month and day zero-padded to two
/// digits). The time ` HH:MM` is appended only when both `hour` and `minute`
/// are present.
///
/// # Errors
///
/// Returns the first I/O error reported by `w`.
pub(crate) fn write_datetime<W: Write>(
    mut w: W,
    start: &str,
    datetime: &Datetime,
    end: &str,
) -> Result<(), Error> {
    // opening delimiter, e.g. `<` or `[`
    write!(w, "{}", start)?;

    // date part
    write!(
        w,
        "{}-{:02}-{:02} {}",
        datetime.year, datetime.month, datetime.day, datetime.dayname
    )?;

    // optional time part: requires both components
    match (datetime.hour, datetime.minute) {
        (Some(hour), Some(minute)) => write!(w, " {:02}:{:02}", hour, minute)?,
        _ => {}
    }

    // closing delimiter is the function's result
    write!(w, "{}", end)
}

321
src/export/org.rs Normal file
View file

@ -0,0 +1,321 @@
use std::io::{Error, Result as IOResult, Write};
use crate::elements::{Clock, Element, Table, Timestamp};
use crate::export::write_datetime;
/// Handler that renders elements of an `Org` tree as org-mode text.
///
/// `E` is the handler's error type; it must be constructible from
/// [`std::io::Error`] so that write failures can be propagated with `?`.
pub trait OrgHandler<E: From<Error>>: Default {
/// Called with the start event of `element`; output goes to `w`.
fn start<W: Write>(&mut self, w: W, element: &Element) -> Result<(), E>;
/// Called with the end event of `element`; output goes to `w`.
fn end<W: Write>(&mut self, w: W, element: &Element) -> Result<(), E>;
}
/// The built-in [`OrgHandler`]: a stateless unit struct that reports
/// [`std::io::Error`]s.
#[derive(Default)]
pub struct DefaultOrgHandler;
impl OrgHandler<Error> for DefaultOrgHandler {
/// Writes the org-mode text that belongs to the start event of `element`.
///
/// Container elements only get their opening marker here; the matching `end`
/// call writes the closing marker. Non-container elements are written in
/// full, including their trailing blank lines.
///
/// # Errors
///
/// Returns the first I/O error reported by `w`.
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> IOResult<()> {
match element {
// container elements
Element::SpecialBlock(block) => {
writeln!(w, "#+BEGIN_{}", block.name)?;
write_blank_lines(&mut w, block.pre_blank)?;
}
Element::QuoteBlock(block) => {
writeln!(&mut w, "#+BEGIN_QUOTE")?;
write_blank_lines(&mut w, block.pre_blank)?;
}
Element::CenterBlock(block) => {
writeln!(&mut w, "#+BEGIN_CENTER")?;
write_blank_lines(&mut w, block.pre_blank)?;
}
Element::VerseBlock(block) => {
writeln!(&mut w, "#+BEGIN_VERSE")?;
write_blank_lines(&mut w, block.pre_blank)?;
}
Element::Bold => write!(w, "*")?,
Element::Document { pre_blank } => {
write_blank_lines(w, *pre_blank)?;
}
Element::DynBlock(dyn_block) => {
write!(&mut w, "#+BEGIN: {}", dyn_block.block_name)?;
if let Some(parameters) = &dyn_block.arguments {
write!(&mut w, " {}", parameters)?;
}
// The header line above was written without a newline; the `+ 1`
// terminates it before the `pre_blank` blank lines.
write_blank_lines(&mut w, dyn_block.pre_blank + 1)?;
}
// Nothing is written for a headline or a list itself.
Element::Headline { .. } => (),
Element::List(_list) => (),
Element::Italic => write!(w, "/")?,
Element::ListItem(list_item) => {
// One space per indent level, then the bullet.
for _ in 0..list_item.indent {
write!(&mut w, " ")?;
}
write!(&mut w, "{}", list_item.bullet)?;
}
Element::Paragraph { .. } => (),
Element::Section => (),
Element::Strike => write!(w, "+")?,
Element::Underline => write!(w, "_")?,
Element::Drawer(drawer) => {
writeln!(&mut w, ":{}:", drawer.name)?;
write_blank_lines(&mut w, drawer.pre_blank)?;
}
// non-container elements
// Blocks below write begin line, raw contents, end line and trailing blank lines.
Element::CommentBlock(block) => {
writeln!(&mut w, "#+BEGIN_COMMENT")?;
write!(&mut w, "{}", block.contents)?;
writeln!(&mut w, "#+END_COMMENT")?;
write_blank_lines(&mut w, block.post_blank)?;
}
Element::ExampleBlock(block) => {
writeln!(&mut w, "#+BEGIN_EXAMPLE")?;
write!(&mut w, "{}", block.contents)?;
writeln!(&mut w, "#+END_EXAMPLE")?;
write_blank_lines(&mut w, block.post_blank)?;
}
Element::ExportBlock(block) => {
writeln!(&mut w, "#+BEGIN_EXPORT {}", block.data)?;
write!(&mut w, "{}", block.contents)?;
writeln!(&mut w, "#+END_EXPORT")?;
write_blank_lines(&mut w, block.post_blank)?;
}
Element::SourceBlock(block) => {
writeln!(&mut w, "#+BEGIN_SRC {}", block.language)?;
write!(&mut w, "{}", block.contents)?;
writeln!(&mut w, "#+END_SRC")?;
write_blank_lines(&mut w, block.post_blank)?;
}
Element::BabelCall(call) => {
writeln!(&mut w, "#+CALL: {}", call.value)?;
write_blank_lines(w, call.post_blank)?;
}
Element::InlineSrc(inline_src) => {
// src_LANG[OPTIONS]{BODY}; the options part is optional
write!(&mut w, "src_{}", inline_src.lang)?;
if let Some(options) = &inline_src.options {
write!(&mut w, "[{}]", options)?;
}
write!(&mut w, "{{{}}}", inline_src.body)?;
}
Element::Code { value } => write!(w, "~{}~", value)?,
Element::FnRef(fn_ref) => {
// [fn:LABEL] or [fn:LABEL:DEFINITION]
write!(&mut w, "[fn:{}", fn_ref.label)?;
if let Some(definition) = &fn_ref.definition {
write!(&mut w, ":{}", definition)?;
}
write!(&mut w, "]")?;
}
Element::InlineCall(inline_call) => {
// call_NAME[INSIDE_HEADER](ARGUMENTS)[END_HEADER]; both headers are optional
write!(&mut w, "call_{}", inline_call.name)?;
if let Some(header) = &inline_call.inside_header {
write!(&mut w, "[{}]", header)?;
}
write!(&mut w, "({})", inline_call.arguments)?;
if let Some(header) = &inline_call.end_header {
write!(&mut w, "[{}]", header)?;
}
}
Element::Link(link) => {
// [[PATH]] or [[PATH][DESC]]
write!(&mut w, "[[{}]", link.path)?;
if let Some(desc) = &link.desc {
write!(&mut w, "[{}]", desc)?;
}
write!(&mut w, "]")?;
}
// No output is produced here for macros, radio targets and targets.
Element::Macros(_macros) => (),
Element::RadioTarget => (),
Element::Snippet(snippet) => write!(w, "@@{}:{}@@", snippet.name, snippet.value)?,
Element::Target(_target) => (),
Element::Text { value } => write!(w, "{}", value)?,
Element::Timestamp(timestamp) => {
write_timestamp(&mut w, &timestamp)?;
}
Element::Verbatim { value } => write!(w, "={}=", value)?,
Element::FnDef(fn_def) => {
// Only the trailing blank lines of a footnote definition are written here.
write_blank_lines(w, fn_def.post_blank)?;
}
Element::Clock(clock) => {
write!(w, "CLOCK: ")?;
match clock {
Clock::Closed {
start,
end,
duration,
post_blank,
..
} => {
// CLOCK: [START]--[END] => DURATION
write_datetime(&mut w, "[", &start, "]--")?;
write_datetime(&mut w, "[", &end, "]")?;
writeln!(&mut w, " => {}", duration)?;
write_blank_lines(&mut w, *post_blank)?;
}
Clock::Running {
start, post_blank, ..
} => {
// The closing delimiter also carries the line terminator.
write_datetime(&mut w, "[", &start, "]\n")?;
write_blank_lines(&mut w, *post_blank)?;
}
}
}
Element::Comment(comment) => {
write!(w, "{}", comment.value)?;
write_blank_lines(&mut w, comment.post_blank)?;
}
Element::FixedWidth(fixed_width) => {
write!(&mut w, "{}", fixed_width.value)?;
write_blank_lines(&mut w, fixed_width.post_blank)?;
}
Element::Keyword(keyword) => {
// #+KEY: VALUE or #+KEY[OPTIONAL]: VALUE
write!(&mut w, "#+{}", keyword.key)?;
if let Some(optional) = &keyword.optional {
write!(&mut w, "[{}]", optional)?;
}
writeln!(&mut w, ": {}", keyword.value)?;
write_blank_lines(&mut w, keyword.post_blank)?;
}
Element::Rule(rule) => {
writeln!(w, "-----")?;
write_blank_lines(&mut w, rule.post_blank)?;
}
Element::Cookie(_cookie) => (),
Element::Title(title) => {
// Stars, optional keyword and priority, then one space; tags,
// planning and properties are written by `end`.
for _ in 0..title.level {
write!(&mut w, "*")?;
}
if let Some(keyword) = &title.keyword {
write!(&mut w, " {}", keyword)?;
}
if let Some(priority) = title.priority {
write!(&mut w, " [#{}]", priority)?;
}
write!(&mut w, " ")?;
}
// Nothing is written at the start of tables, rows and cells.
Element::Table(_) => (),
Element::TableRow(_) => (),
Element::TableCell(_) => (),
}
Ok(())
}
/// Writes the org-mode text that belongs to the end event of `element`.
///
/// Container elements get their closing marker followed by their trailing
/// blank lines. For `Element::Title` everything that follows the title text
/// is written here: the tags, the planning line and the property drawer.
/// Non-container elements produce no output in this method.
///
/// # Errors
///
/// Returns the first I/O error reported by `w`.
fn end<W: Write>(&mut self, mut w: W, element: &Element) -> IOResult<()> {
    match element {
        // container elements
        Element::SpecialBlock(block) => {
            writeln!(&mut w, "#+END_{}", block.name)?;
            write_blank_lines(&mut w, block.post_blank)?;
        }
        Element::QuoteBlock(block) => {
            writeln!(&mut w, "#+END_QUOTE")?;
            write_blank_lines(&mut w, block.post_blank)?;
        }
        Element::CenterBlock(block) => {
            writeln!(&mut w, "#+END_CENTER")?;
            write_blank_lines(&mut w, block.post_blank)?;
        }
        Element::VerseBlock(block) => {
            writeln!(&mut w, "#+END_VERSE")?;
            write_blank_lines(&mut w, block.post_blank)?;
        }
        Element::Bold => write!(w, "*")?,
        Element::Document { .. } => (),
        Element::DynBlock(dyn_block) => {
            writeln!(&mut w, "#+END:")?;
            write_blank_lines(&mut w, dyn_block.post_blank)?;
        }
        Element::Headline { .. } => (),
        Element::List(list) => {
            write_blank_lines(&mut w, list.post_blank)?;
        }
        Element::Italic => write!(w, "/")?,
        Element::ListItem(_) => (),
        Element::Paragraph { post_blank } => {
            // `+ 1` terminates the paragraph's last line before the blank lines.
            write_blank_lines(&mut w, post_blank + 1)?;
        }
        Element::Section => (),
        Element::Strike => write!(w, "+")?,
        Element::Underline => write!(w, "_")?,
        Element::Drawer(drawer) => {
            writeln!(&mut w, ":END:")?;
            write_blank_lines(&mut w, drawer.post_blank)?;
        }
        Element::Title(title) => {
            // Tags: ` :tag1:tag2:` on the headline itself.
            if !title.tags.is_empty() {
                write!(&mut w, " :")?;
                for tag in &title.tags {
                    write!(&mut w, "{}:", tag)?;
                }
            }
            writeln!(&mut w)?;

            if let Some(planning) = &title.planning {
                // All planning entries share one line, separated by a single
                // space. Track whether *any* entry was already written rather
                // than looking only at the preceding field: with SCHEDULED and
                // CLOSED but no DEADLINE the two entries must still be separated.
                let mut needs_separator = false;
                if let Some(scheduled) = &planning.scheduled {
                    write!(&mut w, "SCHEDULED: ")?;
                    write_timestamp(&mut w, scheduled)?;
                    needs_separator = true;
                }
                if let Some(deadline) = &planning.deadline {
                    if needs_separator {
                        write!(&mut w, " ")?;
                    }
                    write!(&mut w, "DEADLINE: ")?;
                    write_timestamp(&mut w, deadline)?;
                    needs_separator = true;
                }
                if let Some(closed) = &planning.closed {
                    if needs_separator {
                        write!(&mut w, " ")?;
                    }
                    write!(&mut w, "CLOSED: ")?;
                    write_timestamp(&mut w, closed)?;
                }
                writeln!(&mut w)?;
            }

            if !title.properties.is_empty() {
                writeln!(&mut w, ":PROPERTIES:")?;
                for (key, value) in &title.properties {
                    writeln!(&mut w, ":{}: {}", key, value)?;
                }
                writeln!(&mut w, ":END:")?;
            }
            write_blank_lines(&mut w, title.post_blank)?;
        }
        Element::Table(Table::Org { post_blank, .. }) => {
            write_blank_lines(&mut w, *post_blank)?;
        }
        Element::Table(Table::TableEl { post_blank, .. }) => {
            write_blank_lines(&mut w, *post_blank)?;
        }
        Element::TableRow(_) => (),
        Element::TableCell(_) => (),
        // non-container elements
        _ => debug_assert!(!element.is_container()),
    }
    Ok(())
}
}
/// Writes `count` empty lines (a single `\n` each) to `w`.
///
/// # Errors
///
/// Stops at, and returns, the first I/O error reported by `w`.
fn write_blank_lines<W: Write>(mut w: W, count: usize) -> Result<(), Error> {
    (0..count).try_for_each(|_| writeln!(w))
}
/// Writes `timestamp` in org-mode notation.
///
/// Active timestamps are delimited by `<`/`>`, inactive ones by `[`/`]`;
/// ranges are written as two timestamps joined by `--`, and diary timestamps
/// as `<%%(VALUE)>`.
///
/// # Errors
///
/// Returns the first I/O error reported by `w`.
fn write_timestamp<W: Write>(mut w: W, timestamp: &Timestamp) -> Result<(), Error> {
    match timestamp {
        Timestamp::Active { start, .. } => write_datetime(w, "<", start, ">"),
        Timestamp::Inactive { start, .. } => write_datetime(w, "[", start, "]"),
        Timestamp::ActiveRange { start, end, .. } => {
            write_datetime(&mut w, "<", start, ">--")?;
            write_datetime(&mut w, "<", end, ">")
        }
        Timestamp::InactiveRange { start, end, .. } => {
            write_datetime(&mut w, "[", start, "]--")?;
            write_datetime(&mut w, "[", end, "]")
        }
        Timestamp::Diary { value } => write!(w, "<%%({})>", value),
    }
}

View file

@ -1,282 +0,0 @@
use crate::ast::*;
use crate::syntax::{SyntaxElement, SyntaxKind};
use rowan::ast::AstNode;
use SyntaxKind::*;
use super::event::{Container, Event};
/// Signal stored in a `TraversalContext` that tells `Traverser::element` how
/// to proceed after an event handler has run.
#[derive(Default, Debug, PartialEq, Eq, Clone, Copy)]
enum TraversalControl {
/// Set by `TraversalContext::up`.
Up,
/// Set by `TraversalContext::stop`.
Stop,
/// Set by `TraversalContext::skip`.
Skip,
/// Set by `TraversalContext::continue`; this is the default state.
#[default]
Continue,
}
/// State handed to every `Traverser::event` call, through which the handler
/// can steer the traversal.
#[derive(Default)]
pub struct TraversalContext {
// Pending control signal; it is inspected after each event is delivered.
control: TraversalControl,
}
impl TraversalContext {
/// Stops traversal completely
pub fn stop(&mut self) {
self.control = TraversalControl::Stop;
}
/// Skips traversal of the current node's siblings
pub fn up(&mut self) {
self.control = TraversalControl::Up;
}
/// Skips traversal of the current node's descendants
pub fn skip(&mut self) {
self.control = TraversalControl::Skip;
}
/// Continues traversal
pub fn r#continue(&mut self) {
self.control = TraversalControl::Continue;
}
}
/// A trait for enumerating org syntax tree
///
/// ### `TraversalContext`
///
/// `TraversalContext` can be used to control the traversal.
///
/// For example, `ctx.skip()` skips the traversal of the current element's
/// descendants, which can improve the traversal performance.
///
/// ```rust
/// use orgize::{
/// export::{Container, Event, HtmlExport, TraversalContext, Traverser},
/// Org,
/// };
/// use slugify::slugify;
///
/// #[derive(Default)]
/// struct Toc(HtmlExport);
///
/// impl Traverser for Toc {
/// fn event(&mut self, event: Event, ctx: &mut TraversalContext) {
/// match event {
/// Event::Enter(Container::Headline(headline)) => {
/// let title = headline.title().map(|e| e.to_string()).collect::<String>();
/// self.0.push_str(&format!("<a href='#{}'>", slugify!(&title)));
/// for elem in headline.title() {
/// self.element(elem, ctx);
/// }
/// self.0.push_str("</a>");
/// if headline.headlines().count() > 0 {
/// self.0.push_str("<ul>");
/// }
/// }
/// Event::Leave(Container::Headline(headline)) => {
/// if headline.headlines().count() > 0 {
/// self.0.push_str("</ul>");
/// }
/// }
/// Event::Enter(Container::Section(_)) | Event::Leave(Container::Section(_)) => ctx.skip(),
/// Event::Enter(Container::Document(_)) | Event::Leave(Container::Document(_)) => {}
/// _ => self.0.event(event, ctx),
/// }
/// }
/// }
///
/// let org = Org::parse(r#"
/// * heading 1
/// section 1
/// ** heading 1.1
/// ** heading 1.2
/// * heading 2
/// section 2
/// * heading 3
/// **** heading 3.1"#);
/// let mut toc = Toc::default();
/// org.traverse(&mut toc);
/// assert_eq!(toc.0.finish(), "\
/// <a href='#heading-1'>heading 1</a>\
/// <ul><a href='#heading-1-1'>heading 1.1</a><a href='#heading-1-2'>heading 1.2</a></ul>\
/// <a href='#heading-2'>heading 2</a>\
/// <a href='#heading-3'>heading 3</a>\
/// <ul><a href='#heading-3-1'>heading 3.1</a></ul>");
/// ```
pub trait Traverser {
/// Handles traversal event
fn event(&mut self, event: Event, ctx: &mut TraversalContext);
/// Walks `element` depth-first, delivering events to `Traverser::event`.
///
/// After every delivered event (and after every child) the control signal in
/// `ctx` is inspected and may end this call early.
fn element(&mut self, element: SyntaxElement, ctx: &mut TraversalContext) {
// Inspects the pending control signal and returns from `element` unless it
// is `Continue`. The signal is rewritten on the way out so that each
// enclosing call sees the right state:
// - `Stop` is kept, so every enclosing call returns as well;
// - `Up` becomes `Skip`, so exactly one more enclosing call returns;
// - `Skip` becomes `Continue`, so only the current call returns.
macro_rules! take_control {
() => {
match ctx.control {
TraversalControl::Stop => {
ctx.control = TraversalControl::Stop;
return;
}
TraversalControl::Up => {
ctx.control = TraversalControl::Skip;
return;
}
TraversalControl::Skip => {
ctx.control = TraversalControl::Continue;
return;
}
TraversalControl::Continue => {}
}
};
}
match element {
SyntaxElement::Node(node) => {
// `walk!(Ast)`: container node - emit `Enter`, walk all children, emit `Leave`.
// `walk!(@Ast)`: non-container node - emit a single `Event::Ast`.
macro_rules! walk {
($ast:ident) => {{
debug_assert!($ast::can_cast(node.kind()));
let node = $ast { syntax: node };
self.event(Event::Enter(Container::$ast(node.clone())), ctx);
take_control!();
for child in node.syntax.children_with_tokens() {
self.element(child, ctx);
take_control!();
}
self.event(Event::Leave(Container::$ast(node.clone())), ctx);
take_control!();
}};
(@$ast:ident) => {{
debug_assert!($ast::can_cast(node.kind()));
let node = $ast { syntax: node };
self.event(Event::$ast(node), ctx);
take_control!();
}};
}
match node.kind() {
DOCUMENT => walk!(Document),
HEADLINE => walk!(Headline),
SECTION => walk!(Section),
PARAGRAPH => walk!(Paragraph),
BOLD => walk!(Bold),
ITALIC => walk!(Italic),
STRIKE => walk!(Strike),
UNDERLINE => walk!(Underline),
LIST => walk!(List),
LIST_ITEM => walk!(ListItem),
CODE => walk!(Code),
INLINE_CALL => walk!(@InlineCall),
INLINE_SRC => walk!(@InlineSrc),
RULE => walk!(@Rule),
VERBATIM => walk!(Verbatim),
SPECIAL_BLOCK => walk!(SpecialBlock),
QUOTE_BLOCK => walk!(QuoteBlock),
CENTER_BLOCK => walk!(CenterBlock),
VERSE_BLOCK => walk!(VerseBlock),
COMMENT_BLOCK => walk!(CommentBlock),
EXAMPLE_BLOCK => walk!(ExampleBlock),
EXPORT_BLOCK => walk!(ExportBlock),
SOURCE_BLOCK => walk!(SourceBlock),
BABEL_CALL => walk!(BabelCall),
CLOCK => walk!(@Clock),
COOKIE => walk!(@Cookie),
RADIO_TARGET => walk!(RadioTarget),
DRAWER => walk!(Drawer),
DYN_BLOCK => walk!(DynBlock),
FN_DEF => walk!(FnDef),
FN_REF => walk!(FnRef),
MACROS => walk!(@Macros),
SNIPPET => walk!(@Snippet),
TIMESTAMP_ACTIVE | TIMESTAMP_INACTIVE | TIMESTAMP_DIARY => walk!(@Timestamp),
TARGET => walk!(Target),
COMMENT => walk!(Comment),
FIXED_WIDTH => walk!(FixedWidth),
ORG_TABLE => walk!(OrgTable),
ORG_TABLE_RULE_ROW | ORG_TABLE_STANDARD_ROW => walk!(OrgTableRow),
ORG_TABLE_CELL => walk!(OrgTableCell),
LINK => walk!(Link),
LATEX_FRAGMENT => walk!(@LatexFragment),
LATEX_ENVIRONMENT => walk!(@LatexEnvironment),
ENTITY => walk!(@Entity),
LINE_BREAK => walk!(@LineBreak),
SUPERSCRIPT => walk!(Superscript),
SUBSCRIPT => walk!(Subscript),
KEYWORD => walk!(Keyword),
PROPERTY_DRAWER => walk!(PropertyDrawer),
#[cfg(feature = "syntax-org-fc")]
CLOZE => walk!(@Cloze),
// These wrapper nodes produce no event of their own; their children
// are walked directly.
BLOCK_CONTENT | LIST_ITEM_CONTENT => {
for child in node.children_with_tokens() {
self.element(child, ctx);
take_control!();
}
}
// Every other node kind is ignored, together with its children.
_ => {}
}
}
SyntaxElement::Token(token) => {
// Only `TEXT` tokens are reported; all other tokens are ignored.
if token.kind() == TEXT {
self.event(Event::Text(Token(token)), ctx);
take_control!();
}
}
};
}
}
/// A `Traverser` backed by a closure that receives every `Event`.
pub struct FromFn<F: FnMut(Event)>(F);

impl<F: FnMut(Event)> Traverser for FromFn<F> {
    /// Forwards `event` to the wrapped closure; the traversal context is not used.
    fn event(&mut self, event: Event, _: &mut TraversalContext) {
        let Self(callback) = self;
        callback(event)
    }
}
/// A `Traverser` backed by a closure that receives every `Event` together with
/// the `TraversalContext`.
pub struct FromFnWithCtx<F: FnMut(Event, &mut TraversalContext)>(F);

impl<F: FnMut(Event, &mut TraversalContext)> Traverser for FromFnWithCtx<F> {
    /// Forwards `event` and `ctx` to the wrapped closure.
    fn event(&mut self, event: Event, ctx: &mut TraversalContext) {
        let Self(callback) = self;
        callback(event, ctx)
    }
}
/// Creates a [`Traverser`] from a closure that is called with every [`Event`].
///
/// ```rust
/// use orgize::{
/// export::{from_fn, Container, Event, Traverser},
/// Org,
/// };
///
/// let mut count = 0;
/// let mut handler = from_fn(|event| {
/// if matches!(event, Event::Enter(Container::Headline(_))) {
/// count += 1;
/// }
/// });
/// Org::parse("* 1\n** 2\n*** 3\n****4").traverse(&mut handler);
/// assert_eq!(count, 3);
/// ```
pub fn from_fn<F: FnMut(Event)>(f: F) -> FromFn<F> {
FromFn(f)
}
/// Creates a [`Traverser`] from a closure that is called with every [`Event`]
/// and the [`TraversalContext`], so the closure can steer the traversal.
///
/// ```rust
/// use orgize::{
/// export::{from_fn_with_ctx, Container, Event, Traverser},
/// Org,
/// };
///
/// let mut count = 0;
/// let mut handler = from_fn_with_ctx(|event, ctx| {
/// if let Event::Enter(Container::Headline(hdl)) = event {
/// count += 1;
/// if &hdl.title_raw() == "cow" {
/// ctx.stop();
/// }
/// }
/// });
/// Org::parse("* 1\n* cow\n* 3").traverse(&mut handler);
/// assert_eq!(count, 2);
/// ```
pub fn from_fn_with_ctx<F: FnMut(Event, &mut TraversalContext)>(f: F) -> FromFnWithCtx<F> {
FromFnWithCtx(f)
}

1219
src/headline.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,23 +1,242 @@
#![doc = include_str!("../README.md")]
//! A Rust library for parsing orgmode files.
//!
//! [Live demo](https://orgize.herokuapp.com/)
//!
//! # Parse
//!
//! To parse an orgmode string, simply invoke the [`Org::parse`] function:
//!
//! [`Org::parse`]: struct.Org.html#method.parse
//!
//! ```rust
//! use orgize::Org;
//!
//! Org::parse("* DONE Title :tag:");
//! ```
//!
//! or [`Org::parse_custom`]:
//!
//! [`Org::parse_custom`]: struct.Org.html#method.parse_custom
//!
//! ```rust
//! use orgize::{Org, ParseConfig};
//!
//! Org::parse_custom(
//! "* TASK Title 1",
//! &ParseConfig {
//! // custom todo keywords
//! todo_keywords: (vec!["TASK".to_string()], vec![]),
//! ..Default::default()
//! },
//! );
//! ```
//!
//! # Iter
//!
//! The [`Org::iter`] function returns an iterator of [`Event`]s, each of which is
//! a simple wrapper around an [`Element`].
//!
//! [`Org::iter`]: struct.Org.html#method.iter
//! [`Event`]: enum.Event.html
//! [`Element`]: elements/enum.Element.html
//!
//! ```rust
//! use orgize::Org;
//!
//! for event in Org::parse("* DONE Title :tag:").iter() {
//! // handling the event
//! }
//! ```
//!
//! **Note**: every element, whether it is a container or not, appears twice in one loop:
//! once as [`Event::Start(element)`] and once as [`Event::End(element)`].
//!
//! [`Event::Start(element)`]: enum.Event.html#variant.Start
//! [`Event::End(element)`]: enum.Event.html#variant.End
//!
//! # Render html
//!
//! You can call the [`Org::write_html`] function to generate html directly, which
//! uses the [`DefaultHtmlHandler`] internally:
//!
//! [`Org::write_html`]: struct.Org.html#method.write_html
//! [`DefaultHtmlHandler`]: export/struct.DefaultHtmlHandler.html
//!
//! ```rust
//! use orgize::Org;
//!
//! let mut writer = Vec::new();
//! Org::parse("* title\n*section*").write_html(&mut writer).unwrap();
//!
//! assert_eq!(
//! String::from_utf8(writer).unwrap(),
//! "<main><h1>title</h1><section><p><b>section</b></p></section></main>"
//! );
//! ```
//!
//! # Render html with custom `HtmlHandler`
//!
//! To customize html rendering, simply implement the [`HtmlHandler`] trait and pass
//! it to the [`Org::write_html_custom`] function.
//!
//! [`HtmlHandler`]: export/trait.HtmlHandler.html
//! [`Org::write_html_custom`]: struct.Org.html#method.write_html_custom
//!
//! The following code demonstrates how to add an id to every headline and return
//! a custom error type while rendering.
//!
//! ```rust
//! use std::convert::From;
//! use std::io::{Error as IOError, Write};
//! use std::string::FromUtf8Error;
//!
//! use orgize::export::{DefaultHtmlHandler, HtmlHandler};
//! use orgize::{Element, Org};
//! use slugify::slugify;
//!
//! #[derive(Debug)]
//! enum MyError {
//! IO(IOError),
//! Heading,
//! Utf8(FromUtf8Error),
//! }
//!
//! // From<std::io::Error> trait is required for custom error type
//! impl From<IOError> for MyError {
//! fn from(err: IOError) -> Self {
//! MyError::IO(err)
//! }
//! }
//!
//! impl From<FromUtf8Error> for MyError {
//! fn from(err: FromUtf8Error) -> Self {
//! MyError::Utf8(err)
//! }
//! }
//!
//! #[derive(Default)]
//! struct MyHtmlHandler(DefaultHtmlHandler);
//!
//! impl HtmlHandler<MyError> for MyHtmlHandler {
//! fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
//! if let Element::Title(title) = element {
//! if title.level > 6 {
//! return Err(MyError::Heading);
//! } else {
//! write!(
//! w,
//! "<h{0}><a id=\"{1}\" href=\"#{1}\">",
//! title.level,
//! slugify!(&title.raw),
//! )?;
//! }
//! } else {
//! // fallthrough to default handler
//! self.0.start(w, element)?;
//! }
//! Ok(())
//! }
//!
//! fn end<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), MyError> {
//! if let Element::Title(title) = element {
//! write!(w, "</a></h{}>", title.level)?;
//! } else {
//! self.0.end(w, element)?;
//! }
//! Ok(())
//! }
//! }
//!
//! fn main() -> Result<(), MyError> {
//! let mut writer = Vec::new();
//! let mut handler = MyHtmlHandler::default();
//! Org::parse("* title\n*section*").write_html_custom(&mut writer, &mut handler)?;
//!
//! assert_eq!(
//! String::from_utf8(writer)?,
//! "<main><h1><a id=\"title\" href=\"#title\">title</a></h1>\
//! <section><p><b>section</b></p></section></main>"
//! );
//!
//! Ok(())
//! }
//! ```
//!
//! **Note**: as mentioned above, each element appears twice while iterating,
//! and the handler silently ignores all end events from non-container elements.
//!
//! So if you want to change how a non-container element renders, just redefine the `start`
//! function and leave the `end` function unchanged.
//!
//! # Serde
//!
//! The `Org` struct implements serde's `Serialize` trait, which means you can
//! serialize it into any format supported by serde, such as json:
//!
//! ```rust
//! use orgize::Org;
//! use serde_json::{json, to_string};
//!
//! let org = Org::parse("I 'm *bold*.");
//! #[cfg(feature = "ser")]
//! println!("{}", to_string(&org).unwrap());
//!
//! // {
//! // "type": "document",
//! // "children": [{
//! // "type": "section",
//! // "children": [{
//! // "type": "paragraph",
//! // "children":[{
//! // "type": "text",
//! // "value":"I 'm "
//! // }, {
//! // "type": "bold",
//! // "children":[{
//! // "type": "text",
//! // "value": "bold"
//! // }]
//! // }, {
//! // "type":"text",
//! // "value":"."
//! // }]
//! // }]
//! // }]
//! // }
//! ```
//!
//! # Features
//!
//! By now, orgize provides three features:
//!
//! + `ser`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default.
//!
//! + `chrono`: adds the ability to convert `Datetime` into `chrono` structs, disabled by default.
//!
//! + `syntect`: provides [`SyntectHtmlHandler`] for highlighting code block, disabled by default.
//!
//! [`SyntectHtmlHandler`]: export/struct.SyntectHtmlHandler.html
//!
//! # License
//!
//! MIT
pub mod ast;
pub mod config;
mod entities;
mod config;
pub mod elements;
pub mod export;
mod headline;
mod org;
mod replace;
mod syntax;
#[cfg(test)]
mod tests;
mod parse;
mod parsers;
mod validate;
// Re-export of the rowan crate.
pub use rowan;
// Re-export of the indextree crate.
pub use indextree;
#[cfg(feature = "syntect")]
pub use syntect;
pub use config::ParseConfig;
pub use org::Org;
pub use rowan::{TextRange, TextSize};
pub use syntax::{
SyntaxElement, SyntaxElementChildren, SyntaxKind, SyntaxNode, SyntaxNodeChildren, SyntaxToken,
};
pub(crate) use syntax::combinator::lossless_parser;
pub use elements::Element;
pub use headline::{Document, Headline};
pub use org::{Event, Org};
pub use validate::ValidationError;

View file

@ -1,106 +1,193 @@
use rowan::ast::AstNode;
use rowan::{GreenNode, TextSize};
use indextree::{Arena, NodeEdge, NodeId};
use std::io::{Error, Write};
use std::ops::{Index, IndexMut};
use crate::ast::Document;
use crate::config::ParseConfig;
use crate::export::{HtmlExport, TraversalContext, Traverser};
use crate::syntax::{OrgLanguage, SyntaxNode};
use crate::SyntaxElement;
use crate::{
config::{ParseConfig, DEFAULT_CONFIG},
elements::{Element, Keyword},
export::{DefaultHtmlHandler, DefaultOrgHandler, HtmlHandler, OrgHandler},
parsers::{blank_lines_count, parse_container, Container, OwnedArena},
};
/// An org-mode document stored as a tree of [`Element`]s inside an arena.
pub struct Org<'a> {
// Arena that owns every element node of the document.
pub(crate) arena: Arena<Element<'a>>,
// Id of the root node, which is created as an `Element::Document`.
pub(crate) root: NodeId,
}
#[derive(Debug)]
pub struct Org {
pub(crate) green: GreenNode,
pub(crate) config: ParseConfig,
pub enum Event<'a, 'b> {
Start(&'b Element<'a>),
End(&'b Element<'a>),
}
impl Org {
/// Parse input string to Org element tree using default parse config
pub fn parse(input: impl AsRef<str>) -> Org {
ParseConfig::default().parse(input)
impl<'a> Org<'a> {
/// Creates a new empty `Org` struct.
pub fn new() -> Org<'static> {
let mut arena = Arena::new();
let root = arena.new_node(Element::Document { pre_blank: 0 });
Org { arena, root }
}
pub fn green(&self) -> &GreenNode {
&self.green
/// Parses string `text` into `Org` struct.
pub fn parse(text: &'a str) -> Org<'a> {
Org::parse_custom(text, &DEFAULT_CONFIG)
}
pub fn config(&self) -> &ParseConfig {
&self.config
/// Like `parse`, but accepts a `String`.
pub fn parse_string(text: String) -> Org<'static> {
Org::parse_string_custom(text, &DEFAULT_CONFIG)
}
/// Returns the document
pub fn document(&self) -> Document {
Document {
syntax: SyntaxNode::new_root(self.green.clone()),
}
}
/// Parses string `text` into `Org` struct with custom `ParseConfig`.
pub fn parse_custom(text: &'a str, config: &ParseConfig) -> Org<'a> {
let mut arena = Arena::new();
let (text, pre_blank) = blank_lines_count(text);
let root = arena.new_node(Element::Document { pre_blank });
let mut org = Org { arena, root };
/// Returns org-mode string
pub fn to_org(&self) -> String {
self.green.to_string()
}
/// Convert org element tree to html-format using default html handler
pub fn to_html(&self) -> String {
let mut handler = HtmlExport::default();
self.traverse(&mut handler);
handler.finish()
}
/// Walk through org element tree using given traverser
pub fn traverse<T: Traverser>(&self, t: &mut T) {
let mut ctx = TraversalContext::default();
t.element(
SyntaxElement::Node(SyntaxNode::new_root(self.green.clone())),
&mut ctx,
parse_container(
&mut org.arena,
Container::Document {
content: text,
node: org.root,
},
config,
);
org.debug_validate();
org
}
/// Returns the first node in org element tree in depth first order
pub fn first_node<N: AstNode<Language = OrgLanguage>>(&self) -> Option<N> {
fn find<N: AstNode<Language = OrgLanguage>>(node: SyntaxNode) -> Option<N> {
if N::can_cast(node.kind()) {
N::cast(node)
} else {
node.children().find_map(find)
}
}
find(SyntaxNode::new_root(self.green.clone()))
/// Like `parse_custom`, but accepts a `String`.
pub fn parse_string_custom(text: String, config: &ParseConfig) -> Org<'static> {
let mut arena = Arena::new();
let (text, pre_blank) = blank_lines_count(&text);
let root = arena.new_node(Element::Document { pre_blank });
let mut org = Org { arena, root };
parse_container(
&mut OwnedArena::new(&mut org.arena),
Container::Document {
content: text,
node: org.root,
},
config,
);
org.debug_validate();
org
}
/// Returns node in given offset
///
/// ```rust
/// use orgize::{Org, ast::Headline};
///
/// let org = Org::parse("\n\n* foo\n* bar");
///
/// assert!(org.node_at_offset::<Headline>(0).is_none());
///
/// let hdl = org.node_at_offset::<Headline>(2).unwrap();
/// assert_eq!(hdl.title_raw(), "foo");
///
/// let hdl = org.node_at_offset::<Headline>(9).unwrap();
/// assert_eq!(hdl.title_raw(), "bar");
///
/// assert!(org.node_at_offset::<Headline>(999).is_none());
/// ```
pub fn node_at_offset<N: AstNode<Language = OrgLanguage>>(
&self,
offset: impl Into<TextSize>,
) -> Option<N> {
let offset = offset.into();
fn find<N: AstNode<Language = OrgLanguage>>(
node: SyntaxNode,
offset: TextSize,
) -> Option<N> {
if !node.text_range().contains(offset) {
None
} else if N::can_cast(node.kind()) {
N::cast(node)
} else {
node.children().find_map(|node| find(node, offset))
/// Returns a reference to the underlying arena.
pub fn arena(&self) -> &Arena<Element<'a>> {
&self.arena
}
/// Returns a mutable reference to the underlying arena.
pub fn arena_mut(&mut self) -> &mut Arena<Element<'a>> {
&mut self.arena
}
/// Returns an iterator over the document's `Event`s.
///
/// The tree is traversed from the root; every node yields an `Event::Start`
/// and an `Event::End` carrying a reference to its element.
pub fn iter<'b>(&'b self) -> impl Iterator<Item = Event<'a, 'b>> + 'b {
    let edges = self.root.traverse(&self.arena);
    edges.map(move |edge| match edge {
        NodeEdge::Start(id) => Event::Start(&self[id]),
        NodeEdge::End(id) => Event::End(&self[id]),
    })
}
/// Returns an iterator over every `Keyword` element found among the root's
/// descendants (the root node itself is skipped).
pub fn keywords(&self) -> impl Iterator<Item = &Keyword<'_>> {
    let below_root = self.root.descendants(&self.arena).skip(1);
    below_root.filter_map(move |id| {
        if let Element::Keyword(keyword) = &self[id] {
            Some(keyword)
        } else {
            None
        }
    })
}
/// Writes an `Org` struct as html format.
pub fn write_html<W>(&self, writer: W) -> Result<(), Error>
where
W: Write,
{
self.write_html_custom(writer, &mut DefaultHtmlHandler)
}
/// Writes an `Org` struct as html format with custom `HtmlHandler`.
pub fn write_html_custom<W, H, E>(&self, mut writer: W, handler: &mut H) -> Result<(), E>
where
W: Write,
E: From<Error>,
H: HtmlHandler<E>,
{
for event in self.iter() {
match event {
Event::Start(element) => handler.start(&mut writer, element)?,
Event::End(element) => handler.end(&mut writer, element)?,
}
}
find(SyntaxNode::new_root(self.green.clone()), offset)
Ok(())
}
/// Writes an `Org` struct as org format.
pub fn write_org<W>(&self, writer: W) -> Result<(), Error>
where
W: Write,
{
self.write_org_custom(writer, &mut DefaultOrgHandler)
}
/// Writes this `Org` struct to `writer` as org-mode text with a custom `OrgHandler`.
///
/// Every event of `iter` is dispatched to `handler.start` or `handler.end`.
///
/// # Errors
///
/// Stops at, and returns, the first error reported by `handler`.
pub fn write_org_custom<W, H, E>(&self, mut writer: W, handler: &mut H) -> Result<(), E>
where
    W: Write,
    E: From<Error>,
    H: OrgHandler<E>,
{
    self.iter().try_for_each(|event| match event {
        Event::Start(element) => handler.start(&mut writer, element),
        Event::End(element) => handler.end(&mut writer, element),
    })
}
}
impl Default for Org<'static> {
fn default() -> Self {
Org::new()
}
}
impl<'a> Index<NodeId> for Org<'a> {
    type Output = Element<'a>;

    /// Returns the element stored in the arena node `node_id`.
    fn index(&self, node_id: NodeId) -> &Self::Output {
        let node = &self.arena[node_id];
        node.get()
    }
}
impl<'a> IndexMut<NodeId> for Org<'a> {
    /// Returns the element stored in the arena node `node_id`, mutably.
    fn index_mut(&mut self, node_id: NodeId) -> &mut Self::Output {
        let node = &mut self.arena[node_id];
        node.get_mut()
    }
}
#[cfg(feature = "ser")]
use serde::{ser::Serializer, Serialize};
// Serialization support, compiled only when the `ser` feature is enabled.
#[cfg(feature = "ser")]
impl Serialize for Org<'_> {
/// Serializes the document as a newtype struct named `"Org"` that wraps the
/// tree rooted at `self.root`.
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
use serde_indextree::Node;
serializer.serialize_newtype_struct("Org", &Node::new(self.root, &self.arena))
}
}

136
src/parse/combinators.rs Normal file
View file

@ -0,0 +1,136 @@
//! Parsers combinators
use memchr::memchr;
use nom::{
bytes::complete::take_while1,
combinator::verify,
error::{make_error, ErrorKind},
Err, IResult,
};
/// Splits `input` at its first `\n` and returns `(rest, line)`.
///
/// The line ending (`\n` or `\r\n`) is consumed but is not part of `line`.
/// When `input` contains no `\n`, the whole input is returned as the line and
/// the remainder is empty. This parser never fails.
pub fn line(input: &str) -> IResult<&str, &str, ()> {
    match memchr(b'\n', input.as_bytes()) {
        None => Ok(("", input)),
        Some(newline) => {
            let rest = &input[newline + 1..];
            let content = &input[..newline];
            // drop the `\r` of a `\r\n` line ending
            Ok((rest, content.strip_suffix('\r').unwrap_or(content)))
        }
    }
}
pub fn lines_till<F>(predicate: F) -> impl Fn(&str) -> IResult<&str, &str, ()>
where
F: Fn(&str) -> bool,
{
move |i| {
let mut input = i;
loop {
// TODO: better error kind
if input.is_empty() {
return Err(Err::Error(make_error(input, ErrorKind::Many0)));
}
let (input_, line_) = line(input)?;
debug_assert_ne!(input, input_);
if predicate(line_) {
let offset = i.len() - input.len();
return Ok((input_, &i[0..offset]));
}
input = input_;
}
}
}
/// Builds a parser that consumes lines for as long as `predicate` holds.
///
/// The output is the text of the leading run of matching lines; the remaining
/// input starts at the first line that does not match. Unlike `lines_till`,
/// running out of input is not an error: the whole input is returned instead.
pub fn lines_while<F>(predicate: F) -> impl Fn(&str) -> IResult<&str, &str, ()>
where
    F: Fn(&str) -> bool,
{
    move |source| {
        let mut rest = source;
        while !rest.is_empty() {
            let (after, current) = line(rest)?;
            debug_assert_ne!(rest, after);
            if !predicate(current) {
                let consumed = source.len() - rest.len();
                return Ok((rest, &source[0..consumed]));
            }
            rest = after;
        }
        // Every line matched (or the input was empty).
        Ok(("", source))
    }
}
// Checks `lines_while` on: a fully matching input, an input whose first line
// does not match, a match followed by a blank line, and an all-blank input.
#[test]
fn test_lines_while() {
assert_eq!(lines_while(|line| line == "foo")("foo"), Ok(("", "foo")));
assert_eq!(lines_while(|line| line == "foo")("bar"), Ok(("bar", "")));
assert_eq!(
lines_while(|line| line == "foo")("foo\n\n"),
Ok(("\n", "foo\n"))
);
assert_eq!(
lines_while(|line| line.trim().is_empty())("\n\n\n"),
Ok(("", "\n\n\n"))
);
}
/// Consumes the rest of the current line, succeeding only when that rest
/// consists solely of ASCII whitespace (an empty rest also qualifies).
pub fn eol(input: &str) -> IResult<&str, &str, ()> {
    let only_ascii_whitespace = |s: &str| s.bytes().all(|byte| byte.is_ascii_whitespace());
    verify(line, only_ascii_whitespace)(input)
}
/// Consumes one or more characters up to (not including) the first ASCII
/// whitespace character; fails when the input starts with whitespace or is empty.
pub fn one_word(input: &str) -> IResult<&str, &str, ()> {
    let is_word_char = |c: char| !c.is_ascii_whitespace();
    take_while1(is_word_char)(input)
}
/// Skips leading blank lines and reports how many were skipped.
///
/// A line is blank when every character in it is whitespace. The remaining
/// input starts at the first non-blank line, or is empty when all lines were blank.
pub fn blank_lines_count(input: &str) -> IResult<&str, usize, ()> {
    let mut blanks = 0;
    let mut rest = input;
    while !rest.is_empty() {
        let (after, current) = line(rest)?;
        debug_assert_ne!(rest, after);
        if current.chars().any(|c| !c.is_whitespace()) {
            return Ok((rest, blanks));
        }
        blanks += 1;
        rest = after;
    }
    Ok(("", blanks))
}
// Checks `blank_lines_count` with no blank lines, with `\n` and `\r\n`
// terminated blank lines, and with input that is blank all the way to the end.
#[test]
fn test_blank_lines_count() {
assert_eq!(blank_lines_count("foo"), Ok(("foo", 0)));
assert_eq!(blank_lines_count(" foo"), Ok((" foo", 0)));
assert_eq!(blank_lines_count(" \t\nfoo\n"), Ok(("foo\n", 1)));
assert_eq!(blank_lines_count("\n \r\n\nfoo\n"), Ok(("foo\n", 3)));
assert_eq!(
blank_lines_count("\r\n \n \r\n foo\n"),
Ok((" foo\n", 3))
);
assert_eq!(blank_lines_count("\r\n \n \r\n \n"), Ok(("", 4)));
}

1
src/parse/mod.rs Normal file
View file

@ -0,0 +1 @@
pub mod combinators;

657
src/parsers.rs Normal file
View file

@ -0,0 +1,657 @@
use std::iter::once;
use std::marker::PhantomData;
use indextree::{Arena, NodeId};
use jetscii::{bytes, BytesConst};
use memchr::{memchr, memchr_iter};
use nom::bytes::complete::take_while1;
use crate::config::ParseConfig;
use crate::elements::{
block::RawBlock, emphasis::Emphasis, keyword::RawKeyword, radio_target::parse_radio_target,
Clock, Comment, Cookie, Drawer, DynBlock, Element, FixedWidth, FnDef, FnRef, InlineCall,
InlineSrc, Link, List, ListItem, Macros, Rule, Snippet, Table, TableCell, TableRow, Target,
Timestamp, Title,
};
use crate::parse::combinators::lines_while;
/// Storage interface the parsers use to build the element tree.
///
/// Anything convertible into an `Element<'a>` can be stored.
pub trait ElementArena<'a> {
/// Stores `element` as a new child of `parent` and returns the new node's id.
fn append<T>(&mut self, element: T, parent: NodeId) -> NodeId
where
T: Into<Element<'a>>;
/// Stores `element` under `parent`, placed before `parent`'s last child, and
/// returns the new node's id.
fn insert_before_last_child<T>(&mut self, element: T, parent: NodeId) -> NodeId
where
T: Into<Element<'a>>;
/// Replaces the element held by the existing node `node` with `element`.
fn set<T>(&mut self, node: NodeId, element: T)
where
T: Into<Element<'a>>;
}
/// Arena whose elements borrow from the source text with lifetime `'a`.
pub type BorrowedArena<'a> = Arena<Element<'a>>;

impl<'a> ElementArena<'a> for BorrowedArena<'a> {
    /// Allocates a node for `element` and appends it as the last child of `parent`.
    fn append<T>(&mut self, element: T, parent: NodeId) -> NodeId
    where
        T: Into<Element<'a>>,
    {
        let child = self.new_node(element.into());
        parent.append(child, self);
        child
    }

    /// Allocates a node for `element` and inserts it right before the current
    /// last child of `parent`; when `parent` has no children this is a plain append.
    fn insert_before_last_child<T>(&mut self, element: T, parent: NodeId) -> NodeId
    where
        T: Into<Element<'a>>,
    {
        match self[parent].last_child() {
            Some(last) => {
                let inserted = self.new_node(element.into());
                last.insert_before(inserted, self);
                inserted
            }
            None => self.append(element, parent),
        }
    }

    /// Overwrites the element stored in `node`.
    fn set<T>(&mut self, node: NodeId, element: T)
    where
        T: Into<Element<'a>>,
    {
        let slot = self[node].get_mut();
        *slot = element.into();
    }
}
/// Adapter that lets a parser producing `Element<'a>` values write into an
/// arena of `Element<'c>` by converting each element with `into_owned` first.
pub struct OwnedArena<'a, 'b, 'c> {
    // The wrapped arena that actually stores the nodes.
    arena: &'b mut Arena<Element<'c>>,
    // Ties the adapter to the parser's input lifetime `'a` without storing data.
    phantom: PhantomData<&'a ()>,
}

impl<'a, 'b, 'c> OwnedArena<'a, 'b, 'c> {
    /// Wraps `arena` in an owning adapter.
    pub fn new(arena: &'b mut Arena<Element<'c>>) -> OwnedArena<'a, 'b, 'c> {
        Self {
            phantom: PhantomData,
            arena,
        }
    }
}
impl<'a> ElementArena<'a> for OwnedArena<'a, '_, '_> {
    /// Converts `element` with `into_owned` and appends it under `parent` in the wrapped arena.
    fn append<T>(&mut self, element: T, parent: NodeId) -> NodeId
    where
        T: Into<Element<'a>>,
    {
        let owned = element.into().into_owned();
        self.arena.append(owned, parent)
    }

    /// Converts `element` with `into_owned` and inserts it before the last child of `parent`.
    fn insert_before_last_child<T>(&mut self, element: T, parent: NodeId) -> NodeId
    where
        T: Into<Element<'a>>,
    {
        let owned = element.into().into_owned();
        self.arena.insert_before_last_child(owned, parent)
    }

    /// Converts `element` with `into_owned` and stores it in the existing node `node`.
    fn set<T>(&mut self, node: NodeId, element: T)
    where
        T: Into<Element<'a>>,
    {
        let owned = element.into().into_owned();
        self.arena.set(node, owned);
    }
}
/// A pending piece of source text together with the tree node that its parsed
/// children will be attached to. The variant selects which parser
/// `parse_container` dispatches to.
#[derive(Debug)]
pub enum Container<'a> {
// Block, List Item: parsed with `parse_blocks`
Block { content: &'a str, node: NodeId },
// Paragraph, Inline Markup: parsed with `parse_inlines`
Inline { content: &'a str, node: NodeId },
// Headline: parsed with `parse_headline_content`
Headline { content: &'a str, node: NodeId },
// Document: parsed with `parse_section_and_headlines`
Document { content: &'a str, node: NodeId },
}
/// Parses `container` and everything nested inside it into `arena`.
///
/// Nested containers discovered while parsing are pushed onto an explicit
/// work stack and handled last-in-first-out until the stack is empty, so the
/// traversal uses no recursion.
pub fn parse_container<'a, T: ElementArena<'a>>(
    arena: &mut T,
    container: Container<'a>,
    config: &ParseConfig,
) {
    let mut pending = vec![container];
    while let Some(current) = pending.pop() {
        match current {
            Container::Document { content, node } => {
                parse_section_and_headlines(arena, content, node, &mut pending)
            }
            Container::Headline { content, node } => {
                parse_headline_content(arena, content, node, &mut pending, config)
            }
            Container::Block { content, node } => parse_blocks(arena, content, node, &mut pending),
            Container::Inline { content, node } => parse_inlines(arena, content, node, &mut pending),
        }
    }
}
/// Parses the text of one headline: its title first, then the section and
/// sub-headlines that follow the title.
///
/// The title element is appended under `parent` and its raw text is queued as
/// an inline container.
///
/// # Panics
///
/// Panics if `Title::parse` fails on `content`.
pub fn parse_headline_content<'a, T: ElementArena<'a>>(
    arena: &mut T,
    content: &'a str,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
    config: &ParseConfig,
) {
    let (body, (title, title_text)) = Title::parse(content, config).unwrap();
    let title_node = arena.append(title, parent);
    containers.push(Container::Inline {
        content: title_text,
        node: title_node,
    });
    parse_section_and_headlines(arena, body, parent, containers);
}
/// Splits `content` into an optional leading section followed by headlines.
///
/// Leading blank lines are skipped. Text before the first headline (if any)
/// becomes a `Section` child of `parent`; every headline found after it becomes
/// a `Headline` child. The text of each is queued on `containers` for later
/// parsing rather than parsed here. When no headline exists, the whole
/// (non-empty) content becomes a single `Section`.
pub fn parse_section_and_headlines<'a, T: ElementArena<'a>>(
arena: &mut T,
content: &'a str,
parent: NodeId,
containers: &mut Vec<Container<'a>>,
) {
let content = blank_lines_count(content).0;
if content.is_empty() {
return;
}
// `last_end` is the byte offset of the line start currently being tested.
let mut last_end = 0;
// Visit every line start: one iteration per `\n`, plus a final one for the end of input.
for i in memchr_iter(b'\n', content.as_bytes()).chain(once(content.len())) {
if let Some((mut tail, (headline_content, level))) = parse_headline(&content[last_end..]) {
// Anything before the first headline forms the section.
if last_end != 0 {
let node = arena.append(Element::Section, parent);
let content = &content[0..last_end];
containers.push(Container::Block { content, node });
}
let node = arena.append(Element::Headline { level }, parent);
containers.push(Container::Headline {
content: headline_content,
node,
});
// The rest of the input is consumed as consecutive sibling headlines.
while let Some((new_tail, (content, level))) = parse_headline(tail) {
debug_assert_ne!(tail, new_tail);
let node = arena.append(Element::Headline { level }, parent);
containers.push(Container::Headline { content, node });
tail = new_tail;
}
return;
}
last_end = i + 1;
}
// No headline anywhere: the whole content is one section.
let node = arena.append(Element::Section, parent);
containers.push(Container::Block { content, node });
}
/// Parses `content` as a sequence of block-level elements under `parent`.
///
/// Lines that `parse_block` recognises become their own elements. Runs of
/// other lines are collected into `Paragraph` elements, whose text is queued
/// on `containers` as inline containers. A paragraph ends at a blank line, at
/// the start of a recognised block, or at the end of the input.
pub fn parse_blocks<'a, T: ElementArena<'a>>(
arena: &mut T,
content: &'a str,
parent: NodeId,
containers: &mut Vec<Container<'a>>,
) {
let mut tail = blank_lines_count(content).0;
// NOTE(review): this first attempt is made on `content` (leading blank lines
// included), not on the blank-stripped `tail`; the loop below retries on `tail`.
if let Some(new_tail) = parse_block(content, arena, parent, containers) {
tail = blank_lines_count(new_tail).0;
}
// `text` marks where the pending paragraph starts; `pos` is its length in bytes so far.
let mut text = tail;
let mut pos = 0;
while !tail.is_empty() {
// `i` is the length of the current line including its `\n`, if present.
let i = memchr(b'\n', tail.as_bytes())
.map(|i| i + 1)
.unwrap_or_else(|| tail.len());
if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) {
// Blank line: close the pending paragraph and count the blank lines after it.
let (tail_, blank) = blank_lines_count(&tail[i..]);
debug_assert_ne!(tail, tail_);
tail = tail_;
let node = arena.append(
Element::Paragraph {
// including the current line (&tail[0..i])
post_blank: blank + 1,
},
parent,
);
containers.push(Container::Inline {
content: &text[0..pos].trim_end(),
node,
});
pos = 0;
text = tail;
} else if let Some(new_tail) = parse_block(tail, arena, parent, containers) {
// A block element was just appended; the paragraph text collected before
// it has to be inserted in front of that new last child.
if pos != 0 {
let node =
arena.insert_before_last_child(Element::Paragraph { post_blank: 0 }, parent);
containers.push(Container::Inline {
content: &text[0..pos].trim_end(),
node,
});
pos = 0;
}
debug_assert_ne!(tail, blank_lines_count(new_tail).0);
tail = blank_lines_count(new_tail).0;
text = tail;
} else {
// Ordinary text line: extend the pending paragraph by this line.
debug_assert_ne!(tail, &tail[i..]);
tail = &tail[i..];
pos += i;
}
}
// Flush a paragraph that runs to the end of the input.
if !text.is_empty() {
let node = arena.append(Element::Paragraph { post_blank: 0 }, parent);
containers.push(Container::Inline {
content: &text[0..pos].trim_end(),
node,
});
}
}
/// Tries to parse one block-level element at the start of `contents`.
///
/// The first non-whitespace byte selects which element parsers are attempted.
/// On success the element is appended under `parent` (and, for elements that
/// contain further blocks, its inner text is queued on `containers`), and the
/// unparsed remainder is returned. Returns `None` when `contents` is entirely
/// whitespace or no element parser accepts it; callers then treat the text as
/// paragraph content.
pub fn parse_block<'a, T: ElementArena<'a>>(
    contents: &'a str,
    arena: &mut T,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) -> Option<&'a str> {
    match contents
        .as_bytes()
        .iter()
        .find(|c| !c.is_ascii_whitespace())?
    {
        b'[' => {
            let (tail, (fn_def, content)) = FnDef::parse(contents)?;
            let node = arena.append(fn_def, parent);
            containers.push(Container::Block { content, node });
            Some(tail)
        }
        b'0'..=b'9' | b'*' => parse_list(arena, contents, parent, containers),
        b'C' => {
            let (tail, clock) = Clock::parse(contents)?;
            arena.append(clock, parent);
            Some(tail)
        }
        // LaTeX environments start with a backslash (`\begin{...}`), not a quote.
        b'\\' => {
            // TODO: LaTeX environment
            None
        }
        b'-' => {
            // A rule is tried first; otherwise `-` may be a list bullet.
            if let Some((tail, rule)) = Rule::parse(contents) {
                arena.append(rule, parent);
                Some(tail)
            } else {
                parse_list(arena, contents, parent, containers)
            }
        }
        b':' => {
            if let Some((tail, (drawer, content))) = Drawer::parse(contents) {
                let node = arena.append(drawer, parent);
                containers.push(Container::Block { content, node });
                Some(tail)
            } else {
                let (tail, fixed_width) = FixedWidth::parse(contents)?;
                arena.append(fixed_width, parent);
                Some(tail)
            }
        }
        b'|' => Some(parse_org_table(arena, contents, containers, parent)),
        b'+' => {
            // A `table.el` table is tried first; otherwise `+` may be a list bullet.
            if let Some((tail, table)) = Table::parse_table_el(contents) {
                arena.append(table, parent);
                Some(tail)
            } else {
                parse_list(arena, contents, parent, containers)
            }
        }
        b'#' => {
            if let Some((tail, block)) = RawBlock::parse(contents) {
                let (element, content) = block.into_element();
                // `element` is moved into the arena below, so decide beforehand
                // whether its content must be parsed as nested blocks.
                let is_block_container = matches!(
                    element,
                    Element::CenterBlock(_)
                        | Element::QuoteBlock(_)
                        | Element::VerseBlock(_)
                        | Element::SpecialBlock(_)
                );
                let node = arena.append(element, parent);
                if is_block_container {
                    containers.push(Container::Block { content, node });
                }
                Some(tail)
            } else if let Some((tail, (dyn_block, content))) = DynBlock::parse(contents) {
                let node = arena.append(dyn_block, parent);
                containers.push(Container::Block { content, node });
                Some(tail)
            } else if let Some((tail, keyword)) = RawKeyword::parse(contents) {
                arena.append(keyword.into_element(), parent);
                Some(tail)
            } else {
                let (tail, comment) = Comment::parse(contents)?;
                arena.append(comment, parent);
                Some(tail)
            }
        }
        _ => None,
    }
}
/// Iterator over the byte offsets in a paragraph at which an inline element
/// may start.
struct InlinePositions<'a> {
    // Text being scanned.
    bytes: &'a [u8],
    // Offset from which the next search starts.
    pos: usize,
    // An offset queued to be yielded before searching further.
    next: Option<usize>,
}

impl InlinePositions<'_> {
    /// Starts a scan over `bytes`; offset `0` is always yielded first.
    fn new(bytes: &[u8]) -> InlinePositions {
        InlinePositions {
            next: Some(0),
            pos: 0,
            bytes,
        }
    }
}
impl Iterator for InlinePositions<'_> {
    type Item = usize;

    /// Yields the next candidate offset.
    ///
    /// A queued offset is returned first. Otherwise the scan advances to the
    /// next byte from the `PRE_BYTES` set:
    /// * `{` yields its own offset and queues the offset right after it;
    /// * a space, `(`, `'`, `"` or newline yields the offset right after it;
    /// * `@`, `<` and `[` yield their own offset.
    fn next(&mut self) -> Option<Self::Item> {
        lazy_static::lazy_static! {
            static ref PRE_BYTES: BytesConst =
                bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n');
        }

        if let Some(queued) = self.next.take() {
            return Some(queued);
        }

        let hit = self.pos + PRE_BYTES.find(&self.bytes[self.pos..])?;
        self.pos = hit + 1;
        let candidate = match self.bytes[hit] {
            b'{' => {
                self.next = Some(self.pos);
                hit
            }
            b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos,
            _ => hit,
        };
        Some(candidate)
    }
}
/// Parses `content` as inline markup under `parent`.
///
/// Inline elements recognised by `parse_inline` are appended as they are
/// found; the plain text between them is stored as `Element::Text` nodes.
pub fn parse_inlines<'a, T: ElementArena<'a>>(
    arena: &mut T,
    content: &'a str,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) {
    let mut tail = content;

    // An element may start at the very beginning of the text.
    if let Some(rest) = parse_inline(tail, arena, containers, parent) {
        tail = rest;
    }

    loop {
        // First candidate offset in `tail` at which an inline element parses.
        let next_hit = InlinePositions::new(tail.as_bytes())
            .filter_map(|i| {
                parse_inline(&tail[i..], arena, containers, parent).map(|rest| (rest, i))
            })
            .next();
        let (rest, i) = match next_hit {
            Some(hit) => hit,
            None => break,
        };

        // The element was already appended by `parse_inline`, so the text that
        // precedes it has to go in front of it.
        if i != 0 {
            let preceding = Element::Text {
                value: tail[0..i].into(),
            };
            arena.insert_before_last_child(preceding, parent);
        }
        tail = rest;
    }

    // Whatever is left is trailing plain text.
    if !tail.is_empty() {
        arena.append(Element::Text { value: tail.into() }, parent);
    }
}
/// Tries to parse a single inline element at the very start of `contents`.
///
/// The first byte selects which element parsers are attempted. On success the
/// element is appended under `parent` and the unparsed remainder is returned.
/// Emphasis that can itself contain markup (bold, strike, italic, underline)
/// additionally has its inner text queued on `containers`. Inputs shorter
/// than three bytes are never parsed.
pub fn parse_inline<'a, T: ElementArena<'a>>(
    contents: &'a str,
    arena: &mut T,
    containers: &mut Vec<Container<'a>>,
    parent: NodeId,
) -> Option<&'a str> {
    if contents.len() < 3 {
        return None;
    }

    let marker = contents.as_bytes()[0];
    let rest = match marker {
        b'@' => {
            let (rest, snippet) = Snippet::parse(contents)?;
            arena.append(snippet, parent);
            rest
        }
        b'{' => {
            let (rest, macros) = Macros::parse(contents)?;
            arena.append(macros, parent);
            rest
        }
        b'<' => {
            if let Some((rest, _content)) = parse_radio_target(contents) {
                arena.append(Element::RadioTarget, parent);
                rest
            } else if let Some((rest, target)) = Target::parse(contents) {
                arena.append(target, parent);
                rest
            } else if let Some((rest, timestamp)) = Timestamp::parse_active(contents) {
                arena.append(timestamp, parent);
                rest
            } else {
                let (rest, timestamp) = Timestamp::parse_diary(contents)?;
                arena.append(timestamp, parent);
                rest
            }
        }
        b'[' => {
            if let Some((rest, fn_ref)) = FnRef::parse(contents) {
                arena.append(fn_ref, parent);
                rest
            } else if let Some((rest, link)) = Link::parse(contents) {
                arena.append(link, parent);
                rest
            } else if let Some((rest, cookie)) = Cookie::parse(contents) {
                arena.append(cookie, parent);
                rest
            } else {
                let (rest, timestamp) = Timestamp::parse_inactive(contents)?;
                arena.append(timestamp, parent);
                rest
            }
        }
        b'*' | b'+' | b'/' | b'_' | b'=' | b'~' => {
            let (rest, emphasis) = Emphasis::parse(contents, marker)?;
            let (element, content) = emphasis.into_element();
            // Decide before `element` is moved into the arena.
            let is_inline_container = matches!(
                element,
                Element::Bold | Element::Strike | Element::Italic | Element::Underline
            );
            let node = arena.append(element, parent);
            if is_inline_container {
                containers.push(Container::Inline { content, node });
            }
            rest
        }
        b's' => {
            let (rest, inline_src) = InlineSrc::parse(contents)?;
            arena.append(inline_src, parent);
            rest
        }
        b'c' => {
            let (rest, inline_call) = InlineCall::parse(contents)?;
            arena.append(inline_call, parent);
            rest
        }
        _ => return None,
    };
    Some(rest)
}
/// Parses a list starting at `contents`.
///
/// Consecutive items with the same indentation as the first item are gathered
/// into one `List` node under `parent`; each item's text is queued on
/// `containers` as a block container. Returns the input remaining after the
/// list and its trailing blank lines, or `None` when `contents` does not
/// start with a list item.
pub fn parse_list<'a, T: ElementArena<'a>>(
    arena: &mut T,
    contents: &'a str,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) -> Option<&'a str> {
    let (mut tail, (first_item, first_content)) = ListItem::parse(contents)?;
    let indent = first_item.indent;
    let ordered = first_item.ordered;

    // The list node must exist before its items can be attached, but
    // `post_blank` is only known once all items have been consumed, so a
    // placeholder element is stored first and replaced at the end.
    let list_node = arena.append(Element::Document { pre_blank: 0 }, parent); // placeholder
    let first_node = arena.append(first_item, list_node);
    containers.push(Container::Block {
        content: first_content,
        node: first_node,
    });

    while let Some((next_tail, (item, content))) = ListItem::parse(tail) {
        // An item with a different indentation does not belong to this list.
        if item.indent != indent {
            break;
        }
        let node = arena.append(item, list_node);
        containers.push(Container::Block { content, node });
        debug_assert_ne!(tail, next_tail);
        tail = next_tail;
    }

    let (tail, post_blank) = blank_lines_count(tail);
    arena.set(
        list_node,
        List {
            indent,
            ordered,
            post_blank,
        },
    );
    Some(tail)
}
/// Parses an org-style table (lines starting with `|`) at the start of `contents`.
///
/// Appends a `Table::Org` node under `parent` with one child per retained
/// row or rule line, queues each cell's trimmed text on `containers` as an
/// inline container, and returns the input remaining after the table and its
/// trailing blank lines.
pub fn parse_org_table<'a, T: ElementArena<'a>>(
arena: &mut T,
contents: &'a str,
containers: &mut Vec<Container<'a>>,
parent: NodeId,
) -> &'a str {
// The table is the leading run of lines whose first non-blank character is `|`.
let (tail, contents) =
lines_while(|line| line.trim_start().starts_with('|'))(contents).unwrap_or((contents, ""));
let (tail, post_blank) = blank_lines_count(tail);
let mut iter = contents.trim_end().lines().peekable();
let mut lines = vec![];
let mut has_header = false;
// TODO: merge contiguous rules
// A rule (`|-`) on the very first line is dropped.
if let Some(line) = iter.next() {
let line = line.trim_start();
if !line.starts_with("|-") {
lines.push(line);
}
}
while let Some(line) = iter.next() {
let line = line.trim_start();
// A rule on the very last line is dropped; a rule seen at any other
// position after the first line marks the table as having a header.
if iter.peek().is_none() && line.starts_with("|-") {
break;
} else if line.starts_with("|-") {
has_header = true;
}
lines.push(line);
}
let parent = arena.append(
Table::Org {
tblfm: None,
post_blank,
has_header,
},
parent,
);
// From here on `has_header` means "still inside the header": it is cleared
// at the first rule, which is emitted as the header rule.
for line in lines {
if line.starts_with("|-") {
if has_header {
arena.append(Element::TableRow(TableRow::HeaderRule), parent);
has_header = false;
} else {
arena.append(Element::TableRow(TableRow::BodyRule), parent);
}
} else {
if has_header {
let parent = arena.append(Element::TableRow(TableRow::Header), parent);
// `skip(1)` discards the empty piece before the leading `|`.
for content in line.split_terminator('|').skip(1) {
let node = arena.append(Element::TableCell(TableCell::Header), parent);
containers.push(Container::Inline {
content: content.trim(),
node,
});
}
} else {
let parent = arena.append(Element::TableRow(TableRow::Body), parent);
// `skip(1)` discards the empty piece before the leading `|`.
for content in line.split_terminator('|').skip(1) {
let node = arena.append(Element::TableCell(TableCell::Body), parent);
containers.push(Container::Inline {
content: content.trim(),
node,
});
}
}
}
}
tail
}
/// Infallible wrapper around `parse::combinators::blank_lines_count`.
///
/// Returns the input remaining after the leading blank lines together with
/// their count; a parser error is mapped to "no blank lines skipped".
pub fn blank_lines_count(input: &str) -> (&str, usize) {
    match crate::parse::combinators::blank_lines_count(input) {
        Ok(skipped) => skipped,
        Err(_) => (input, 0),
    }
}
/// Parses one headline, including its whole subtree, from the start of `input`.
///
/// The headline extends over every following line that is not itself the
/// start of a headline of the same or a shallower level. Returns
/// `(rest, (headline_text, level))`, or `None` when `input` does not start
/// with a headline.
pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> {
    let (after_stars, level) = parse_headline_level(input)?;

    // A line stays inside this headline unless it opens a headline that is
    // not strictly deeper.
    let belongs_to_headline = move |line: &str| match parse_headline_level(line) {
        Some((_, other_level)) => other_level > level,
        None => true,
    };
    let (rest, body) =
        lines_while(belongs_to_headline)(after_stars).unwrap_or((after_stars, ""));

    // `body` starts right after the stars, so the headline spans both.
    let headline_len = level + body.len();
    Some((rest, (&input[0..headline_len], level)))
}
/// Recognises the leading stars of a headline.
///
/// Succeeds when `input` starts with one or more `*` that are followed by a
/// space, a newline, or the end of input. Returns the text after the stars and
/// the number of stars.
pub fn parse_headline_level(input: &str) -> Option<(&str, usize)> {
    let (rest, stars) = take_while1::<_, _, ()>(|c: char| c == '*')(input).ok()?;
    match rest.chars().next() {
        None | Some(' ') | Some('\n') => Some((rest, stars.len())),
        Some(_) => None,
    }
}

View file

@ -1,326 +0,0 @@
use rowan::{
ast::{support, AstNode},
SyntaxNode, TextRange, TextSize, TokenAtOffset,
};
use crate::ast::Headline;
use crate::syntax::{
combinator::line_starts_iter, document::document_node, headline::headline_node, OrgLanguage,
};
use crate::Org;
/// How an edited text range relates to the headlines of the current syntax tree
/// (computed by `RangeShape::new`).
#[derive(Debug)]
enum RangeShape {
// The range lies inside `headline`, past its stars and the character after them.
InsideHeadline { headline: Headline, level: usize },
// The range is exactly the text range of `headline`.
ExactHeadline { headline: Headline, level: usize },
// Neither of the above.
Other,
}
impl RangeShape {
    /// Classifies `range` against the headlines found below `node`.
    ///
    /// The search walks down the tree: whenever a headline contains the range
    /// (past its stars and the character after them), the search continues
    /// among that headline's own child headlines, so the deepest enclosing
    /// headline wins. It stops early when a headline's range equals `range`.
    pub fn new(mut node: SyntaxNode<OrgLanguage>, range: TextRange) -> Self {
        let mut result = RangeShape::Other;
        'descend: loop {
            for headline in support::children::<Headline>(&node) {
                let level = headline.level();
                let headline_range = headline.syntax.text_range();
                if headline_range == range {
                    result = RangeShape::ExactHeadline { headline, level };
                    break 'descend;
                }
                // `TextRange::new` panics when start > end. Guard against a
                // headline that is shorter than its stars plus one character
                // (presumably possible for a bare `*` at end of input — TODO confirm).
                let body_start = headline_range.start() + TextSize::from(level as u32 + 1);
                if body_start <= headline_range.end()
                    && TextRange::new(body_start, headline_range.end()).contains_range(range)
                {
                    node = headline.syntax.clone();
                    result = RangeShape::InsideHeadline { headline, level };
                    continue 'descend;
                }
            }
            break;
        }
        result
    }
}
/// What kind of headline structure a replacement text contains
/// (computed by `ReplaceWithShape::new`).
#[derive(Debug, PartialEq)]
enum ReplaceWithShape {
// The text contains headline lines and is not a single headline subtree;
// `level` is the shallowest headline level seen.
IncludeHeadline { level: usize },
// The text starts with a headline of `level` and every later headline is deeper.
ExactHeadline { level: usize },
// The text contains no headline line.
Other,
}
impl ReplaceWithShape {
    /// Classifies `text` by scanning every line start for a headline marker,
    /// i.e. one or more `*` immediately followed by a space.
    fn new(text: &str) -> Self {
        let mut result = ReplaceWithShape::Other;
        for start in line_starts_iter(text) {
            let line = text[start..].as_bytes();
            let level = line.iter().take_while(|&&byte| byte == b'*').count();
            // Not a headline line: no stars, or the stars are not followed by a space.
            if level == 0 || line.get(level) != Some(&b' ') {
                continue;
            }
            match result {
                // First headline seen: "exact" only if the text begins with it.
                ReplaceWithShape::Other if start == 0 => {
                    result = ReplaceWithShape::ExactHeadline { level }
                }
                ReplaceWithShape::Other => result = ReplaceWithShape::IncludeHeadline { level },
                // A later headline that is not deeper breaks the single subtree.
                ReplaceWithShape::ExactHeadline { level: seen } if level <= seen => {
                    result = ReplaceWithShape::IncludeHeadline { level }
                }
                // Keep track of the shallowest level.
                ReplaceWithShape::IncludeHeadline { level: seen } if level < seen => {
                    result = ReplaceWithShape::IncludeHeadline { level }
                }
                _ => {}
            }
        }
        result
    }
}
impl Org {
    /// Replace specified range with given text, and reparse the syntax tree with current config
    ///
    /// This method optimizes parsing by analyzing the selected range and given text, and reducing
    /// the amount of data processed by parser.
    ///
    /// ```rust
    /// use orgize::{Org, ast::Headline, TextRange, TextSize};
    ///
    /// let mut org = Org::parse("** hello");
    /// let hdl = org.first_node::<Headline>().unwrap();
    /// assert_eq!(hdl.level(), 2);
    ///
    /// // replace '**' with '*****'
    /// org.replace_range(TextRange::new(0.into(), 2.into()), "*****");
    /// // since the syntax tree is changed, we have to query again
    /// let hdl = org.first_node::<Headline>().unwrap();
    /// assert_eq!(hdl.level(), 5);
    /// ```
    pub fn replace_range(&mut self, range: TextRange, replace_with: impl AsRef<str>) {
        let replace_with = replace_with.as_ref();
        let range_shape = RangeShape::new(self.document().syntax, range);
        let text_shape = ReplaceWithShape::new(replace_with);

        // Only the enclosing headline is reparsed when the edit provably cannot
        // change the structure outside of it; everything else reparses the document.
        match (range_shape, text_shape) {
            (
                RangeShape::ExactHeadline { headline, level },
                ReplaceWithShape::IncludeHeadline { level: new_level },
            )
            | (
                RangeShape::InsideHeadline { headline, level },
                ReplaceWithShape::IncludeHeadline { level: new_level },
            ) if level < new_level => self.replace_headline(headline, range, replace_with),
            (
                RangeShape::ExactHeadline { headline, level },
                ReplaceWithShape::ExactHeadline { level: new_level },
            ) if level <= new_level
                // non-last headline must ends with a newline
                && (headline.end() == self.document().end()
                    || replace_with.ends_with(&['\n', '\r'])) =>
            {
                self.replace_headline(headline, range, replace_with)
            }
            (
                RangeShape::InsideHeadline { headline, level },
                ReplaceWithShape::ExactHeadline { level: new_level },
            ) if level <= new_level && follows_newline(headline.syntax(), range.start()) => {
                self.replace_headline(headline, range, replace_with)
            }
            _ => self.full_parse(range, replace_with),
        }
    }

    /// Applies the edit to the whole document text and reparses all of it.
    fn full_parse(&mut self, range: TextRange, replace_with: &str) {
        let spliced;
        let source = if self.document().syntax().text_range() == range {
            // The edit covers the entire document: the new text is the document.
            replace_with
        } else {
            let start: usize = range.start().into();
            let end: usize = range.end().into();
            let mut text = self.green.to_string();
            text.replace_range(start..end, replace_with);
            spliced = text;
            spliced.as_str()
        };
        let input = (source, &self.config).into();
        self.green = document_node(input).unwrap().1.into_node().unwrap();
    }

    /// Applies the edit to the text of `headline` only, reparses that text as a
    /// headline, and swaps the result into the tree.
    fn replace_headline(&mut self, headline: Headline, range: TextRange, replace_with: &str) {
        let spliced;
        let source = if headline.syntax().text_range() == range {
            // The edit covers the entire headline: the new text is the headline.
            replace_with
        } else {
            // `range` is document-relative; shift it to be headline-relative.
            let offset: usize = headline.syntax.text_range().start().into();
            let start: usize = range.start().into();
            let end: usize = range.end().into();
            let mut text = headline.syntax.to_string();
            text.replace_range((start - offset)..(end - offset), replace_with);
            spliced = text;
            spliced.as_str()
        };
        let input = (source, &self.config).into();
        self.green = headline
            .syntax
            .replace_with(headline_node(input).unwrap().1.into_node().unwrap());
    }
}
/// Line-boundary check on the token(s) found at `offset`.
///
/// * No token at `offset`: `false`.
/// * `offset` inside a single token: whether the part of that token starting
///   at `offset` ends with `\n` or `\r`.
/// * `offset` between two tokens: whether the left token ends with `\n` or `\r`.
fn follows_newline(syntax: &SyntaxNode<OrgLanguage>, offset: TextSize) -> bool {
    let ends_with_line_break = |text: &str| text.ends_with('\n') || text.ends_with('\r');

    match syntax.token_at_offset(offset) {
        TokenAtOffset::None => false,
        TokenAtOffset::Single(token) => {
            let relative: usize = (offset - token.text_range().start()).into();
            ends_with_line_break(&token.text()[relative..])
        }
        TokenAtOffset::Between(left, _) => ends_with_line_break(left.text()),
    }
}
// Covers, in order: `follows_newline`, `ReplaceWithShape::new`,
// `RangeShape::new`, and finally `Org::replace_range` end to end.
#[test]
fn replace() {
assert!(follows_newline(
Org::parse("\n*a*").document().syntax(),
TextSize::new(1)
));
assert!(follows_newline(
Org::parse(" \na").document().syntax(),
TextSize::new(1)
));
assert!(follows_newline(
Org::parse(" \ra").document().syntax(),
TextSize::new(1)
));
assert!(!follows_newline(
Org::parse(" *a*").document().syntax(),
TextSize::new(1)
));
assert!(!follows_newline(
Org::parse(" a").document().syntax(),
TextSize::new(1)
));
assert_eq!(ReplaceWithShape::new(""), ReplaceWithShape::Other);
assert_eq!(ReplaceWithShape::new(" ** a"), ReplaceWithShape::Other);
assert_eq!(
ReplaceWithShape::new("\n** a"),
ReplaceWithShape::IncludeHeadline { level: 2 }
);
assert_eq!(
ReplaceWithShape::new("** a"),
ReplaceWithShape::ExactHeadline { level: 2 }
);
assert_eq!(
ReplaceWithShape::new("** a\n* 1"),
ReplaceWithShape::IncludeHeadline { level: 1 }
);
assert_eq!(
ReplaceWithShape::new("* a\n** 1"),
ReplaceWithShape::ExactHeadline { level: 1 }
);
assert_eq!(
ReplaceWithShape::new("** a\n** 1"),
ReplaceWithShape::IncludeHeadline { level: 2 }
);
assert!(matches!(
RangeShape::new(
Org::parse("** abc\n** b").document().syntax,
TextRange::new(0.into(), 7.into())
),
RangeShape::ExactHeadline { level: 2, .. }
));
assert!(matches!(
RangeShape::new(
Org::parse("** abc\n** b").document().syntax,
TextRange::new(3.into(), 7.into())
),
RangeShape::InsideHeadline { level: 2, .. }
));
assert!(matches!(
RangeShape::new(
Org::parse("** abc\n** b").document().syntax,
TextRange::new(2.into(), 7.into())
),
RangeShape::Other
));
assert!(matches!(
RangeShape::new(
Org::parse("* abc\n** b").document().syntax,
TextRange::new(4.into(), 7.into())
),
RangeShape::InsideHeadline { level: 1, .. }
));
// `t!(input, replace)`: the first and last `|` in `input` delimit the range to
// replace (the markers themselves are removed before parsing). The tree after
// `replace_range` must equal the tree obtained by parsing the expected text
// from scratch.
macro_rules! t {
($input:literal, $replace:literal) => {
let start = $input.find('|').unwrap();
let end = $input.rfind('|').unwrap();
let input = format!(
"{}{}{}",
&$input[0..start],
&$input[start + 1..end],
&$input[end + 1..]
);
let output = format!("{}{}{}", &$input[0..start], $replace, &$input[end + 1..]);
let mut org = Org::parse(input);
org.replace_range(
// `end - 1` accounts for the removed opening `|` marker
TextRange::new((start as u32).into(), (end as u32 - 1).into()),
$replace,
);
debug_assert_eq!(
format!("{:#?}", org.document().syntax),
format!("{:#?}", Org::parse(output).document().syntax),
);
};
}
t!("||", "");
t!("||", "** abc");
t!("*** abc |edf|", "fde");
t!("*|** abc edf|", "fde");
t!("* abc \n|** edf|", "** abc");
t!("* ab|c \n*| edf", "** abc");
t!("* abc \n|** edf|", "** abc");
t!("* abc \n|** edf|", "** eee\n** eee");
t!("* abc \n|** edf|", "*** abc");
t!("* abc \n*|* edf|", "*** abc");
t!("* abc \n**| edf|", "*** abc");
t!("* abc \n**| |edf", "*** abc");
t!("* abc \n** |edf|", "*** abc");
t!("* abc \n** |edf|", "\n*** abc");
t!("* abc \n** |edf|", "\n** abc");
t!("* abc \n** |edf|", "\n* abc");
t!("* abc \n** \n|edf|", "* abc");
t!("* abc \n** \n|edf|", "* abc\n* abc");
t!("* abc \n** |edf|", "* abc");
t!("* abc \n** |edf|", "* abc\n* abc");
t!("* abc \n|* edf\n|* gh", "* hg");
t!("* abc \n|* edf\n|* gh", "* hg\n");
t!("* abc \n* edf\n|* gh|", "* hg");
}

View file

@ -1,305 +0,0 @@
use nom::{
branch::alt,
bytes::complete::{tag, tag_no_case, take_while, take_while1},
character::complete::{alpha1, space0, space1},
combinator::{cond, opt},
sequence::{separated_pair, tuple},
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, eol_or_eof, line_starts_iter, node, token, trim_line_end, GreenElement,
NodeBuilder,
},
element::element_nodes,
input::Input,
keyword::affiliated_keyword_nodes,
SyntaxKind::*,
};
/// Parses a complete block: affiliated keywords, the `#+BEGIN_` line, the
/// content, the matching `#+END_` line and trailing blank lines.
///
/// The content ends at the first line start where `block_end_node` succeeds
/// for the block's name. Content of "greater" blocks is parsed into elements;
/// content of other blocks is kept as text with comma-quoting split out.
/// Fails when no matching end line is found.
fn block_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, affiliated_keywords) = affiliated_keyword_nodes(input)?;
    let (input, (block_begin, name)) = block_begin_node(input)?;
    let (input, pre_blank) = blank_lines(input)?;

    let is_named = |expected: &str| name.eq_ignore_ascii_case(expected);
    let kind = if is_named("COMMENT") {
        COMMENT_BLOCK
    } else if is_named("EXAMPLE") {
        EXAMPLE_BLOCK
    } else if is_named("EXPORT") {
        EXPORT_BLOCK
    } else if is_named("SRC") {
        SOURCE_BLOCK
    } else if is_named("CENTER") {
        CENTER_BLOCK
    } else if is_named("QUOTE") {
        QUOTE_BLOCK
    } else if is_named("VERSE") {
        VERSE_BLOCK
    } else {
        SPECIAL_BLOCK
    };

    for offset in line_starts_iter(&input) {
        // `contents` is everything before this line start, `rest` begins at it.
        let (rest, contents) = input.take_split(offset);
        let (rest, block_end) = match block_end_node(rest, name) {
            Ok(found) => found,
            Err(_) => continue,
        };
        let (rest, post_blank) = blank_lines(rest)?;

        let content = if kind.is_greater_element() {
            node(BLOCK_CONTENT, element_nodes(contents)?)
        } else {
            node(BLOCK_CONTENT, comma_quoted_text_nodes(contents))
        };

        let mut children = vec![];
        children.extend(affiliated_keywords);
        children.push(block_begin);
        children.extend(pre_blank);
        children.push(content);
        children.push(block_end);
        children.extend(post_blank);
        return Ok((rest, node(kind, children)));
    }

    Err(nom::Err::Error(()))
}
/// Parses a `#+BEGIN_<name>` line into a `BLOCK_BEGIN` node.
///
/// Returns the node together with the block name exactly as written in the
/// input. `#+BEGIN_` is matched case-insensitively. What follows the name
/// depends on it: `SRC` lines get language / switches / parameters tokens,
/// `EXPORT` lines get an export-type token, anything else is kept as text.
fn block_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
let (input, (ws1, begin, name)) = tuple((space0, tag_no_case("#+BEGIN_"), alpha1))(input)?;
let mut b = NodeBuilder::new();
b.ws(ws1);
b.text(begin);
b.text(name);
if name.eq_ignore_ascii_case("SRC") {
// Source block: [language] [switches] [parameters], each part optional.
let (input, language) = opt(tuple((
space1,
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
)))(input)?;
let (input, switches) = opt(tuple((space1, source_block_switches)))(input)?;
let (input, ws1) = space0(input)?;
let (input, (parameters, ws2, nl)) = trim_line_end(input)?;
if let Some((ws, language)) = language {
b.ws(ws);
b.token(SRC_BLOCK_LANGUAGE, language);
}
if let Some((ws, switches)) = switches {
b.ws(ws);
b.token(SRC_BLOCK_SWITCHES, switches);
}
b.ws(ws1);
if !parameters.is_empty() {
b.token(SRC_BLOCK_PARAMETERS, parameters);
}
b.ws(ws2);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
} else if name.eq_ignore_ascii_case("EXPORT") {
// Export block: an optional export type, then the rest of the line as text.
let (input, ty) = opt(tuple((
space1,
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
)))(input)?;
let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?;
let (input, nl) = eol_or_eof(input)?;
if let Some((ws, ty)) = ty {
b.ws(ws);
b.token(EXPORT_BLOCK_TYPE, ty);
}
b.text(data);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
} else {
// Any other block: the rest of the line is kept as plain text.
let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?;
let (input, nl) = eol_or_eof(input)?;
b.text(data);
b.nl(nl);
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
}
}
/// Recognises a run of source-block switches at the start of `input`.
///
/// A switch is `-l <arg>`, `-n <arg>`, `+<letters>` or `-<letters>`; switches
/// after the first must be separated from the previous one by spaces. The
/// whole run is returned as one slice. Fails when no switch is present.
fn source_block_switches(input: Input) -> IResult<Input, Input, ()> {
    let mut rest = input;
    while !rest.is_empty() {
        // A separator is only required once something has been consumed.
        let needs_separator = rest.len() != input.len();
        let step = tuple::<_, _, (), _>((
            cond(needs_separator, space1),
            alt((
                separated_pair(
                    alt((tag("-l"), tag("-n"))),
                    space1,
                    take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
                ),
                tuple((tag("+"), alpha1)),
                tuple((tag("-"), alpha1)),
            )),
        ))(rest);
        if let Ok((next, _)) = step {
            rest = next;
        } else {
            break;
        }
    }

    let consumed = input.len() - rest.len();
    if consumed == 0 {
        Err(nom::Err::Error(()))
    } else {
        Ok(input.take_split(consumed))
    }
}
/// Parses the `#+END_<name>` line that closes the block called `name`.
///
/// Both the `#+END_` keyword and the block name are matched
/// case-insensitively, so a block opened with `#+BEGIN_SRC` may be closed
/// with `#+end_src`. Leading and trailing spaces are allowed; the line must
/// end at a line break or at the end of input.
fn block_end_node<'a>(input: Input<'a>, name: &str) -> IResult<Input<'a>, GreenElement, ()> {
    let (input, (ws, end, name, ws_, nl)) = tuple((
        space0,
        tag_no_case("#+END_"),
        // The begin line accepts any letter case, so the end line must too.
        tag_no_case(name),
        space0,
        eol_or_eof,
    ))(input)?;

    let mut b = NodeBuilder::new();
    b.ws(ws);
    b.text(end);
    b.text(name);
    b.ws(ws_);
    b.nl(nl);
    Ok((input, b.finish(BLOCK_END)))
}
/// Splits block content into `TEXT` tokens and the `COMMA` tokens used for
/// comma-quoting.
///
/// A comma is treated as a quote only when it is the first character of a
/// line and is followed by `*` or `#+`. Empty text pieces are not emitted.
fn comma_quoted_text_nodes(input: Input) -> Vec<GreenElement> {
    let s = input.as_str();
    let mut nodes = vec![];
    // Start of the text that has not been emitted yet.
    let mut pending = 0;

    for i in line_starts_iter(s) {
        // line must start with either ",*" or ",#+"
        let is_quoted = s.get(i..i + 2) == Some(",*") || s.get(i..i + 3) == Some(",#+");
        if !is_quoted {
            continue;
        }
        let before = &s[pending..i];
        if !before.is_empty() {
            nodes.push(token(TEXT, before));
        }
        nodes.push(token(COMMA, ","));
        pending = i + 1;
    }

    let remainder = &s[pending..];
    if !remainder.is_empty() {
        nodes.push(token(TEXT, remainder));
    }
    nodes
}
/// Public entry point for block parsing: runs `block_node_base` through the
/// crate's `lossless_parser!` macro. With the `tracing` feature enabled the
/// call is instrumented at debug level, recording `input.s` as the `input` field.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn block_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(block_node_base, input)
}
// Snapshot tests for `block_node`: comma-quoted content in an example block,
// blank lines inside a source block, lower-case keywords, and a source block
// header carrying language, switches and parameters.
#[test]
fn test_parse() {
use crate::ast::{ExampleBlock, SourceBlock};
use crate::tests::to_ast;
let to_src_block = to_ast::<SourceBlock>(block_node);
let to_example_block = to_ast::<ExampleBlock>(block_node);
insta::assert_debug_snapshot!(
to_example_block(
r#"#+BEGIN_EXAMPLE
,* headline
,#+block
text
#+END_EXAMPLE"#
).syntax,
@r###"
EXAMPLE_BLOCK@0..59
BLOCK_BEGIN@0..16
TEXT@0..8 "#+BEGIN_"
TEXT@8..15 "EXAMPLE"
NEW_LINE@15..16 "\n"
BLOCK_CONTENT@16..42
COMMA@16..17 ","
TEXT@17..28 "* headline\n"
COMMA@28..29 ","
TEXT@29..42 "#+block\ntext\n"
BLOCK_END@42..59
WHITESPACE@42..46 " "
TEXT@46..52 "#+END_"
TEXT@52..59 "EXAMPLE"
"###
);
insta::assert_debug_snapshot!(
to_src_block(
r#"#+BEGIN_SRC
#+END_SRC"#
).syntax,
@r###"
SOURCE_BLOCK@0..27
BLOCK_BEGIN@0..12
TEXT@0..8 "#+BEGIN_"
TEXT@8..11 "SRC"
NEW_LINE@11..12 "\n"
BLANK_LINE@12..13 "\n"
BLANK_LINE@13..14 "\n"
BLOCK_CONTENT@14..14
BLOCK_END@14..27
WHITESPACE@14..18 " "
TEXT@18..24 "#+END_"
TEXT@24..27 "SRC"
"###
);
insta::assert_debug_snapshot!(
to_src_block(
r#"#+begin_src
#+end_src"#
).syntax,
@r###"
SOURCE_BLOCK@0..25
BLOCK_BEGIN@0..12
TEXT@0..8 "#+begin_"
TEXT@8..11 "src"
NEW_LINE@11..12 "\n"
BLOCK_CONTENT@12..12
BLOCK_END@12..25
WHITESPACE@12..16 " "
TEXT@16..22 "#+end_"
TEXT@22..25 "src"
"###
);
insta::assert_debug_snapshot!(
to_src_block(
r#"#+BEGIN_SRC javascript -n 20 -r :var n=0, l=2 :foo=bar
alert('Hello World!');
#+END_SRC
"#).syntax,
@r###"
SOURCE_BLOCK@0..100
BLOCK_BEGIN@0..58
TEXT@0..8 "#+BEGIN_"
TEXT@8..11 "SRC"
WHITESPACE@11..12 " "
SRC_BLOCK_LANGUAGE@12..22 "javascript"
WHITESPACE@22..24 " "
SRC_BLOCK_SWITCHES@24..32 "-n 20 -r"
WHITESPACE@32..34 " "
SRC_BLOCK_PARAMETERS@34..57 ":var n=0, l=2 :foo=bar"
NEW_LINE@57..58 "\n"
BLOCK_CONTENT@58..81
TEXT@58..81 "alert('Hello World!');\n"
BLOCK_END@81..95
WHITESPACE@81..85 " "
TEXT@85..91 "#+END_"
TEXT@91..94 "SRC"
NEW_LINE@94..95 "\n"
BLANK_LINE@95..96 "\n"
BLANK_LINE@96..100 " "
"###
);
// TODO: more testing
}

View file

@ -1,134 +0,0 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{digit1, space0},
combinator::{map, opt, recognize},
sequence::tuple,
IResult,
};
use super::{
combinator::{
blank_lines, colon_token, double_arrow_token, eol_or_eof, GreenElement, NodeBuilder,
},
input::Input,
timestamp::{timestamp_active_node, timestamp_inactive_node},
SyntaxKind,
};
/// Parses a `CLOCK:` line into a `CLOCK` node.
///
/// Accepted shape, in order: optional leading whitespace, the literal `CLOCK:`, optional
/// whitespace, an inactive or active timestamp, an optional `=> H:MM` duration, optional
/// trailing whitespace, a line ending (or end of input), and any blank lines that follow.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn clock_node(input: Input) -> IResult<Input, GreenElement, ()> {
    // The inactive form is tried first, then the active one.
    let timestamp = alt((timestamp_inactive_node, timestamp_active_node));
    // Optional ` => H:MM` suffix; the digits and the colon are kept as one text span.
    let duration = opt(tuple((
        space0,
        double_arrow_token,
        space0,
        recognize(tuple((digit1, colon_token, digit1))),
    )));

    let mut parser = map(
        tuple((
            space0,
            tag("CLOCK:"),
            space0,
            timestamp,
            duration,
            space0,
            eol_or_eof,
            blank_lines,
        )),
        |(indent, keyword, gap, timestamp, duration, trailing, eol, blanks)| {
            let mut builder = NodeBuilder::new();
            builder.ws(indent);
            builder.text(keyword);
            builder.ws(gap);
            builder.push(timestamp);
            if let Some((before_arrow, arrow, after_arrow, time)) = duration {
                builder.ws(before_arrow);
                builder.push(arrow);
                builder.ws(after_arrow);
                builder.text(time);
            }
            builder.ws(trailing);
            builder.nl(eol);
            builder.children.extend(blanks);
            builder.finish(SyntaxKind::CLOCK)
        },
    );

    crate::lossless_parser!(parser, input)
}
// Snapshot tests for `clock_node`.
#[test]
fn parse() {
use crate::ast::Clock;
use crate::tests::to_ast;
let to_clock = to_ast::<Clock>(clock_node);
// A clock line with a single inactive timestamp and no duration.
insta::assert_debug_snapshot!(
to_clock("CLOCK: [2003-09-16 Tue 09:39]").syntax,
@r###"
CLOCK@0..29
TEXT@0..6 "CLOCK:"
WHITESPACE@6..7 " "
TIMESTAMP_INACTIVE@7..29
L_BRACKET@7..8 "["
TIMESTAMP_YEAR@8..12 "2003"
MINUS@12..13 "-"
TIMESTAMP_MONTH@13..15 "09"
MINUS@15..16 "-"
TIMESTAMP_DAY@16..18 "16"
WHITESPACE@18..19 " "
TIMESTAMP_DAYNAME@19..22 "Tue"
WHITESPACE@22..23 " "
TIMESTAMP_HOUR@23..25 "09"
COLON@25..26 ":"
TIMESTAMP_MINUTE@26..28 "39"
R_BRACKET@28..29 "]"
"###
);
// A timestamp range followed by a `=> 1:00` duration and a trailing blank line.
insta::assert_debug_snapshot!(
to_clock("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00\n\n").syntax,
@r###"
CLOCK@0..64
TEXT@0..6 "CLOCK:"
WHITESPACE@6..7 " "
TIMESTAMP_INACTIVE@7..53
L_BRACKET@7..8 "["
TIMESTAMP_YEAR@8..12 "2003"
MINUS@12..13 "-"
TIMESTAMP_MONTH@13..15 "09"
MINUS@15..16 "-"
TIMESTAMP_DAY@16..18 "16"
WHITESPACE@18..19 " "
TIMESTAMP_DAYNAME@19..22 "Tue"
WHITESPACE@22..23 " "
TIMESTAMP_HOUR@23..25 "09"
COLON@25..26 ":"
TIMESTAMP_MINUTE@26..28 "39"
R_BRACKET@28..29 "]"
MINUS2@29..31 "--"
L_BRACKET@31..32 "["
TIMESTAMP_YEAR@32..36 "2003"
MINUS@36..37 "-"
TIMESTAMP_MONTH@37..39 "09"
MINUS@39..40 "-"
TIMESTAMP_DAY@40..42 "16"
WHITESPACE@42..43 " "
TIMESTAMP_DAYNAME@43..46 "Tue"
WHITESPACE@46..47 " "
TIMESTAMP_HOUR@47..49 "10"
COLON@49..50 ":"
TIMESTAMP_MINUTE@50..52 "39"
R_BRACKET@52..53 "]"
WHITESPACE@53..54 " "
DOUBLE_ARROW@54..56 "=>"
WHITESPACE@56..58 " "
TEXT@58..62 "1:00"
NEW_LINE@62..63 "\n"
BLANK_LINE@63..64 "\n"
"###
);
}

View file

@ -1,162 +0,0 @@
use nom::{bytes::complete::take_until, combinator::opt, sequence::tuple, IResult, InputTake};
use crate::syntax::{
combinator::{at_token, l_curly2_token, l_curly_token, r_curly_token},
object::standard_object_nodes,
};
use super::{
combinator::{GreenElement, NodeBuilder},
input::Input,
SyntaxKind,
};
/// Parses a cloze object (`{{text}}`, optionally with a `{hint}` and/or `@id`) from the start
/// of `input`.
///
/// Thin public entry point: the parsing itself is done by `cloze_node_base`, which is invoked
/// through `lossless_parser!`. When the `tracing` feature is enabled the call is instrumented
/// at debug level and records `input.s`.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn cloze_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(cloze_node_base, input)
}
fn cloze_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, l_curly2) = l_curly2_token(input)?;
let mut inside_latex = false;
let mut text_end = 0;
for (index, byte) in input.bytes().enumerate() {
match byte {
b'}' if !inside_latex => {
text_end = index;
break;
}
b'$' => {
inside_latex = !inside_latex;
}
_ => {}
}
}
if text_end == 0 {
return Err(nom::Err::Error(()));
}
let (input, text) = input.take_split(text_end);
let (input, r_curly) = r_curly_token(input)?;
let (input, hint) = opt(tuple((l_curly_token, take_until("}"), r_curly_token)))(input)?;
let (input, id) = opt(tuple((at_token, take_until("}"))))(input)?;
let (input, r_curly_) = r_curly_token(input)?;
let mut b = NodeBuilder::new();
b.push(l_curly2);
b.children.extend(standard_object_nodes(text));
b.push(r_curly);
if let Some((l_curly, hint, r_curly)) = hint {
b.push(l_curly);
b.token(SyntaxKind::TEXT, hint);
b.push(r_curly);
}
if let Some((at, id)) = id {
b.push(at);
b.token(SyntaxKind::TEXT, id);
}
b.push(r_curly_);
Ok((input, b.finish(SyntaxKind::CLOZE)))
}
// Snapshot tests for `cloze_node`, followed by inputs that must be rejected.
#[test]
fn parse() {
use crate::ast::Cloze;
use crate::config::ParseConfig;
use crate::tests::to_ast;
let to_cloze = to_ast::<Cloze>(cloze_node);
// Text only.
insta::assert_debug_snapshot!(
to_cloze("{{text}}").syntax,
@r###"
CLOZE@0..8
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
R_CURLY@7..8 "}"
"###
);
// Text followed by an `@id`.
insta::assert_debug_snapshot!(
to_cloze("{{text}@id}").syntax,
@r###"
CLOZE@0..11
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
AT@7..8 "@"
TEXT@8..10 "id"
R_CURLY@10..11 "}"
"###
);
// Text followed by a `{hint}`.
insta::assert_debug_snapshot!(
to_cloze("{{text}{hint}}").syntax,
@r###"
CLOZE@0..14
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
L_CURLY@7..8 "{"
TEXT@8..12 "hint"
R_CURLY@12..13 "}"
R_CURLY@13..14 "}"
"###
);
// Text followed by both a hint and an id.
insta::assert_debug_snapshot!(
to_cloze("{{text}{hint}@id}").syntax,
@r###"
CLOZE@0..17
L_CURLY2@0..2 "{{"
TEXT@2..6 "text"
R_CURLY@6..7 "}"
L_CURLY@7..8 "{"
TEXT@8..12 "hint"
R_CURLY@12..13 "}"
AT@13..14 "@"
TEXT@14..16 "id"
R_CURLY@16..17 "}"
"###
);
// Braces inside a `$...$` fragment do not end the cloze text.
insta::assert_debug_snapshot!(
to_cloze("{{$\\frac{a}{b}$}{fractions}}").syntax,
@r###"
CLOZE@0..28
L_CURLY2@0..2 "{{"
LATEX_FRAGMENT@2..15
DOLLAR@2..3 "$"
TEXT@3..14 "\\frac{a}{b}"
DOLLAR@14..15 "$"
R_CURLY@15..16 "}"
L_CURLY@16..17 "{"
TEXT@17..26 "fractions"
R_CURLY@26..27 "}"
R_CURLY@27..28 "}"
"###
);
// Empty text, unbalanced braces and stray characters after the text are all rejected.
let config = &ParseConfig::default();
assert!(cloze_node(("{{}}", config).into()).is_err());
assert!(cloze_node(("{{text}", config).into()).is_err());
assert!(cloze_node(("{text}}", config).into()).is_err());
assert!(cloze_node(("{{text}{}", config).into()).is_err());
assert!(cloze_node(("{{text}a}", config).into()).is_err());
}

View file

@ -1,327 +0,0 @@
use memchr::{memchr2, memchr2_iter, Memchr2};
use nom::{bytes::complete::tag, IResult, InputTake, Slice};
use rowan::{GreenNode, GreenToken, Language, NodeOrToken};
use std::iter::once;
use super::{input::Input, OrgLanguage, SyntaxKind, SyntaxKind::*};
pub type GreenElement = NodeOrToken<GreenNode, GreenToken>;
/// Builds a green token element of the given `kind` holding the text `input`.
#[inline]
pub fn token(kind: SyntaxKind, input: &str) -> GreenElement {
GreenElement::Token(GreenToken::new(OrgLanguage::kind_to_raw(kind), input))
}
/// Builds a green node element of the given `kind` from `children`.
///
/// `children` may be any collection whose iterator reports an exact size (for example a `Vec`
/// or an array).
#[inline]
pub fn node<I>(kind: SyntaxKind, children: I) -> GreenElement
where
I: IntoIterator<Item = GreenElement>,
I::IntoIter: ExactSizeIterator,
{
GreenElement::Node(GreenNode::new(OrgLanguage::kind_to_raw(kind), children))
}
// Generates a public parser function `$name` that matches the literal `$token` with nom's
// `tag` and wraps the matched text in a green token of kind `$kind`. The generated function
// also receives a doc comment assembled from the literal.
macro_rules! token_parser {
($name:ident, $token:literal, $kind:ident) => {
#[doc = "Recognizes `"]
#[doc = $token]
#[doc = "` and returns GreenToken"]
pub fn $name(input: Input) -> IResult<Input, GreenElement, ()> {
let (i, o) = tag($token)(input)?;
Ok((i, token($kind, o.as_str())))
}
};
}
// One generated parser per punctuation token. Each parser matches exactly its own literal;
// choosing between overlapping literals (e.g. `[` vs `[[`) is left to the caller.
token_parser!(l_bracket_token, "[", L_BRACKET);
token_parser!(r_bracket_token, "]", R_BRACKET);
token_parser!(l_bracket2_token, "[[", L_BRACKET2);
token_parser!(r_bracket2_token, "]]", R_BRACKET2);
token_parser!(l_parens_token, "(", L_PARENS);
token_parser!(r_parens_token, ")", R_PARENS);
token_parser!(l_angle_token, "<", L_ANGLE);
token_parser!(r_angle_token, ">", R_ANGLE);
token_parser!(l_curly_token, "{", L_CURLY);
// Only compiled when the `syntax-org-fc` feature is enabled.
#[cfg(feature = "syntax-org-fc")]
token_parser!(l_curly2_token, "{{", L_CURLY2);
token_parser!(r_curly_token, "}", R_CURLY);
token_parser!(l_curly3_token, "{{{", L_CURLY3);
token_parser!(r_curly3_token, "}}}", R_CURLY3);
token_parser!(l_angle2_token, "<<", L_ANGLE2);
token_parser!(r_angle2_token, ">>", R_ANGLE2);
token_parser!(l_angle3_token, "<<<", L_ANGLE3);
token_parser!(r_angle3_token, ">>>", R_ANGLE3);
token_parser!(at_token, "@", AT);
token_parser!(at2_token, "@@", AT2);
token_parser!(minus2_token, "--", MINUS2);
// token_parser!(percent_token, "%", PERCENT);
token_parser!(percent2_token, "%%", PERCENT2);
// token_parser!(slash_token, "/", SLASH);
token_parser!(backslash_token, "\\", BACKSLASH);
token_parser!(underscore_token, "_", UNDERSCORE);
// token_parser!(star_token, "*", STAR);
// token_parser!(plus_token, "+", PLUS);
token_parser!(minus_token, "-", MINUS);
token_parser!(colon_token, ":", COLON);
token_parser!(colon2_token, "::", COLON2);
token_parser!(pipe_token, "|", PIPE);
token_parser!(dollar_token, "$", DOLLAR);
token_parser!(dollar2_token, "$$", DOLLAR2);
// token_parser!(equal_token, "=", EQUAL);
// token_parser!(tilde_token, "~", TILDE);
token_parser!(hash_plus_token, "#+", HASH_PLUS);
token_parser!(caret_token, "^", CARET);
token_parser!(hash_token, "#", HASH);
token_parser!(double_arrow_token, "=>", DOUBLE_ARROW);
/// Runs `$parser` on `$input` and checks, in debug builds, that the parser is lossless.
///
/// "Lossless" means that the text of the produced element (`o.to_string()`) is exactly the
/// prefix of the input that the parser consumed. The macro expands to an expression of type
/// `IResult<_, _, _>`; a parser error is propagated with `?`, so it must be used inside a
/// function returning a compatible `Result`.
///
/// With the `tracing` feature enabled the consumed text is also emitted as a trace event.
macro_rules! lossless_parser {
    ($parser:expr, $input:expr) => {{
        // Bind the input once so that `$input` is evaluated a single time even when it is a
        // non-trivial expression; the copy is reused below to compute the consumed prefix.
        let i_ = $input;
        let (i, o) = $parser(i_)?;
        cfg_if::cfg_if! {
            if #[cfg(feature = "tracing")] {
                tracing::trace!(consumed = o.to_string());
            }
        }
        debug_assert_eq!(
            &i_.as_str()[0..(i_.len() - i.len())],
            &o.to_string(),
            "parser must be lossless"
        );
        Ok((i, o))
    }};
}
pub(crate) use lossless_parser;
/// Consumes every leading blank line of `input`.
///
/// A blank line is a non-empty run of ASCII whitespace that ends at a line terminator or at the
/// end of input. Each one is returned as its own `BLANK_LINE` token (terminator included). This
/// parser never fails; when there is no leading blank line the input is returned untouched
/// together with an empty list.
pub fn blank_lines(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
    if input.is_empty() {
        return Ok((input, vec![]));
    }

    let text = input.as_str();
    let mut blanks = vec![];
    let mut consumed = 0;

    for end in line_ends_iter(text) {
        let line = &text[consumed..end];
        // Stop at the first line that is empty (end of input) or has visible content.
        if line.is_empty() || !line.bytes().all(|b| b.is_ascii_whitespace()) {
            break;
        }
        blanks.push(token(BLANK_LINE, line));
        consumed = end;
    }

    Ok((input.slice(consumed..), blanks))
}
// Unit tests for `blank_lines`: empty input, a single newline, a line with content, and mixed
// `\n` / `\r\n` / `\r` terminators.
#[test]
fn test_blank_lines() {
use crate::config::ParseConfig;
let config = &ParseConfig::default();
let (input, output) = blank_lines(("", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output, vec![]);
let (input, output) = blank_lines(("\n", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.len(), 1);
assert_eq!(output[0].to_string(), "\n");
// A line containing non-whitespace is not consumed.
let (input, output) = blank_lines((" t", config).into()).unwrap();
assert_eq!(input.as_str(), " t");
assert_eq!(output, vec![]);
// Trailing whitespace without a terminator still counts as a blank line.
let (input, output) = blank_lines((" \r\n\n\t\t\r\n \n ", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.len(), 5);
assert_eq!(output[0].to_string(), " \r\n");
assert_eq!(output[1].to_string(), "\n");
assert_eq!(output[2].to_string(), "\t\t\r\n");
assert_eq!(output[3].to_string(), " \n");
assert_eq!(output[4].to_string(), " ");
// Consumption stops at the first line with content; a lone `\r` is a terminator too.
let (input, output) =
blank_lines(("\r\n\n\t\t\r\n \n\r \r t\n ", config).into()).unwrap();
assert_eq!(input.as_str(), " t\n ");
assert_eq!(output.len(), 6);
assert_eq!(output[0].to_string(), "\r\n");
assert_eq!(output[1].to_string(), "\n");
assert_eq!(output[2].to_string(), "\t\t\r\n");
assert_eq!(output[3].to_string(), " \n");
assert_eq!(output[4].to_string(), "\r");
assert_eq!(output[5].to_string(), " \r");
}
/// Splits the first line of `input` into three parts and returns the remaining input.
///
/// The parts are, in order: the line contents without trailing whitespace, the trailing ASCII
/// whitespace, and the line terminator (`\n`, `\r\n`, `\r`, or empty at end of input). This
/// parser never fails.
pub fn trim_line_end(input: Input) -> IResult<Input, (Input, Input, Input), ()> {
    let bytes = input.as_bytes();

    let (rest, line, nl) = match memchr2(b'\r', b'\n', bytes) {
        Some(at) => {
            // `\r\n` counts as a single two-byte terminator.
            let is_crlf = bytes[at] == b'\r' && matches!(bytes.get(at + 1), Some(b'\n'));
            let nl_end = if is_crlf { at + 2 } else { at + 1 };
            (
                input.slice(nl_end..),
                input.slice(0..at),
                input.slice(at..nl_end),
            )
        }
        // No terminator: the whole input is the line.
        None => (input.of(""), input, input.of("")),
    };

    let last_visible = line.bytes().rposition(|b| !b.is_ascii_whitespace());
    let (contents, ws) = match last_visible {
        Some(last) => (line.slice(0..last + 1), line.slice(last + 1..)),
        None => (line.of(""), line),
    };

    Ok((rest, (contents, ws, nl)))
}
// Unit tests for `trim_line_end`: empty input, a line without a terminator, a `\r\n`
// terminated line with trailing whitespace, and a whitespace-only line ending in `\r`.
#[test]
fn test_trim_line_end() {
use crate::config::ParseConfig;
let config = &ParseConfig::default();
let (input, output) = trim_line_end(("", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.0.as_str(), "");
assert_eq!(output.1.as_str(), "");
assert_eq!(output.2.as_str(), "");
let (input, output) = trim_line_end(("* hello, world :abc:", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.0.as_str(), "* hello, world :abc:");
assert_eq!(output.1.as_str(), "");
assert_eq!(output.2.as_str(), "");
let (input, output) =
trim_line_end(("* hello, world :abc: \r\nrest\n", config).into()).unwrap();
assert_eq!(input.as_str(), "rest\n");
assert_eq!(output.0.as_str(), "* hello, world :abc:");
assert_eq!(output.1.as_str(), " ");
assert_eq!(output.2.as_str(), "\r\n");
let (input, output) = trim_line_end((" \rr", config).into()).unwrap();
assert_eq!(input.as_str(), "r");
assert_eq!(output.0.as_str(), "");
assert_eq!(output.1.as_str(), " ");
assert_eq!(output.2.as_str(), "\r");
}
/// Matches a line terminator (`\n`, `\r\n` or a lone `\r`) or the end of input.
///
/// On success the matched terminator is returned (empty at end of input); any other leading
/// byte is an error.
pub fn eol_or_eof(input: Input) -> IResult<Input, Input, ()> {
    let count = match input.as_bytes() {
        [] => 0,
        [b'\r', b'\n', ..] => 2,
        [b'\n', ..] | [b'\r', ..] => 1,
        _ => return Err(nom::Err::Error(())),
    };
    Ok(input.take_split(count))
}
/// Iterator over the byte offsets that directly follow each line terminator of a string.
///
/// `\n`, `\r\n` and a lone `\r` are all treated as terminators; `\r\n` yields a single offset.
struct LineStart<'a> {
    bytes: &'a [u8],
    iter: Memchr2<'a>,
}

impl<'a> LineStart<'a> {
    fn new(input: &'a str) -> Self {
        let bytes = input.as_bytes();
        let iter = memchr2_iter(b'\r', b'\n', bytes);
        LineStart { bytes, iter }
    }
}

impl<'a> Iterator for LineStart<'a> {
    type Item = usize;

    fn next(&mut self) -> Option<Self::Item> {
        let hit = self.iter.next()?;
        let is_crlf = self.bytes[hit] == b'\r' && matches!(self.bytes.get(hit + 1), Some(b'\n'));
        if !is_crlf {
            return Some(hit + 1);
        }
        // The underlying search reports the `\n` of a `\r\n` pair as a separate hit: skip it.
        let lf = self.iter.next();
        debug_assert_eq!(hit + 1, lf.unwrap());
        Some(hit + 2)
    }
}
/// Returns an iterator over the byte offsets at which lines start in `s`.
///
/// Offset `0` is always yielded first, followed by the offset after every line terminator
/// (`\n`, `\r\n` or a lone `\r`). If `s` ends with a terminator the last offset equals `s.len()`.
pub fn line_starts_iter(s: &str) -> impl Iterator<Item = usize> + '_ {
once(0).chain(LineStart::new(s))
}
/// Returns an iterator over the byte offsets at which lines end in `s`.
///
/// Yields the offset after every line terminator (`\n`, `\r\n` or a lone `\r`) and finally
/// `s.len()`. If `s` ends with a terminator, `s.len()` is therefore yielded twice.
pub fn line_ends_iter(s: &str) -> impl Iterator<Item = usize> + '_ {
LineStart::new(s).chain(once(s.len()))
}
/// Accumulates the children of a green node before it is finished with a concrete kind.
pub struct NodeBuilder {
    pub children: Vec<GreenElement>,
}

impl NodeBuilder {
    /// Creates a builder with no children.
    pub fn new() -> NodeBuilder {
        Self {
            children: Vec::new(),
        }
    }

    /// Appends `i` as a whitespace token; empty input is ignored.
    pub fn ws(&mut self, i: Input) {
        if i.is_empty() {
            return;
        }
        debug_assert!(i.bytes().all(|c| c.is_ascii_whitespace()));
        self.children.push(i.ws_token());
    }

    /// Appends `i` as a new-line token; empty input is ignored.
    pub fn nl(&mut self, i: Input) {
        if i.is_empty() {
            return;
        }
        debug_assert!(
            matches!(i.s, "\n" | "\r\n" | "\r"),
            "{:?} should be a new line",
            i.s
        );
        self.children.push(i.nl_token());
    }

    /// Appends `i` as a text token; empty input is ignored.
    pub fn text(&mut self, i: Input) {
        if i.is_empty() {
            return;
        }
        self.children.push(i.text_token());
    }

    /// Appends `i` as a token of the given `kind`, even when `i` is empty.
    pub fn token(&mut self, kind: SyntaxKind, i: Input) {
        let element = i.token(kind);
        self.children.push(element);
    }

    /// Appends an already built element.
    pub fn push(&mut self, elem: GreenElement) {
        self.children.push(elem);
    }

    /// Appends `elem` when it is `Some`; does nothing otherwise.
    pub fn push_opt(&mut self, elem: Option<GreenElement>) {
        self.children.extend(elem);
    }

    /// Number of children collected so far.
    pub fn len(&self) -> usize {
        self.children.len()
    }

    /// Consumes the builder and wraps its children in a node of the given `kind`.
    pub fn finish(self, kind: SyntaxKind) -> GreenElement {
        let Self { children } = self;
        GreenElement::Node(GreenNode::new(kind.into(), children))
    }
}

View file

@ -1,115 +0,0 @@
use nom::{
bytes::complete::{tag, take_while},
character::complete::{space0, space1},
combinator::{iterator, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder},
input::Input,
SyntaxKind,
};
fn comment_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let mut b = NodeBuilder::new();
let mut iter = iterator(
input,
opt(tuple((
space0,
tag("#"),
opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))),
eol_or_eof,
))),
);
for (idx, option) in iter.enumerate() {
match option {
Some((ws, common, content, eol)) => {
b.ws(ws);
b.token(SyntaxKind::HASH, common);
if let Some((ws, text)) = content {
b.ws(ws);
b.text(text);
}
b.text(eol);
}
_ if idx == 0 => return Err(nom::Err::Error(())),
_ => break,
}
}
let (input, _) = iter.finish()?;
let (input, post_blank) = blank_lines(input)?;
b.children.extend(post_blank);
Ok((input, b.finish(SyntaxKind::COMMENT)))
}
/// Parses a comment element (one or more `#` lines) from the start of `input`.
///
/// Thin public entry point: the parsing itself is done by `comment_node_base`, which is invoked
/// through `lossless_parser!`. When the `tracing` feature is enabled the call is instrumented
/// at debug level and records `input.s`.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn comment_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(comment_node_base, input)
}
// Snapshot tests for `comment_node`.
#[test]
fn parse() {
use crate::{
syntax::{comment::comment_node, input::Input, SyntaxNode},
ParseConfig,
};
// Helper: parse `input` with the default configuration and wrap the result in a syntax node.
let t = |input: &str| {
SyntaxNode::new_root(
comment_node(Input {
s: input,
c: &ParseConfig::default(),
})
.unwrap()
.1
.into_node()
.unwrap(),
)
};
// A bare `#` at end of input.
insta::assert_debug_snapshot!(
t("#"),
@r###"
COMMENT@0..1
HASH@0..1 "#"
"###
);
// Several consecutive comment lines followed by a blank line.
insta::assert_debug_snapshot!(
t("#\n # a\n #\n\n"),
@r###"
COMMENT@0..12
HASH@0..1 "#"
TEXT@1..2 "\n"
WHITESPACE@2..4 " "
HASH@4..5 "#"
WHITESPACE@5..6 " "
TEXT@6..7 "a"
TEXT@7..8 "\n"
WHITESPACE@8..9 " "
HASH@9..10 "#"
TEXT@10..11 "\n"
BLANK_LINE@11..12 "\n"
"###
);
// The comment stops at the first non-comment line.
insta::assert_debug_snapshot!(
t("#\na\n #\n\n"),
@r###"
COMMENT@0..2
HASH@0..1 "#"
TEXT@1..2 "\n"
"###
);
}

View file

@ -1,147 +0,0 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::digit0,
combinator::map,
sequence::{pair, separated_pair, tuple},
IResult,
};
use super::{
combinator::{l_bracket_token, node, r_bracket_token, token, GreenElement},
input::Input,
SyntaxKind::*,
};
/// Parses a statistics cookie, `[N/M]` or `[N%]`, into a `COOKIE` node.
///
/// Every number may be empty (`[/]` and `[%]` are accepted); empty numbers are still emitted as
/// empty `TEXT` tokens.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn cookie_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut parser = map(
        tuple((
            l_bracket_token,
            alt((
                separated_pair(digit0, tag("/"), digit0),
                pair(digit0, tag("%")),
            )),
            r_bracket_token,
        )),
        |(open, (first, second), close)| {
            let mut children = vec![open, token(TEXT, first.as_str())];
            // In the fraction form `second` holds digits only, so a `%` identifies the
            // percentage form.
            if second.as_str() == "%" {
                children.push(token(PERCENT, second.as_str()));
            } else {
                children.push(token(SLASH, "/"));
                children.push(token(TEXT, second.as_str()));
            }
            children.push(close);
            node(COOKIE, children)
        },
    );

    crate::lossless_parser!(parser, input)
}
// Snapshot tests for `cookie_node`, followed by inputs that must be rejected.
#[test]
fn parse() {
use crate::ast::Cookie;
use crate::tests::to_ast;
use crate::ParseConfig;
let to_cookie = to_ast::<Cookie>(cookie_node);
// Fraction form.
insta::assert_debug_snapshot!(
to_cookie("[1/10]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..2 "1"
SLASH@2..3 "/"
TEXT@3..5 "10"
R_BRACKET@5..6 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[1/1000]").syntax,
@r###"
COOKIE@0..8
L_BRACKET@0..1 "["
TEXT@1..2 "1"
SLASH@2..3 "/"
TEXT@3..7 "1000"
R_BRACKET@7..8 "]"
"###
);
// Percentage form.
insta::assert_debug_snapshot!(
to_cookie("[10%]").syntax,
@r###"
COOKIE@0..5
L_BRACKET@0..1 "["
TEXT@1..3 "10"
PERCENT@3..4 "%"
R_BRACKET@4..5 "]"
"###
);
// Missing numbers are accepted and produce empty TEXT tokens.
insta::assert_debug_snapshot!(
to_cookie("[%]").syntax,
@r###"
COOKIE@0..3
L_BRACKET@0..1 "["
TEXT@1..1 ""
PERCENT@1..2 "%"
R_BRACKET@2..3 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[/]").syntax,
@r###"
COOKIE@0..3
L_BRACKET@0..1 "["
TEXT@1..1 ""
SLASH@1..2 "/"
TEXT@2..2 ""
R_BRACKET@2..3 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[100/]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..4 "100"
SLASH@4..5 "/"
TEXT@5..5 ""
R_BRACKET@5..6 "]"
"###
);
insta::assert_debug_snapshot!(
to_cookie("[/100]").syntax,
@r###"
COOKIE@0..6
L_BRACKET@0..1 "["
TEXT@1..1 ""
SLASH@1..2 "/"
TEXT@2..5 "100"
R_BRACKET@5..6 "]"
"###
);
// Extra whitespace, doubled separators and other separators are rejected.
let config = &ParseConfig::default();
assert!(cookie_node(("[10% ]", config).into()).is_err());
assert!(cookie_node(("[1//100]", config).into()).is_err());
assert!(cookie_node(("[1\\100]", config).into()).is_err());
assert!(cookie_node(("[10%%]", config).into()).is_err());
}

View file

@ -1,139 +0,0 @@
use nom::{combinator::opt, IResult};
use super::{
combinator::{blank_lines, node, GreenElement},
drawer::property_drawer_node,
headline::{headline_node, section_node},
input::Input,
SyntaxKind::*,
};
/// Parses a whole document from `input` into a `DOCUMENT` node.
///
/// Thin public entry point: the parsing itself is done by `document_node_base`, which is invoked
/// through `lossless_parser!`. When the `tracing` feature is enabled the call is instrumented
/// at debug level and records `input.s`.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn document_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(document_node_base, input)
}
/// Builds the `DOCUMENT` node.
///
/// Layout, in order: an optional property drawer, leading blank lines, an optional section, and
/// then headlines until the input is exhausted. A headline that fails to parse makes the whole
/// document fail.
fn document_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    if input.is_empty() {
        return Ok((input, node(DOCUMENT, [])));
    }

    let mut children: Vec<GreenElement> = Vec::new();

    let (input, property_drawer) = opt(property_drawer_node)(input)?;
    children.extend(property_drawer);

    let (input, pre_blank) = blank_lines(input)?;
    children.extend(pre_blank);

    // Nothing but a drawer and/or blank lines.
    if input.is_empty() {
        return Ok((input, node(DOCUMENT, children)));
    }

    let (input, section) = opt(section_node)(input)?;
    children.extend(section);

    let mut rest = input;
    while !rest.is_empty() {
        let (next, headline) = headline_node(rest)?;
        // Every headline must consume something, otherwise this loop would never end.
        debug_assert!(rest.len() > next.len(), "{} > {}", rest.len(), next.len(),);
        children.push(headline);
        rest = next;
    }

    Ok((rest, node(DOCUMENT, children)))
}
// Snapshot tests for `document_node`.
#[test]
fn parse() {
use crate::ast::Document;
use crate::tests::to_ast;
let to_document = to_ast::<Document>(document_node);
// Empty input yields an empty document.
insta::assert_debug_snapshot!(
to_document("").syntax,
@r###"
DOCUMENT@0..0
"###
);
// Only blank lines.
insta::assert_debug_snapshot!(
to_document("\n \n\n").syntax,
@r###"
DOCUMENT@0..5
BLANK_LINE@0..1 "\n"
BLANK_LINE@1..4 " \n"
BLANK_LINE@4..5 "\n"
"###
);
// Plain text becomes a section.
insta::assert_debug_snapshot!(
to_document("section").syntax,
@r###"
DOCUMENT@0..7
SECTION@0..7
PARAGRAPH@0..7
TEXT@0..7 "section"
"###
);
// A blank line followed by a headline.
insta::assert_debug_snapshot!(
to_document("\n* section").syntax,
@r###"
DOCUMENT@0..10
BLANK_LINE@0..1 "\n"
HEADLINE@1..10
HEADLINE_STARS@1..2 "*"
WHITESPACE@2..3 " "
HEADLINE_TITLE@3..10
TEXT@3..10 "section"
"###
);
// A level-2 headline followed by a level-1 headline: both are direct children of the document.
insta::assert_debug_snapshot!(
to_document("\n** heading 2\n* heading 1").syntax,
@r###"
DOCUMENT@0..25
BLANK_LINE@0..1 "\n"
HEADLINE@1..14
HEADLINE_STARS@1..3 "**"
WHITESPACE@3..4 " "
HEADLINE_TITLE@4..13
TEXT@4..13 "heading 2"
NEW_LINE@13..14 "\n"
HEADLINE@14..25
HEADLINE_STARS@14..15 "*"
WHITESPACE@15..16 " "
HEADLINE_TITLE@16..25
TEXT@16..25 "heading 1"
"###
);
// `*heading 1` (no space after the star) is not a headline and ends up as section text.
insta::assert_debug_snapshot!(
to_document("section\n** heading 2\n*heading 1").syntax,
@r###"
DOCUMENT@0..31
SECTION@0..8
PARAGRAPH@0..8
TEXT@0..8 "section\n"
HEADLINE@8..31
HEADLINE_STARS@8..10 "**"
WHITESPACE@10..11 " "
HEADLINE_TITLE@11..20
TEXT@11..20 "heading 2"
NEW_LINE@20..21 "\n"
SECTION@21..31
PARAGRAPH@21..31
TEXT@21..31 "*heading 1"
"###
);
}

View file

@ -1,275 +0,0 @@
use nom::{
bytes::complete::{tag_no_case, take_while1},
character::complete::{space0, space1},
combinator::{iterator, map, verify},
sequence::tuple,
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, colon_token, eol_or_eof, line_starts_iter, node, trim_line_end, GreenElement,
NodeBuilder,
},
element::element_nodes,
input::Input,
SyntaxKind::*,
};
/// Parses a drawer opening line, `:NAME:`, into a `DRAWER_BEGIN` node.
///
/// The name consists of ASCII letters, `-` and `_`. Returns the node together with the drawer
/// name so that callers can inspect it.
fn drawer_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
    let (input, (indent, open, name, close, trailing, eol)) = tuple((
        space0,
        colon_token,
        take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'),
        colon_token,
        space0,
        eol_or_eof,
    ))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.push(open);
    builder.text(name);
    builder.push(close);
    builder.ws(trailing);
    builder.nl(eol);

    let begin = builder.finish(DRAWER_BEGIN);
    Ok((input, (begin, name.as_str())))
}
/// Parses a drawer closing line, `:END:` (matched case-insensitively), into a `DRAWER_END` node.
fn drawer_end_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, (indent, open, keyword, close, trailing, eol)) = tuple((
        space0,
        colon_token,
        tag_no_case("END"),
        colon_token,
        space0,
        eol_or_eof,
    ))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.push(open);
    builder.text(keyword);
    builder.push(close);
    builder.ws(trailing);
    builder.nl(eol);

    Ok((input, builder.finish(DRAWER_END)))
}
/// Parses a drawer: a `:NAME:` line, optional blank lines, contents, an `:END:` line and the
/// blank lines that follow it.
///
/// Every line start after the opening line is probed for `:END:`; the text before the first match
/// becomes the `DRAWER_CONTENT` node. Fails when no closing line exists.
fn drawer_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, (begin, _)) = drawer_begin_node(input)?;
    let (input, pre_blank) = blank_lines(input)?;

    for offset in line_starts_iter(input.as_str()) {
        let (tail, contents) = input.take_split(offset);
        let (rest, end) = match drawer_end_node(tail) {
            Ok(found) => found,
            Err(_) => continue,
        };
        let (rest, post_blank) = blank_lines(rest)?;

        // An empty drawer still gets an (empty) content node.
        let content = if contents.is_empty() {
            node(DRAWER_CONTENT, [])
        } else {
            node(DRAWER_CONTENT, element_nodes(contents)?)
        };

        let mut children = vec![begin];
        children.extend(pre_blank);
        children.push(content);
        children.push(end);
        children.extend(post_blank);
        return Ok((rest, node(DRAWER, children)));
    }

    Err(nom::Err::Error(()))
}
/// Parses a `:PROPERTIES:` drawer: the opening line, zero or more node properties, `:END:` and
/// trailing blank lines.
///
/// The drawer name is compared case-insensitively; any other name is an error.
fn property_drawer_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, (begin, name)) = drawer_begin_node(input)?;
    if !name.eq_ignore_ascii_case("properties") {
        return Err(nom::Err::Error(()));
    }

    let mut children = vec![begin];

    // Collect node properties until a line no longer parses as one.
    let mut properties = iterator(input, node_property_node);
    for property in &mut properties {
        children.push(property);
    }
    let (input, _) = properties.finish()?;

    let (input, end) = drawer_end_node(input)?;
    children.push(end);

    let (input, post_blank) = blank_lines(input)?;
    children.extend(post_blank);

    Ok((input, node(PROPERTY_DRAWER, children)))
}
/// Parses one `:NAME: VALUE` (or `:NAME+: VALUE`) line into a `NODE_PROPERTY` node.
///
/// The name is the run of non-whitespace characters after the first colon and must end with `:`.
/// At least one space or tab has to separate it from the value.
fn node_property_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, indent) = space0(input)?;
    let (input, open) = colon_token(input)?;
    // Split the trailing `:` off the name.
    let (input, (close, name)) = map(
        verify(
            take_while1(|c| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
            |i: &Input| i.ends_with(':'),
        ),
        |input: Input| input.take_split(input.len() - 1),
    )(input)?;
    let (input, gap) = space1(input)?;
    let (input, (value, trailing, eol)) = trim_line_end(input)?;

    // A trailing `+` on the name is emitted as a separate token.
    let (plus, name) = if name.ends_with('+') {
        let (plus, name) = name.take_split(name.len() - 1);
        (Some(plus), name)
    } else {
        (None, name)
    };

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.push(open);
    builder.text(name);
    if let Some(plus) = plus {
        builder.token(PLUS, plus);
    }
    builder.token(COLON, close);
    builder.ws(gap);
    builder.text(value);
    builder.ws(trailing);
    builder.nl(eol);

    Ok((input, builder.finish(NODE_PROPERTY)))
}
/// Parses a `:PROPERTIES:` drawer from the start of `input`.
///
/// `input` must not be empty (checked with a debug assertion). The parsing itself is done by
/// `property_drawer_node_base`, which is invoked through `lossless_parser!`. When the `tracing`
/// feature is enabled the call is instrumented at debug level and records `input.s`.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn property_drawer_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert!(!input.is_empty());
crate::lossless_parser!(property_drawer_node_base, input)
}
/// Parses a drawer (`:NAME:` ... `:END:`) from the start of `input`.
///
/// Thin public entry point: the parsing itself is done by `drawer_node_base`, which is invoked
/// through `lossless_parser!`. When the `tracing` feature is enabled the call is instrumented
/// at debug level and records `input.s`.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn drawer_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(drawer_node_base, input)
}
// Snapshot tests for `drawer_node` and `property_drawer_node`, followed by inputs that must be
// rejected.
#[test]
fn parse() {
use crate::{
ast::{Drawer, PropertyDrawer},
tests::to_ast,
ParseConfig,
};
let to_drawer = to_ast::<Drawer>(drawer_node);
let to_property_drawer = to_ast::<PropertyDrawer>(property_drawer_node);
// A generic drawer: its contents are parsed as ordinary elements, not as node properties.
insta::assert_debug_snapshot!(
to_drawer(
r#":DRAWER:
:CUSTOM_ID: id
:END:"#
).syntax,
@r###"
DRAWER@0..33
DRAWER_BEGIN@0..9
COLON@0..1 ":"
TEXT@1..7 "DRAWER"
COLON@7..8 ":"
NEW_LINE@8..9 "\n"
DRAWER_CONTENT@9..26
PARAGRAPH@9..26
TEXT@9..18 " :CUSTOM"
SUBSCRIPT@18..21
UNDERSCORE@18..19 "_"
TEXT@19..21 "ID"
TEXT@21..26 ": id\n"
DRAWER_END@26..33
WHITESPACE@26..28 " "
COLON@28..29 ":"
TEXT@29..32 "END"
COLON@32..33 ":"
"###
);
// An empty drawer surrounded by blank lines still has a DRAWER_CONTENT child.
insta::assert_debug_snapshot!(
to_drawer(
r#":DRAWER:
:END:
"#
).syntax,
@r###"
DRAWER@0..19
DRAWER_BEGIN@0..9
COLON@0..1 ":"
TEXT@1..7 "DRAWER"
COLON@7..8 ":"
NEW_LINE@8..9 "\n"
BLANK_LINE@9..10 "\n"
DRAWER_CONTENT@10..10
DRAWER_END@10..18
WHITESPACE@10..12 " "
COLON@12..13 ":"
TEXT@13..16 "END"
COLON@16..17 ":"
NEW_LINE@17..18 "\n"
BLANK_LINE@18..19 "\n"
"###
);
// https://github.com/PoiScript/orgize/issues/70#issuecomment-2099671563
insta::assert_debug_snapshot!(
to_property_drawer(r#":PROPERTIES:
:header-args:clojure: :session *clojure-1*
:NAME: VALUE
:NAME+: VALUE
:END:"#).syntax,
@r###"
PROPERTY_DRAWER@0..91
DRAWER_BEGIN@0..13
COLON@0..1 ":"
TEXT@1..11 "PROPERTIES"
COLON@11..12 ":"
NEW_LINE@12..13 "\n"
NODE_PROPERTY@13..59
COLON@13..14 ":"
TEXT@14..33 "header-args:clojure"
COLON@33..34 ":"
WHITESPACE@34..38 " "
TEXT@38..58 ":session *clojure-1*"
NEW_LINE@58..59 "\n"
NODE_PROPERTY@59..72
COLON@59..60 ":"
TEXT@60..64 "NAME"
COLON@64..65 ":"
WHITESPACE@65..66 " "
TEXT@66..71 "VALUE"
NEW_LINE@71..72 "\n"
NODE_PROPERTY@72..86
COLON@72..73 ":"
TEXT@73..77 "NAME"
PLUS@77..78 "+"
COLON@78..79 ":"
WHITESPACE@79..80 " "
TEXT@80..85 "VALUE"
NEW_LINE@85..86 "\n"
DRAWER_END@86..91
COLON@86..87 ":"
TEXT@87..90 "END"
COLON@90..91 ":"
"###
);
let config = &ParseConfig::default();
// https://github.com/PoiScript/orgize/issues/9
assert!(drawer_node((":SPAGHETTI:\n", config).into()).is_err());
// A property without whitespace between name and value is rejected.
assert!(property_drawer_node((":PROPERTIES:\n:NAME:VALUE\n:END:", config).into()).is_err());
}

View file

@ -1,107 +0,0 @@
use nom::{
bytes::complete::tag_no_case,
character::complete::{alpha1, space0, space1},
sequence::tuple,
IResult, InputTake,
};
use super::{
combinator::{
blank_lines, eol_or_eof, line_starts_iter, node, trim_line_end, GreenElement, NodeBuilder,
},
input::Input,
SyntaxKind::*,
};
/// Parses a dynamic block: a `#+BEGIN:` line, optional blank lines, raw contents, a `#+END:` line
/// and the blank lines that follow it.
///
/// Every line start after the opening line is probed for `#+END:`; the text before the first
/// match is kept verbatim as a single text token. Fails when no closing line exists.
fn dyn_block_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, begin) = dyn_block_begin_node(input)?;
    let (input, pre_blank) = blank_lines(input)?;

    for offset in line_starts_iter(input.as_str()) {
        let (tail, contents) = input.take_split(offset);
        let (rest, end) = match dyn_block_end_node(tail) {
            Ok(found) => found,
            Err(_) => continue,
        };
        let (rest, post_blank) = blank_lines(rest)?;

        let mut children = vec![begin];
        children.extend(pre_blank);
        children.push(contents.text_token());
        children.push(end);
        children.extend(post_blank);
        return Ok((rest, node(DYN_BLOCK, children)));
    }

    Err(nom::Err::Error(()))
}
/// Parses the opening line of a dynamic block, `#+BEGIN: NAME ARGS`, into a `DYN_BLOCK_BEGIN`
/// node.
///
/// `#+BEGIN:` is matched case-insensitively, the name is a run of ASCII letters, and everything
/// up to the end of the line (trailing whitespace excluded) is kept as the arguments text.
fn dyn_block_begin_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, (indent, keyword, gap, name, (args, trailing, eol))) = tuple((
        space0,
        tag_no_case("#+BEGIN:"),
        space1,
        alpha1,
        trim_line_end,
    ))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.text(keyword);
    builder.ws(gap);
    builder.text(name);
    builder.text(args);
    builder.ws(trailing);
    builder.nl(eol);

    Ok((input, builder.finish(DYN_BLOCK_BEGIN)))
}
/// Parses the closing line of a dynamic block, `#+END:` (matched case-insensitively), into a
/// `DYN_BLOCK_END` node.
fn dyn_block_end_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, (indent, keyword, trailing, eol)) =
        tuple((space0, tag_no_case("#+END:"), space0, eol_or_eof))(input)?;

    let mut builder = NodeBuilder::new();
    builder.ws(indent);
    builder.text(keyword);
    builder.ws(trailing);
    builder.nl(eol);

    Ok((input, builder.finish(DYN_BLOCK_END)))
}
/// Parses a dynamic block (`#+BEGIN: ...` ... `#+END:`) from the start of `input`.
///
/// Thin public entry point: the parsing itself is done by `dyn_block_node_base`, which is invoked
/// through `lossless_parser!`. When the `tracing` feature is enabled the call is instrumented
/// at debug level and records `input.s`.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn dyn_block_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(dyn_block_node_base, input)
}
// Snapshot test for `dyn_block_node`: a clocktable block with arguments, a blank line before
// the contents and trailing blank lines.
#[test]
fn parse() {
use crate::{ast::DynBlock, tests::to_ast};
let to_dyn_block = to_ast::<DynBlock>(dyn_block_node);
insta::assert_debug_snapshot!(
to_dyn_block(
r#"#+BEGIN: clocktable :scope file
CONTENTS
#+END:
"#).syntax,
@r###"
DYN_BLOCK@0..53
DYN_BLOCK_BEGIN@0..32
TEXT@0..8 "#+BEGIN:"
WHITESPACE@8..9 " "
TEXT@9..19 "clocktable"
TEXT@19..31 " :scope file"
NEW_LINE@31..32 "\n"
BLANK_LINE@32..33 "\n"
TEXT@33..42 "CONTENTS\n"
DYN_BLOCK_END@42..49
TEXT@42..48 "#+END:"
NEW_LINE@48..49 "\n"
BLANK_LINE@49..53 " "
"###
);
}

View file

@ -1,339 +0,0 @@
use std::iter::once;
use memchr::memchr2_iter;
use nom::{IResult, InputTake};
use super::{
block::block_node,
clock::clock_node,
combinator::GreenElement,
comment::comment_node,
drawer::drawer_node,
dyn_block::dyn_block_node,
fixed_width::fixed_width_node,
fn_def::fn_def_node,
input::Input,
keyword::{affiliated_keyword_nodes, keyword_node},
latex_environment::latex_environment_node,
list::list_node,
paragraph::{paragraph_node, paragraph_nodes},
rule::rule_node,
table::{org_table_node, table_el_node},
};
/// Parses `input` into a sequence of org-mode elements.
///
/// The input must be non-empty and should not start with blank lines. Text that does not belong
/// to any other element is turned into paragraphs.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn element_nodes(input: Input) -> Result<Vec<GreenElement>, nom::Err<()>> {
    debug_assert!(!input.is_empty());
    // TODO:
    // debug_assert!(
    //     blank_lines(input).unwrap().1.is_empty(),
    //     "input must not starts with blank lines: {:?}",
    //     input.s
    // );

    let mut rest = input;
    let mut nodes = vec![];

    'outer: while !rest.is_empty() {
        // Probe each candidate position for a non-paragraph element; whatever precedes the
        // first hit is paragraph text.
        for (after, before) in ElementPositions::new(rest) {
            let (remaining, element) = match element_node(after) {
                Ok(hit) => hit,
                Err(_) => continue,
            };
            if !before.is_empty() {
                nodes.extend(paragraph_nodes(before)?);
            }
            nodes.push(element);
            debug_assert!(
                remaining.len() < rest.len(),
                "{} < {}",
                remaining.len(),
                rest.len()
            );
            rest = remaining;
            continue 'outer;
        }

        // No further element: everything that is left is paragraph text.
        nodes.extend(paragraph_nodes(rest)?);
        break;
    }

    debug_assert_eq!(
        input.as_str(),
        nodes.iter().map(|n| n.to_string()).collect::<String>(),
        "parser must be lossless"
    );

    Ok(nodes)
}
/// Recognizes a single org-mode element other than a paragraph.
///
/// Leading affiliated keywords are parsed first, but only to find the first
/// byte of the element proper; the element parser chosen from that byte is
/// then run on the original `input`, keywords included.
///
/// If affiliated keywords were present and no special element matched, the
/// input is parsed with `paragraph_node` instead.
///
/// # Errors
///
/// Returns `nom::Err::Error(())` when the first non-blank byte does not start
/// any known element, or propagates the failure of the selected parser.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn element_node(input: Input) -> IResult<Input, GreenElement, ()> {
    // Look past the affiliated keywords without consuming them from `input`.
    let (after_keywords, keywords) = affiliated_keyword_nodes(input)?;
    let has_affiliated_keyword = !keywords.is_empty();

    // First byte that is neither a space nor a tab.
    let first = after_keywords
        .bytes()
        .find(|&b| !matches!(b, b' ' | b'\t'));

    debug_assert!(
        !(has_affiliated_keyword && matches!(first, None | Some(b'\n') | Some(b'\r'))),
        "affiliated_keyword must not followed by blank lines: {:?}",
        input.s
    );

    let parsed = match first {
        Some(b'[') => fn_def_node(input),
        Some(b'0'..=b'9') | Some(b'*') => list_node(input),
        // clock doesn't have affiliated keywords
        Some(b'C') if !has_affiliated_keyword => clock_node(input),
        Some(b'-') => rule_node(input).or_else(|_| list_node(input)),
        Some(b':') => drawer_node(input).or_else(|_| fixed_width_node(input)),
        Some(b'|') => org_table_node(input),
        Some(b'+') => table_el_node(input).or_else(|_| list_node(input)),
        Some(b'#') => block_node(input)
            .or_else(|_| keyword_node(input))
            .or_else(|_| dyn_block_node(input))
            .or_else(|_| comment_node(input)),
        Some(b'\\') => latex_environment_node(input),
        _ => Err(nom::Err::Error(())),
    };

    if !has_affiliated_keyword {
        return parsed;
    }
    // Affiliated keywords followed by ordinary text form a paragraph.
    parsed.or_else(|_| paragraph_node(input))
}
/// Iterator over the line starts of `input` at which a non-paragraph element
/// may begin.
///
/// Each item is the pair returned by `take_split` at such a line start. Per
/// the `positions` test, the first component is the input from the candidate
/// line onwards.
/// NOTE(review): the second component is presumably the text before the
/// candidate line -- confirm against `Input::take_split`.
struct ElementPositions<'a> {
// Full input being scanned.
input: Input<'a>,
// Byte offset into `input` at which the next scan resumes.
pos: usize,
}
impl<'a> ElementPositions<'a> {
/// Creates an iterator that starts scanning at the beginning of `input`.
fn new(input: Input<'a>) -> Self {
ElementPositions { input, pos: 0 }
}
}
impl<'a> Iterator for ElementPositions<'a> {
type Item = (Input<'a>, Input<'a>);
/// Returns the split at the next candidate line, or `None` when no further
/// candidate exists.
fn next(&mut self) -> Option<Self::Item> {
// Nothing left to scan.
if self.pos >= self.input.s.len() {
return None;
}
let bytes = &self.input.as_bytes()[self.pos..];
// Candidate line starts relative to `self.pos`: offset 0, then the offset
// just past every `\r` or `\n`.
let mut iter = once(0).chain(memchr2_iter(b'\r', b'\n', bytes).map(|i| i + 1));
// `while let` instead of `for`: the body also pulls the following line
// start from `iter` to compute where the next call resumes.
while let Some(i) = iter.next() {
// First byte on or after this line start that is not a space or tab;
// if only spaces/tabs remain up to the end of input, iteration ends.
let b = *bytes[i..].iter().find(|&&b| b != b' ' && b != b'\t')?;
// Same set of leading bytes that `element_node` dispatches on.
if matches!(
b,
b'[' | b'0'..=b'9' | b'*' | b'C' | b'-' | b':' | b'|' | b'+' | b'#' | b'\\'
) {
let previous = self.pos;
// Resume at the next line start, or at the end of input if this was
// the last one.
self.pos = iter
.next()
.map_or_else(|| self.input.s.len(), |i| i + self.pos);
debug_assert!(
previous < self.pos && self.pos <= self.input.s.len(),
"{} < {} < {}",
previous,
self.pos,
self.input.s.len()
);
// `i` is relative to `previous`; convert to an absolute offset.
let (input, head) = self.input.take_split(i + previous);
return Some((input, head));
}
}
None
}
}
// Checks that `ElementPositions` yields one item per line whose first
// non-blank byte may start an element, and that the first component of each
// item is the input from that line onwards.
#[test]
fn positions() {
let config = crate::ParseConfig::default();
let s = "+\n\n C\n \r\n-\n\t\t[\n: \r\n";
let vec = ElementPositions::new((s, &config).into()).collect::<Vec<_>>();
assert_eq!(vec.len(), 5);
assert_eq!(vec[0].0.s, "+\n\n C\n \r\n-\n\t\t[\n: \r\n");
assert_eq!(vec[1].0.s, " C\n \r\n-\n\t\t[\n: \r\n");
assert_eq!(vec[2].0.s, "-\n\t\t[\n: \r\n");
assert_eq!(vec[3].0.s, "\t\t[\n: \r\n");
assert_eq!(vec[4].0.s, ": \r\n");
}
// Snapshot tests for `element_nodes`: the parsed children are wrapped in a
// SECTION node so the whole tree can be printed.
#[test]
fn parse() {
use crate::syntax::{SyntaxKind, SyntaxNode};
use crate::{syntax::combinator::node, ParseConfig};
// Helper: parse `input` and wrap the resulting elements in a SECTION root.
let t = |input: &str| {
let config = &ParseConfig::default();
let children = element_nodes((input, config).into()).unwrap();
SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap())
};
// paragraph stops at blank lines
insta::assert_debug_snapshot!(
t(r#"a
b"#),
@r###"
SECTION@0..4
PARAGRAPH@0..3
TEXT@0..2 "a\n"
BLANK_LINE@2..3 "\n"
PARAGRAPH@3..4
TEXT@3..4 "b"
"###
);
// paragraph followed by special element
insta::assert_debug_snapshot!(
t("Table:\n|cell"),
@r###"
SECTION@0..12
PARAGRAPH@0..7
TEXT@0..7 "Table:\n"
ORG_TABLE@7..12
ORG_TABLE_STANDARD_ROW@7..12
PIPE@7..8 "|"
ORG_TABLE_CELL@8..12
TEXT@8..12 "cell"
"###
);
}
// Snapshot tests for how `element_nodes` treats affiliated keywords in front
// of paragraphs, special elements, clocks, blank lines and end of input.
#[test]
fn affiliated_keywords() {
use crate::syntax::{SyntaxKind, SyntaxNode};
use crate::{syntax::combinator::node, ParseConfig};
// Helper: parse `input` and wrap the resulting elements in a SECTION root.
let t = |input: &str| {
let config = &ParseConfig::default();
let children = element_nodes((input, config).into()).unwrap();
SyntaxNode::new_root(node(SyntaxKind::SECTION, children).into_node().unwrap())
};
// affiliated keywords + paragraph
insta::assert_debug_snapshot!(
t("#+ATTR_HTML: :width 300px\n[[./img/a.jpg]]"),
@r###"
SECTION@0..41
PARAGRAPH@0..41
AFFILIATED_KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..11 "ATTR_HTML"
COLON@11..12 ":"
TEXT@12..25 " :width 300px"
NEW_LINE@25..26 "\n"
LINK@26..41
L_BRACKET2@26..28 "[["
LINK_PATH@28..39 "./img/a.jpg"
R_BRACKET2@39..41 "]]"
"###
);
// affiliated keywords + blank lines, fallback to normal keyword
insta::assert_debug_snapshot!(
t("#+ATTR_HTML: :width 300px\n#+CAPTION: abc\n\n[[./img/a.jpg]]"),
@r###"
SECTION@0..57
KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..11 "ATTR_HTML"
COLON@11..12 ":"
TEXT@12..25 " :width 300px"
NEW_LINE@25..26 "\n"
KEYWORD@26..42
HASH_PLUS@26..28 "#+"
TEXT@28..35 "CAPTION"
COLON@35..36 ":"
TEXT@36..40 " abc"
NEW_LINE@40..41 "\n"
BLANK_LINE@41..42 "\n"
PARAGRAPH@42..57
LINK@42..57
L_BRACKET2@42..44 "[["
LINK_PATH@44..55 "./img/a.jpg"
R_BRACKET2@55..57 "]]"
"###
);
// affiliated keywords + special element
insta::assert_debug_snapshot!(
t("#+CAPTION: a footnote def\n[fn:WORD] https://orgmode.org"),
@r###"
SECTION@0..55
FN_DEF@0..55
AFFILIATED_KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
COLON@9..10 ":"
TEXT@10..25 " a footnote def"
NEW_LINE@25..26 "\n"
L_BRACKET@26..27 "["
TEXT@27..29 "fn"
COLON@29..30 ":"
TEXT@30..34 "WORD"
R_BRACKET@34..35 "]"
TEXT@35..55 " https://orgmode.org"
"###
);
// affiliated keywords + clock
insta::assert_debug_snapshot!(
t("#+CAPTION: a footnote def\nCLOCK: [2003-09-16 Tue 09:39]"),
@r###"
SECTION@0..55
PARAGRAPH@0..55
AFFILIATED_KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
COLON@9..10 ":"
TEXT@10..25 " a footnote def"
NEW_LINE@25..26 "\n"
TEXT@26..33 "CLOCK: "
TIMESTAMP_INACTIVE@33..55
L_BRACKET@33..34 "["
TIMESTAMP_YEAR@34..38 "2003"
MINUS@38..39 "-"
TIMESTAMP_MONTH@39..41 "09"
MINUS@41..42 "-"
TIMESTAMP_DAY@42..44 "16"
WHITESPACE@44..45 " "
TIMESTAMP_DAYNAME@45..48 "Tue"
WHITESPACE@48..49 " "
TIMESTAMP_HOUR@49..51 "09"
COLON@51..52 ":"
TIMESTAMP_MINUTE@52..54 "39"
R_BRACKET@54..55 "]"
"###
);
// affiliated keywords + eof
insta::assert_debug_snapshot!(
t("#+CAPTION: Longer caption."),
@r###"
SECTION@0..26
KEYWORD@0..26
HASH_PLUS@0..2 "#+"
TEXT@2..9 "CAPTION"
COLON@9..10 ":"
TEXT@10..26 " Longer caption."
"###
);
}

View file

@ -1,186 +0,0 @@
use bytecount::count;
use memchr::memchr_iter;
use nom::{combinator::map, IResult, Slice};
use super::{
combinator::{node, token, GreenElement},
input::Input,
object::standard_object_nodes,
SyntaxKind::*,
};
/// Parses bold text delimited by `*` markers into a `BOLD` node.
///
/// The text between the markers is parsed with `standard_object_nodes` and
/// placed between two `STAR` tokens.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn bold_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut bold = map(emphasis(b'*'), |inner| {
        let children = std::iter::once(token(STAR, "*"))
            .chain(standard_object_nodes(inner))
            .chain(std::iter::once(token(STAR, "*")))
            .collect::<Vec<_>>();
        node(BOLD, children)
    });
    crate::lossless_parser!(bold, input)
}
/// Parses inline code delimited by `~` markers into a `CODE` node.
///
/// The text between the markers is kept as a single text token.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn code_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut code = map(emphasis(b'~'), |raw| {
        let open = token(TILDE, "~");
        let body = raw.text_token();
        let close = token(TILDE, "~");
        node(CODE, [open, body, close])
    });
    crate::lossless_parser!(code, input)
}
/// Parses strike-through text delimited by `+` markers into a `STRIKE` node.
///
/// The text between the markers is parsed with `standard_object_nodes` and
/// placed between two `PLUS` tokens.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn strike_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut strike = map(emphasis(b'+'), |inner| {
        let children = std::iter::once(token(PLUS, "+"))
            .chain(standard_object_nodes(inner))
            .chain(std::iter::once(token(PLUS, "+")))
            .collect::<Vec<_>>();
        node(STRIKE, children)
    });
    crate::lossless_parser!(strike, input)
}
/// Parses verbatim text delimited by `=` markers into a `VERBATIM` node.
///
/// The text between the markers is kept as a single text token.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn verbatim_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut verbatim = map(emphasis(b'='), |raw| {
        let open = token(EQUAL, "=");
        let body = raw.text_token();
        let close = token(EQUAL, "=");
        node(VERBATIM, [open, body, close])
    });
    crate::lossless_parser!(verbatim, input)
}
/// Parses underlined text delimited by `_` markers into an `UNDERLINE` node.
///
/// The text between the markers is parsed with `standard_object_nodes` and
/// placed between two `UNDERSCORE` tokens.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn underline_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut underline = map(emphasis(b'_'), |inner| {
        let children = std::iter::once(token(UNDERSCORE, "_"))
            .chain(standard_object_nodes(inner))
            .chain(std::iter::once(token(UNDERSCORE, "_")))
            .collect::<Vec<_>>();
        node(UNDERLINE, children)
    });
    crate::lossless_parser!(underline, input)
}
/// Parses italic text delimited by `/` markers into an `ITALIC` node.
///
/// The text between the markers is parsed with `standard_object_nodes` and
/// placed between two `SLASH` tokens.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn italic_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let mut italic = map(emphasis(b'/'), |inner| {
        let children = std::iter::once(token(SLASH, "/"))
            .chain(standard_object_nodes(inner))
            .chain(std::iter::once(token(SLASH, "/")))
            .collect::<Vec<_>>();
        node(ITALIC, children)
    });
    crate::lossless_parser!(italic, input)
}
fn emphasis(marker: u8) -> impl Fn(Input) -> IResult<Input, Input, ()> {
move |input: Input| {
let bytes = input.as_bytes();
if bytes.len() < 3 || bytes[0] != marker || bytes[1].is_ascii_whitespace() {
return Err(nom::Err::Error(()));
}
for idx in memchr_iter(marker, bytes).skip(1) {
// contains at least one character
if idx == 1 {
continue;
} else if count(&bytes[1..idx], b'\n') >= 2 {
break;
} else if validate_marker(idx, input) {
return Ok((input.slice(idx + 1..), input.slice(1..idx)));
}
}
Err(nom::Err::Error(()))
}
}
/// Checks whether the marker at byte offset `pos` of `text` may close an
/// emphasis.
///
/// The byte before the marker must not be ASCII whitespace, and the byte after
/// it (if any) must be whitespace or one of `- . , ; : ! ? ' ) } [`. A marker
/// at the very end of `text` is accepted.
///
/// `pos` must be at least 1, since the byte at `pos - 1` is indexed directly.
fn validate_marker(pos: usize, text: Input) -> bool {
    let bytes = text.as_bytes();
    if bytes[pos - 1].is_ascii_whitespace() {
        return false;
    }
    match bytes.get(pos + 1) {
        None => true,
        Some(&post) => matches!(
            post,
            b' ' | b'\t'
                | b'\r'
                | b'\n'
                | b'-'
                | b'.'
                | b','
                | b';'
                | b':'
                | b'!'
                | b'?'
                | b'\''
                | b')'
                | b'}'
                | b'['
        ),
    }
}
/// Returns whether the text preceding an emphasis marker allows the marker to
/// open an emphasis.
///
/// `input` is the text before the marker. It qualifies when it is empty or
/// when its last byte is a tab, space, `-`, `(`, `{`, `\`, `"`, carriage
/// return or newline.
pub fn verify_pre(input: &str) -> bool {
    input.as_bytes().last().map_or(true, |&last| {
        matches!(
            last,
            b'\t' | b' ' | b'-' | b'(' | b'{' | b'\\' | b'"' | b'\r' | b'\n'
        )
    })
}
// Snapshot tests for `bold_node`, plus negative cases: a closing marker
// followed by a letter, whitespace next to a marker, two newlines inside the
// contents, and a parser used with the wrong marker.
#[test]
fn parse() {
use crate::{ast::Bold, tests::to_ast, ParseConfig};
let to_bold = to_ast::<Bold>(bold_node);
insta::assert_debug_snapshot!(
to_bold("*bold*").syntax,
@r###"
BOLD@0..6
STAR@0..1 "*"
TEXT@1..5 "bold"
STAR@5..6 "*"
"###
);
insta::assert_debug_snapshot!(
to_bold("*bo*ld*").syntax,
@r###"
BOLD@0..7
STAR@0..1 "*"
TEXT@1..6 "bo*ld"
STAR@6..7 "*"
"###
);
insta::assert_debug_snapshot!(
to_bold("*bo\nld*").syntax,
@r###"
BOLD@0..7
STAR@0..1 "*"
TEXT@1..6 "bo\nld"
STAR@6..7 "*"
"###
);
let config = &ParseConfig::default();
assert!(bold_node(("*bold*a", config).into()).is_err());
assert!(bold_node(("*bold *", config).into()).is_err());
assert!(bold_node(("* bold*", config).into()).is_err());
assert!(bold_node(("*b\nol\nd*", config).into()).is_err());
assert!(italic_node(("*bold*", config).into()).is_err());
}

View file

@ -1,120 +0,0 @@
use nom::{
branch::alt,
bytes::complete::{tag, take_while_m_n},
character::complete::alphanumeric1,
combinator::opt,
IResult,
};
use crate::{
entities::ENTITIES,
syntax::combinator::{backslash_token, node},
SyntaxKind,
};
use super::{combinator::GreenElement, input::Input};
/// Parses an entity: `\NAME`, `\NAME{}` or `\_` followed by spaces.
///
/// The input is expected to start with a backslash (checked by a debug
/// assertion). `template1` is tried first, then `template2`; the combined
/// parser is run through `crate::lossless_parser!`.
pub fn entity_node(input: Input) -> IResult<Input, GreenElement, ()> {
debug_assert!(input.s.starts_with('\\'));
let mut parser = alt((template1, template2));
crate::lossless_parser!(parser, input)
}
// Matches `\NAME POST` or `\NAME{}`.
//
// NAME is a run of alphanumeric characters that must be listed in `ENTITIES`.
// Without the `{}` suffix, the byte following NAME (if any) must not be an
// ASCII letter.
fn template1(input: Input) -> IResult<Input, GreenElement, ()> {
    let (input, backslash) = backslash_token(input)?;
    let (input, name) = alphanumeric1(input)?;

    let known = ENTITIES.iter().any(|entry| entry.0 == name.s);
    if !known {
        return Err(nom::Err::Error(()));
    }

    let (input, brackets) = opt(tag("{}"))(input)?;
    let children = match brackets {
        Some(brackets) => vec![backslash, name.text_token(), brackets.text_token()],
        None => {
            let followed_by_letter = input
                .bytes()
                .next()
                .map_or(false, |post| post.is_ascii_alphabetic());
            if followed_by_letter {
                return Err(nom::Err::Error(()));
            }
            vec![backslash, name.text_token()]
        }
    };

    Ok((input, node(SyntaxKind::ENTITY, children)))
}
// Matches `\_SPACES`: a backslash, an underscore and between 1 and 20 spaces.
fn template2(input: Input) -> IResult<Input, GreenElement, ()> {
    let (rest, backslash) = backslash_token(input)?;
    let (rest, underscore) = tag("_")(rest)?;
    let (rest, spaces) = take_while_m_n(1, 20, |c| c == ' ')(rest)?;

    let children = [
        backslash,
        underscore.token(SyntaxKind::UNDERSCORE),
        spaces.text_token(),
    ];
    Ok((rest, node(SyntaxKind::ENTITY, children)))
}
// Snapshot tests for `entity_node`: plain names, a name followed by `{}`, the
// `\_` + spaces form, and rejection of a name that is not a known entity.
#[test]
fn parse() {
use crate::{ast::Entity, tests::to_ast, ParseConfig};
let to_entity = to_ast::<Entity>(entity_node);
insta::assert_debug_snapshot!(
to_entity("\\cent").syntax,
@r###"
ENTITY@0..5
BACKSLASH@0..1 "\\"
TEXT@1..5 "cent"
"###
);
insta::assert_debug_snapshot!(
to_entity("\\S").syntax,
@r###"
ENTITY@0..2
BACKSLASH@0..1 "\\"
TEXT@1..2 "S"
"###
);
insta::assert_debug_snapshot!(
to_entity("\\frac12{}test").syntax,
@r###"
ENTITY@0..9
BACKSLASH@0..1 "\\"
TEXT@1..7 "frac12"
TEXT@7..9 "{}"
"###
);
insta::assert_debug_snapshot!(
to_entity("\\_ ").syntax,
@r###"
ENTITY@0..21
BACKSLASH@0..1 "\\"
UNDERSCORE@1..2 "_"
TEXT@2..21 " "
"###
);
let c = ParseConfig::default();
assert!(entity_node(("\\poi", &c).into()).is_err());
}

View file

@ -1,100 +0,0 @@
use nom::{
bytes::complete::{tag, take_while},
character::complete::{space0, space1},
combinator::{iterator, opt},
sequence::tuple,
IResult,
};
use super::{
combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder},
input::Input,
keyword::affiliated_keyword_nodes,
SyntaxKind,
};
fn fixed_width_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let mut b = NodeBuilder::new();
let (input, keywords) = affiliated_keyword_nodes(input)?;
b.children.extend(keywords);
let mut iter = iterator(
input,
opt(tuple((
space0,
tag(":"),
opt(tuple((space1, take_while(|c| c != '\r' && c != '\n')))),
eol_or_eof,
))),
);
for (idx, option) in iter.enumerate() {
match option {
Some((ws, common, content, eol)) => {
b.ws(ws);
b.token(SyntaxKind::COMMA, common);
if let Some((ws, text)) = content {
b.ws(ws);
b.text(text);
}
b.text(eol);
}
_ if idx == 0 => return Err(nom::Err::Error(())),
_ => break,
}
}
let (input, _) = iter.finish()?;
let (input, post_blank) = blank_lines(input)?;
b.children.extend(post_blank);
Ok((input, b.finish(SyntaxKind::FIXED_WIDTH)))
}
/// Parses a fixed-width area (lines starting with `:`).
///
/// Delegates to `fixed_width_node_base` through `crate::lossless_parser!`.
/// NOTE(review): the macro is defined elsewhere; presumably it checks that the
/// produced node reproduces the consumed input -- confirm.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn fixed_width_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(fixed_width_node_base, input)
}
// Snapshot test: several fixed-width lines, including a bare `:`, followed by
// trailing blank lines, form a single FIXED_WIDTH node.
#[test]
fn parse() {
use crate::{ast::FixedWidth, tests::to_ast};
let to_fixed_width = to_ast::<FixedWidth>(fixed_width_node);
insta::assert_debug_snapshot!(
to_fixed_width(
r#": A
:
: B
: C
"#
).syntax,
@r###"
FIXED_WIDTH@0..19
COMMA@0..1 ":"
WHITESPACE@1..2 " "
TEXT@2..3 "A"
TEXT@3..4 "\n"
COMMA@4..5 ":"
TEXT@5..6 "\n"
COMMA@6..7 ":"
WHITESPACE@7..8 " "
TEXT@8..9 "B"
TEXT@9..10 "\n"
COMMA@10..11 ":"
WHITESPACE@11..12 " "
TEXT@12..13 "C"
TEXT@13..14 "\n"
BLANK_LINE@14..15 "\n"
BLANK_LINE@15..19 " "
"###
);
}

View file

@ -1,157 +0,0 @@
use nom::{
bytes::complete::{tag, take_while1},
combinator::map,
sequence::tuple,
IResult,
};
use super::{
combinator::{
blank_lines, colon_token, l_bracket_token, r_bracket_token, trim_line_end, GreenElement,
NodeBuilder,
},
input::Input,
keyword::affiliated_keyword_nodes,
SyntaxKind,
};
/// Parses a footnote definition: optional affiliated keywords, `[fn:LABEL]`,
/// the rest of the line, and any trailing blank lines.
///
/// LABEL is one or more ASCII alphanumeric characters, `-` or `_`.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn fn_def_node(input: Input) -> IResult<Input, GreenElement, ()> {
    let grammar = tuple((
        affiliated_keyword_nodes,
        l_bracket_token,
        tag("fn"),
        colon_token,
        take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
        r_bracket_token,
        trim_line_end,
        blank_lines,
    ));

    let mut fn_def = map(
        grammar,
        |(keywords, open, prefix, colon, label, close, line, trailing_blank)| {
            // `trim_line_end` yields the content, trailing whitespace and newline.
            let (content, trailing_ws, newline) = line;

            let mut builder = NodeBuilder::new();
            builder.children.extend(keywords);
            builder.push(open);
            builder.text(prefix);
            builder.push(colon);
            builder.text(label);
            builder.push(close);
            builder.text(content);
            builder.ws(trailing_ws);
            builder.nl(newline);
            builder.children.extend(trailing_blank);
            builder.finish(SyntaxKind::FN_DEF)
        },
    );

    crate::lossless_parser!(fn_def, input)
}
// Snapshot tests for `fn_def_node`: numeric and word labels, a definition
// without content, a definition ending in a newline, invalid labels, and a
// definition preceded by an affiliated keyword.
#[test]
fn parse() {
use crate::ParseConfig;
use crate::{ast::FnDef, tests::to_ast};
let to_fn_def = to_ast::<FnDef>(fn_def_node);
insta::assert_debug_snapshot!(
to_fn_def("[fn:1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..26
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
TEXT@6..26 " https://orgmode.org"
"###
);
insta::assert_debug_snapshot!(
to_fn_def("[fn:word_1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..31
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..10 "word_1"
R_BRACKET@10..11 "]"
TEXT@11..31 " https://orgmode.org"
"###
);
insta::assert_debug_snapshot!(
to_fn_def("[fn:WORD-1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..31
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..10 "WORD-1"
R_BRACKET@10..11 "]"
TEXT@11..31 " https://orgmode.org"
"###
);
insta::assert_debug_snapshot!(
to_fn_def("[fn:WORD]").syntax,
@r###"
FN_DEF@0..9
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..8 "WORD"
R_BRACKET@8..9 "]"
"###
);
insta::assert_debug_snapshot!(
to_fn_def("[fn:1] In particular, the parser requires stars at column 0 to be\n").syntax,
@r###"
FN_DEF@0..66
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
TEXT@6..65 " In particular, the p ..."
NEW_LINE@65..66 "\n"
"###
);
// Empty label, label containing a space, and missing closing bracket.
let config = &ParseConfig::default();
assert!(fn_def_node(("[fn:] https://orgmode.org", config).into()).is_err());
assert!(fn_def_node(("[fn:wor d] https://orgmode.org", config).into()).is_err());
assert!(fn_def_node(("[fn:WORD https://orgmode.org", config).into()).is_err());
insta::assert_debug_snapshot!(
to_fn_def("#+ATTR_poi: 1\n[fn:WORD-1] https://orgmode.org").syntax,
@r###"
FN_DEF@0..45
AFFILIATED_KEYWORD@0..14
HASH_PLUS@0..2 "#+"
TEXT@2..10 "ATTR_poi"
COLON@10..11 ":"
TEXT@11..13 " 1"
NEW_LINE@13..14 "\n"
L_BRACKET@14..15 "["
TEXT@15..17 "fn"
COLON@17..18 ":"
TEXT@18..24 "WORD-1"
R_BRACKET@24..25 "]"
TEXT@25..45 " https://orgmode.org"
"###
);
}

View file

@ -1,122 +0,0 @@
use memchr::memchr2_iter;
use nom::{
bytes::complete::{tag, take_while},
combinator::opt,
sequence::tuple,
Err, IResult, InputTake,
};
use super::{
combinator::{colon_token, l_bracket_token, node, r_bracket_token, GreenElement},
input::Input,
object::standard_object_nodes,
SyntaxKind::*,
};
/// Parses a footnote reference (`[fn:LABEL]`, `[fn:LABEL:DEFINITION]` or
/// `[fn::DEFINITION]`).
///
/// Delegates to `fn_ref_node_base` through `crate::lossless_parser!`.
/// NOTE(review): the macro is defined elsewhere; presumably it checks that the
/// produced node reproduces the consumed input -- confirm.
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(input), fields(input = input.s))
)]
pub fn fn_ref_node(input: Input) -> IResult<Input, GreenElement, ()> {
crate::lossless_parser!(fn_ref_node_base, input)
}
fn fn_ref_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
let (input, (l_bracket, fn_, colon, label, definition, r_bracket)) = tuple((
l_bracket_token,
tag("fn"),
colon_token,
take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
opt(tuple((colon_token, balanced_brackets))),
r_bracket_token,
))(input)?;
let mut children = vec![l_bracket, fn_.text_token(), colon, label.text_token()];
if let Some((colon, definition)) = definition {
children.push(colon);
children.extend(standard_object_nodes(definition));
}
children.push(r_bracket);
Ok((input, node(FN_REF, children)))
}
/// Splits `input` at the first `]` that is not paired with an earlier `[`
/// inside `input`.
///
/// Returns the result of `input.take_split` at that bracket's offset.
///
/// # Errors
///
/// Returns `nom::Err::Error(())` when no such `]` exists.
fn balanced_brackets(input: Input) -> IResult<Input, Input, ()> {
    let bytes = input.as_bytes();
    // Starts at 1 to account for the bracket opened before `input`.
    let mut open = 1;

    for idx in memchr2_iter(b'[', b']', bytes) {
        match bytes[idx] {
            b'[' => open += 1,
            // A `]` closing a bracket that was opened inside `input`.
            _ if open != 1 => open -= 1,
            // The `]` that closes the outer bracket.
            _ => return Ok(input.take_split(idx)),
        }
    }

    Err(Err::Error(()))
}
// Snapshot tests for `fn_ref_node`: labelled, labelled with definition,
// anonymous with definition, a definition containing nested brackets, and
// rejection of an unbalanced definition.
#[test]
fn parse() {
use crate::{ast::FnRef, tests::to_ast, ParseConfig};
let to_fn_ref = to_ast::<FnRef>(fn_ref_node);
insta::assert_debug_snapshot!(
to_fn_ref("[fn:1]").syntax,
@r###"
FN_REF@0..6
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
R_BRACKET@5..6 "]"
"###
);
insta::assert_debug_snapshot!(
to_fn_ref("[fn:1:2]").syntax,
@r###"
FN_REF@0..8
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..5 "1"
COLON@5..6 ":"
TEXT@6..7 "2"
R_BRACKET@7..8 "]"
"###
);
insta::assert_debug_snapshot!(
to_fn_ref("[fn::2]").syntax,
@r###"
FN_REF@0..7
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..4 ""
COLON@4..5 ":"
TEXT@5..6 "2"
R_BRACKET@6..7 "]"
"###
);
insta::assert_debug_snapshot!(
to_fn_ref("[fn::[]]").syntax,
@r###"
FN_REF@0..8
L_BRACKET@0..1 "["
TEXT@1..3 "fn"
COLON@3..4 ":"
TEXT@4..4 ""
COLON@4..5 ":"
TEXT@5..7 "[]"
R_BRACKET@7..8 "]"
"###
);
let config = &ParseConfig::default();
assert!(fn_ref_node(("[fn::[]", config).into()).is_err());
}

Some files were not shown because too many files have changed in this diff Show more