docs: update README.md and doc-test

This commit is contained in:
PoiScript 2019-06-28 00:13:05 +08:00
parent 21aba13d71
commit 5a9e085b61
5 changed files with 298 additions and 226 deletions

View file

@ -81,12 +81,12 @@ fn parse() {
);
assert_eq!(
Keyword::parse("#+ATTR_LATEX: :width 5cm"),
Keyword::parse("#+ATTR_LATEX: :width 5cm\n"),
Some((
"ATTR_LATEX",
None,
":width 5cm",
"#+ATTR_LATEX: :width 5cm".len()
"#+ATTR_LATEX: :width 5cm\n".len()
))
);

View file

@ -1,3 +1,5 @@
//! Org-mode elements module
mod block;
mod clock;
mod cookie;
@ -47,6 +49,12 @@ pub use self::{
use indextree::NodeId;
/// Org-mode element enum
///
/// Generally, each variant contains an element struct and
/// a set of properties which indicate the position of the
/// element in the original string.
///
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "type"))]

View file

@ -1,16 +1,15 @@
//! A Rust library for parsing orgmode files.
//!
//! # Using Parser
//! # Parse
//!
//! Orgize parser acts like a event-based parser, which means it
//! returns an `Iterator` of [`Event`] s.
//! To parse an orgmode string, simply invoke the [`Org::parse`] function:
//!
//! [`Event`]: enum.Event.html
//! [`Org::parse`]: org/struct.Org.html#method.parse
//!
//! ```rust
//! use orgize::Parser;
//! use orgize::Org;
//!
//! let parser = Parser::new(r#"* Title 1
//! let org = Org::parse(r#"* Title 1
//! *Section 1*
//! ** Title 2
//! _Section 2_
@ -18,23 +17,140 @@
//! /Section 3/
//! * Title 4
//! =Section 4="#);
//! ```
//!
//! for event in parser {
//! # Iter
//!
//! The [`Org::iter`] function returns an iterator of [`Event`]s, each of which is
//! a simple wrapper around an [`Element`].
//!
//! [`Org::iter`]: org/struct.Org.html#method.iter
//! [`Event`]: iter/enum.Event.html
//! [`Element`]: elements/enum.Element.html
//!
//! ```rust
//! # use orgize::Org;
//! #
//! # let org = Org::parse(r#"* Title 1
//! # *Section 1*
//! # ** Title 2
//! # _Section 2_
//! # * Title 3
//! # /Section 3/
//! # * Title 4
//! # =Section 4="#);
//! #
//! for event in org.iter() {
//! // handling the event
//! }
//! ```
//!
//! # Using Render
//! **Note**: whether an element is a container or not, it will appear twice in a loop:
//! once as [`Event::Start(element)`] and once as [`Event::End(element)`].
//!
//! You can use the built-in [`HtmlRender`] to generate html string directly:
//! [`Event::Start(element)`]: iter/enum.Event.html#variant.Start
//! [`Event::End(element)`]: iter/enum.Event.html#variant.End
//!
//! [`HtmlRender`]: export/struct.HtmlRender.html
//! # Render html
//!
//! You can call the [`Org::html_default`] function to generate html directly, which
//! uses the [`DefaultHtmlHandler`] internally:
//!
//! [`Org::html_default`]: org/struct.Org.html#method.html_default
//! [`DefaultHtmlHandler`]: export/html/struct.DefaultHtmlHandler.html
//!
//! ```rust
//! use orgize::export::HtmlRender;
//! use std::io::{Cursor, Result};
//! # use orgize::Org;
//! #
//! # let org = Org::parse(r#"* Title 1
//! # *Section 1*
//! # ** Title 2
//! # _Section 2_
//! # * Title 3
//! # /Section 3/
//! # * Title 4
//! # =Section 4="#);
//! #
//! let mut writer = Vec::new();
//! org.html_default(&mut writer).unwrap();
//!
//! fn main() -> Result<()> {
//! assert_eq!(
//! String::from_utf8(writer).unwrap(),
//! "<main><h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
//! <h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
//! <h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
//! <h1>Title 4</h1><section><p><code>Section 4</code></p></section></main>"
//! );
//! ```
//!
//! # Render html with custom HtmlHandler
//!
//! To customize html rendering, simply implement the [`HtmlHandler`] trait and pass
//! it to the [`Org::html`] function.
//!
//! [`HtmlHandler`]: export/html/trait.HtmlHandler.html
//! [`Org::html`]: org/struct.Org.html#method.html
//!
//! The following code demonstrates how to add an id to every headline and return
//! your own error type while rendering.
//!
//! ```rust
//! # use std::convert::From;
//! # use std::io::{Error as IOError, Write};
//! # use std::string::FromUtf8Error;
//! #
//! # use orgize::export::{html::Escape, DefaultHtmlHandler, HtmlHandler};
//! # use orgize::{Element, Org};
//! # use slugify::slugify;
//! #
//! #[derive(Debug)]
//! enum MyError {
//! IO(IOError),
//! Heading,
//! Utf8(FromUtf8Error),
//! }
//!
//! // From<std::io::Error> trait is required for custom error type
//! impl From<IOError> for MyError {
//! fn from(err: IOError) -> Self {
//! MyError::IO(err)
//! }
//! }
//!
//! impl From<FromUtf8Error> for MyError {
//! fn from(err: FromUtf8Error) -> Self {
//! MyError::Utf8(err)
//! }
//! }
//!
//! struct MyHtmlHandler;
//!
//! impl HtmlHandler<MyError> for MyHtmlHandler {
//! fn start<W: Write>(&mut self, mut w: W, element: &Element<'_>) -> Result<(), MyError> {
//! let mut default_handler = DefaultHtmlHandler;
//! match element {
//! Element::Headline { headline, .. } => {
//! if headline.level > 6 {
//! return Err(MyError::Heading);
//! } else {
//! let slugify = slugify!(headline.title);
//! write!(
//! w,
//! "<h{0}><a id=\"{1}\" href=\"#{1}\">{2}</a></h{0}>",
//! headline.level,
//! slugify,
//! Escape(headline.title),
//! )?;
//! }
//! }
//! // fallthrough to default handler
//! _ => default_handler.start(w, element)?,
//! }
//! Ok(())
//! }
//! }
//!
//! fn main() -> Result<(), MyError> {
//! let contents = r"* Title 1
//! *Section 1*
//! ** Title 2
@ -44,99 +160,45 @@
//! * Title 4
//! =Section 4=";
//!
//! let mut cursor = Cursor::new(Vec::new());
//! let mut render = HtmlRender::default(&mut cursor, &contents);
//!
//! render.render()?;
//!
//! let mut writer = Vec::new();
//! Org::parse(&contents).html(&mut writer, MyHtmlHandler)?;
//! assert_eq!(
//! String::from_utf8(cursor.into_inner()).unwrap(),
//! "<h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
//! <h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
//! <h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
//! <h1>Title 4</h1><section><p><code>Section 4</code></p></section>"
//! String::from_utf8(writer)?,
//! "<main><h1><a id=\"title-1\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
//! <h2><a id=\"title-2\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
//! <h1><a id=\"title-3\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
//! <h1><a id=\"title-4\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section></main>"
//! );
//!
//! Ok(())
//! }
//! ```
//!
//! # Custom HtmlHandler
//! **Note**: as mentioned above, each element will appear twice while iterating,
//! and the handler will silently ignore all end events from non-container elements.
//!
//! You can create your own handler by implementing [`HtmlHandler`] trait and passing
//! it to the [`HtmlRender`].
//! So if you want to change how a non-container element renders, just redefine the start
//! function and leave the end function untouched.
//!
//! The following example demonstrates how to add an anchor for every headline and use
//! your own error type.
//! # Serde
//!
//! [`HtmlHandler`]: export/trait.HtmlHandler.html
//! [`HtmlRender`]: export/struct.HtmlRender.html
//! The `Org` struct already implements serde's `Serialize` trait, which means you can
//! freely serialize it into any format that serde supports, such as json:
//!
//! ```rust
//! use orgize::{export::*, headline::Headline};
//! use slugify::slugify;
//! use std::io::{Cursor, Error as IOError, Write};
//! use std::string::FromUtf8Error;
//! use serde_json::to_string;
//! # use orgize::Org;
//! #
//! # let org = Org::parse(r#"* Title 1
//! # *Section 1*
//! # ** Title 2
//! # _Section 2_
//! # * Title 3
//! # /Section 3/
//! # * Title 4
//! # =Section 4="#);
//!
//! // custom error type
//! #[derive(Debug)]
//! enum Error {
//! IO(IOError),
//! Headline,
//! Utf8(FromUtf8Error),
//! }
//!
//! // From<std::io::Error> trait is required for custom error type
//! impl From<IOError> for Error {
//! fn from(err: IOError) -> Error {
//! Error::IO(err)
//! }
//! }
//!
//! struct CustomHtmlHandler;
//!
//! impl<W: Write> HtmlHandler<W, Error> for CustomHtmlHandler {
//! fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), Error> {
//! if hdl.level > 6 {
//! Err(Error::Headline)
//! } else {
//! write!(
//! w,
//! r##"<h{}><a class="anchor" href="#{}">"##,
//! hdl.level,
//! slugify!(hdl.title),
//! )?;
//! self.escape(w, hdl.title)?;
//! Ok(write!(w, "</a></h{}>", hdl.level)?)
//! }
//! }
//! }
//!
//! fn main() -> Result<(), Error> {
//! let contents = r"* Title 1
//! *Section 1*
//! ** Title 2
//! _Section 2_
//! * Title 3
//! /Section 3/
//! * Title 4
//! =Section 4=";
//!
//! let mut cursor = Cursor::new(Vec::new());
//! let mut render = HtmlRender::new(CustomHtmlHandler, &mut cursor, &contents);
//!
//! render.render()?;
//!
//! assert_eq!(
//! String::from_utf8(cursor.into_inner()).map_err(Error::Utf8)?,
//! "<h1><a class=\"anchor\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
//! <h2><a class=\"anchor\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
//! <h1><a class=\"anchor\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
//! <h1><a class=\"anchor\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section>"
//! );
//!
//! Ok(())
//! }
//! println!("{}", to_string(&org).unwrap());
//! ```
pub mod elements;

View file

@ -1,6 +1,6 @@
use indextree::{Arena, NodeId};
use jetscii::bytes;
use memchr::{memchr, memchr_iter, memrchr_iter};
use memchr::{memchr, memchr_iter};
use std::io::{Error, Write};
use crate::elements::*;
@ -80,13 +80,11 @@ impl<'a> Org<'a> {
if begin < end {
let off = Headline::find_level(&self.text[begin..end], std::usize::MAX);
if off != 0 {
let (contents_begin, contents_end) =
skip_empty_lines(&self.text[begin..begin + off]);
let section = Element::Section {
begin,
end: begin + off,
contents_begin: begin + contents_begin,
contents_end: begin + contents_end,
contents_begin: begin,
contents_end: begin + off,
};
let new_node = self.arena.new_node(section);
node.append(new_node, &mut self.arena).unwrap();
@ -236,7 +234,7 @@ impl<'a> Org<'a> {
if let Some((ty, off)) = self.parse_element(begin, end) {
let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap();
pos += off;
pos += off + skip_empty_lines(&text[off..]);
}
let mut last_end = pos;
@ -247,12 +245,17 @@ impl<'a> Org<'a> {
.iter()
.all(u8::is_ascii_whitespace)
{
let (end, _) = skip_empty_lines(&text[pos + i..]);
let end = skip_empty_lines(&text[pos + i..]);
let new_node = self.arena.new_node(Element::Paragraph {
begin: begin + last_end,
end: begin + pos + i + end,
contents_begin: begin + last_end,
contents_end: begin + pos,
contents_end: begin
+ if text.as_bytes()[pos - 1] == b'\n' {
pos - 1
} else {
pos
},
});
node.append(new_node, &mut self.arena).unwrap();
pos += i + end;
@ -263,13 +266,18 @@ impl<'a> Org<'a> {
begin: begin + last_end,
end: begin + pos,
contents_begin: begin + last_end,
contents_end: begin + pos,
contents_end: begin
+ if text.as_bytes()[pos - 1] == b'\n' {
pos - 1
} else {
pos
},
});
node.append(new_node, &mut self.arena).unwrap();
}
let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap();
pos += off;
pos += off + skip_empty_lines(&text[pos + off..]);
last_end = pos;
} else {
pos += i + 1;
@ -755,9 +763,8 @@ impl<'a> Org<'a> {
}
}
fn skip_empty_lines(text: &str) -> (usize, usize) {
fn skip_empty_lines(text: &str) -> usize {
let mut i = 0;
let mut j = text.len();
for pos in memchr_iter(b'\n', text.as_bytes()) {
if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) {
i = pos + 1;
@ -765,14 +772,15 @@ fn skip_empty_lines(text: &str) -> (usize, usize) {
break;
}
}
for pos in memrchr_iter(b'\n', text.as_bytes()) {
if text.as_bytes()[pos..j].iter().all(u8::is_ascii_whitespace) {
j = pos;
} else {
break;
}
}
(i, j)
i
}
#[test]
fn test_skip_empty_lines() {
    // Each case pairs an input with the leading prefix that is expected to be
    // skipped; the function must return that prefix's length in bytes.
    // An empty prefix means nothing is skipped (the first line has content).
    let cases = [
        ("foo", ""),
        (" foo", ""),
        (" \nfoo\n", " \n"),
        (" \n\n\nfoo\n", " \n\n\n"),
        (" \n \n\nfoo\n", " \n \n\n"),
        // Indentation on the first non-empty line is not part of the skip.
        (" \n \n\n foo\n", " \n \n\n"),
    ];

    for (input, skipped) in cases.iter() {
        assert_eq!(skip_empty_lines(input), skipped.len());
    }
}