docs: update README.md and doc-test

2019-06-28 00:13:05 +08:00 · 2019-06-28 00:13:05 +08:00 · 5a9e085b61
commit 5a9e085b61
parent 21aba13d71
5 changed files with 298 additions and 226 deletions
--- a/src/elements/keyword.rs
+++ b/src/elements/keyword.rs
@ -81,12 +81,12 @@ fn parse() {
    );

    assert_eq!(
-        Keyword::parse("#+ATTR_LATEX: :width 5cm"),
+        Keyword::parse("#+ATTR_LATEX: :width 5cm\n"),
        Some((
            "ATTR_LATEX",
            None,
            ":width 5cm",
-            "#+ATTR_LATEX: :width 5cm".len()
+            "#+ATTR_LATEX: :width 5cm\n".len()
        ))
    );

--- a/src/elements/mod.rs
+++ b/src/elements/mod.rs
@ -1,3 +1,5 @@
+//! Org-mode elements module
+
 mod block;
 mod clock;
 mod cookie;
@ -47,6 +49,12 @@ pub use self::{

 use indextree::NodeId;

+/// Org-mode element enum
+///
+/// Generally, each variant contains an element struct and
+/// a set of properties which indicate the position of the
+/// element in the original string.
+///
 #[derive(Debug)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize))]
 #[cfg_attr(feature = "serde", serde(tag = "type"))]
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,16 +1,15 @@
 //! A Rust library for parsing orgmode files.
 //!
-//! # Using Parser
+//! # Parse
 //!
-//! Orgize parser acts like a event-based parser, which means it
-//! returns an `Iterator` of [`Event`] s.
+//! To parse an orgmode string, simply invoke the [`Org::parse`] function:
 //!
-//! [`Event`]: enum.Event.html
+//! [`Org::parse`]: org/struct.Org.html#method.parse
 //!
 //! ```rust
-//! use orgize::Parser;
+//! use orgize::Org;
 //!
-//! let parser = Parser::new(r#"* Title 1
+//! let org = Org::parse(r#"* Title 1
 //! *Section 1*
 //! ** Title 2
 //! _Section 2_
@ -18,23 +17,140 @@
 //! /Section 3/
 //! * Title 4
 //! =Section 4="#);
+//! ```
 //!
-//! for event in parser {
+//! # Iter
+//!
+//! The [`Org::iter`] function returns an iterator of [`Event`]s, each of which is
+//! a simple wrapper around an [`Element`].
+//!
+//! [`Org::iter`]: org/struct.Org.html#method.iter
+//! [`Event`]: iter/enum.Event.html
+//! [`Element`]: elements/enum.Element.html
+//!
+//! ```rust
+//! # use orgize::Org;
+//! #
+//! # let org = Org::parse(r#"* Title 1
+//! # *Section 1*
+//! # ** Title 2
+//! # _Section 2_
+//! # * Title 3
+//! # /Section 3/
+//! # * Title 4
+//! # =Section 4="#);
+//! #
+//! for event in org.iter() {
 //!     // handling the event
 //! }
 //! ```
 //!
-//! # Using Render
+//! **Note**: whether or not an element is a container, it will appear twice in a loop:
+//! once as [`Event::Start(element)`] and once as [`Event::End(element)`].
 //!
-//! You can use the built-in [`HtmlRender`] to generate html string directly:
+//! [`Event::Start(element)`]: iter/enum.Event.html#variant.Start
+//! [`Event::End(element)`]: iter/enum.Event.html#variant.End
 //!
-//! [`HtmlRender`]: export/struct.HtmlRender.html
+//! # Render html
+//!
+//! You can call the [`Org::html_default`] function to generate html directly, which
+//! uses the [`DefaultHtmlHandler`] internally:
+//!
+//! [`Org::html_default`]: org/struct.Org.html#method.html_default
+//! [`DefaultHtmlHandler`]: export/html/struct.DefaultHtmlHandler.html
 //!
 //! ```rust
-//! use orgize::export::HtmlRender;
-//! use std::io::{Cursor, Result};
+//! # use orgize::Org;
+//! #
+//! # let org = Org::parse(r#"* Title 1
+//! # *Section 1*
+//! # ** Title 2
+//! # _Section 2_
+//! # * Title 3
+//! # /Section 3/
+//! # * Title 4
+//! # =Section 4="#);
+//! #
+//! let mut writer = Vec::new();
+//! org.html_default(&mut writer).unwrap();
 //!
-//! fn main() -> Result<()> {
+//! assert_eq!(
+//!     String::from_utf8(writer).unwrap(),
+//!     "<main><h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
+//!     <h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
+//!     <h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
+//!     <h1>Title 4</h1><section><p><code>Section 4</code></p></section></main>"
+//! );
+//! ```
+//!
+//! # Render html with custom HtmlHandler
+//!
+//! To customize html rendering, simply implement the [`HtmlHandler`] trait and pass
+//! it to the [`Org::html`] function.
+//!
+//! [`HtmlHandler`]: export/html/trait.HtmlHandler.html
+//! [`Org::html`]: org/struct.Org.html#method.html
+//!
+//! The following code demonstrates how to add an id to every headline and return
+//! your own error type while rendering.
+//!
+//! ```rust
+//! # use std::convert::From;
+//! # use std::io::{Error as IOError, Write};
+//! # use std::string::FromUtf8Error;
+//! #
+//! # use orgize::export::{html::Escape, DefaultHtmlHandler, HtmlHandler};
+//! # use orgize::{Element, Org};
+//! # use slugify::slugify;
+//! #
+//! #[derive(Debug)]
+//! enum MyError {
+//!     IO(IOError),
+//!     Heading,
+//!     Utf8(FromUtf8Error),
+//! }
+//!
+//! // From<std::io::Error> trait is required for custom error type
+//! impl From<IOError> for MyError {
+//!     fn from(err: IOError) -> Self {
+//!         MyError::IO(err)
+//!     }
+//! }
+//!
+//! impl From<FromUtf8Error> for MyError {
+//!     fn from(err: FromUtf8Error) -> Self {
+//!         MyError::Utf8(err)
+//!     }
+//! }
+//!
+//! struct MyHtmlHandler;
+//!
+//! impl HtmlHandler<MyError> for MyHtmlHandler {
+//!     fn start<W: Write>(&mut self, mut w: W, element: &Element<'_>) -> Result<(), MyError> {
+//!         let mut default_handler = DefaultHtmlHandler;
+//!         match element {
+//!             Element::Headline { headline, .. } => {
+//!                 if headline.level > 6 {
+//!                     return Err(MyError::Heading);
+//!                 } else {
+//!                     let slugify = slugify!(headline.title);
+//!                     write!(
+//!                         w,
+//!                         "<h{0}><a id=\"{1}\" href=\"#{1}\">{2}</a></h{0}>",
+//!                         headline.level,
+//!                         slugify,
+//!                         Escape(headline.title),
+//!                     )?;
+//!                 }
+//!             }
+//!             // fallthrough to default handler
+//!             _ => default_handler.start(w, element)?,
+//!         }
+//!         Ok(())
+//!     }
+//! }
+//!
+//! fn main() -> Result<(), MyError> {
 //!     let contents = r"* Title 1
 //! *Section 1*
 //! ** Title 2
@ -44,99 +160,45 @@
 //! * Title 4
 //! =Section 4=";
 //!
-//!     let mut cursor = Cursor::new(Vec::new());
-//!     let mut render = HtmlRender::default(&mut cursor, &contents);
-//!
-//!     render.render()?;
-//!
+//!     let mut writer = Vec::new();
+//!     Org::parse(&contents).html(&mut writer, MyHtmlHandler)?;
 //!     assert_eq!(
-//!         String::from_utf8(cursor.into_inner()).unwrap(),
-//!         "<h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
-//!          <h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
-//!          <h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
-//!          <h1>Title 4</h1><section><p><code>Section 4</code></p></section>"
+//!         String::from_utf8(writer)?,
+//!         "<main><h1><a id=\"title-1\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
+//!          <h2><a id=\"title-2\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
+//!          <h1><a id=\"title-3\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
+//!          <h1><a id=\"title-4\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section></main>"
 //!     );
 //!
 //!     Ok(())
 //! }
 //! ```
 //!
-//! # Custom HtmlHandler
+//! **Note**: as mentioned above, each element appears twice while iterating,
+//! and the handler silently ignores all end events from non-container elements.
 //!
-//! You can create your own handler by implementing [`HtmlHandler`] trait and passing
-//! it to the [`HtmlRender`].
+//! So if you want to change how a non-container element renders, just redefine the start
+//! function and leave the end function untouched.
 //!
-//! The following example demonstrates how to add an anchor for every headline and use
-//! your own error type.
+//! # Serde
 //!
-//! [`HtmlHandler`]: export/trait.HtmlHandler.html
-//! [`HtmlRender`]: export/struct.HtmlRender.html
+//! The `Org` struct already implements serde's `Serialize` trait, which means you can
+//! freely serialize it into any format that serde supports, such as json:
 //!
 //! ```rust
-//! use orgize::{export::*, headline::Headline};
-//! use slugify::slugify;
-//! use std::io::{Cursor, Error as IOError, Write};
-//! use std::string::FromUtf8Error;
+//! use serde_json::to_string;
+//! # use orgize::Org;
+//! #
+//! # let org = Org::parse(r#"* Title 1
+//! # *Section 1*
+//! # ** Title 2
+//! # _Section 2_
+//! # * Title 3
+//! # /Section 3/
+//! # * Title 4
+//! # =Section 4="#);
 //!
-//! // custom error type
-//! #[derive(Debug)]
-//! enum Error {
-//!     IO(IOError),
-//!     Headline,
-//!     Utf8(FromUtf8Error),
-//! }
-//!
-//! // From<std::io::Error> trait is required for custom error type
-//! impl From<IOError> for Error {
-//!     fn from(err: IOError) -> Error {
-//!         Error::IO(err)
-//!     }
-//! }
-//!
-//! struct CustomHtmlHandler;
-//!
-//! impl<W: Write> HtmlHandler<W, Error> for CustomHtmlHandler {
-//!     fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), Error> {
-//!          if hdl.level > 6 {
-//!              Err(Error::Headline)
-//!          } else {
-//!              write!(
-//!                  w,
-//!                  r##"<h{}><a class="anchor" href="#{}">"##,
-//!                  hdl.level,
-//!                  slugify!(hdl.title),
-//!              )?;
-//!              self.escape(w, hdl.title)?;
-//!              Ok(write!(w, "</a></h{}>", hdl.level)?)
-//!          }
-//!     }
-//! }
-//!
-//! fn main() -> Result<(), Error> {
-//!     let contents = r"* Title 1
-//! *Section 1*
-//! ** Title 2
-//! _Section 2_
-//! * Title 3
-//! /Section 3/
-//! * Title 4
-//! =Section 4=";
-//!
-//!     let mut cursor = Cursor::new(Vec::new());
-//!     let mut render = HtmlRender::new(CustomHtmlHandler, &mut cursor, &contents);
-//!
-//!     render.render()?;
-//!
-//!     assert_eq!(
-//!         String::from_utf8(cursor.into_inner()).map_err(Error::Utf8)?,
-//!         "<h1><a class=\"anchor\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
-//!          <h2><a class=\"anchor\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
-//!          <h1><a class=\"anchor\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
-//!          <h1><a class=\"anchor\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section>"
-//!     );
-//!
-//!     Ok(())
-//! }
+//! println!("{}", to_string(&org).unwrap());
 //! ```

 pub mod elements;
--- a/src/org.rs
+++ b/src/org.rs
@ -1,6 +1,6 @@
 use indextree::{Arena, NodeId};
 use jetscii::bytes;
-use memchr::{memchr, memchr_iter, memrchr_iter};
+use memchr::{memchr, memchr_iter};
 use std::io::{Error, Write};

 use crate::elements::*;
@ -80,13 +80,11 @@ impl<'a> Org<'a> {
                    if begin < end {
                        let off = Headline::find_level(&self.text[begin..end], std::usize::MAX);
                        if off != 0 {
-                            let (contents_begin, contents_end) =
-                                skip_empty_lines(&self.text[begin..begin + off]);
                            let section = Element::Section {
                                begin,
                                end: begin + off,
-                                contents_begin: begin + contents_begin,
-                                contents_end: begin + contents_end,
+                                contents_begin: begin,
+                                contents_end: begin + off,
                            };
                            let new_node = self.arena.new_node(section);
                            node.append(new_node, &mut self.arena).unwrap();
@ -236,7 +234,7 @@ impl<'a> Org<'a> {
        if let Some((ty, off)) = self.parse_element(begin, end) {
            let new_node = self.arena.new_node(ty);
            node.append(new_node, &mut self.arena).unwrap();
-            pos += off;
+            pos += off + skip_empty_lines(&text[off..]);
        }

        let mut last_end = pos;
@ -247,12 +245,17 @@ impl<'a> Org<'a> {
                .iter()
                .all(u8::is_ascii_whitespace)
            {
-                let (end, _) = skip_empty_lines(&text[pos + i..]);
+                let end = skip_empty_lines(&text[pos + i..]);
                let new_node = self.arena.new_node(Element::Paragraph {
                    begin: begin + last_end,
                    end: begin + pos + i + end,
                    contents_begin: begin + last_end,
-                    contents_end: begin + pos,
+                    contents_end: begin
+                        + if text.as_bytes()[pos - 1] == b'\n' {
+                            pos - 1
+                        } else {
+                            pos
+                        },
                });
                node.append(new_node, &mut self.arena).unwrap();
                pos += i + end;
@ -263,13 +266,18 @@ impl<'a> Org<'a> {
                        begin: begin + last_end,
                        end: begin + pos,
                        contents_begin: begin + last_end,
-                        contents_end: begin + pos,
+                        contents_end: begin
+                            + if text.as_bytes()[pos - 1] == b'\n' {
+                                pos - 1
+                            } else {
+                                pos
+                            },
                    });
                    node.append(new_node, &mut self.arena).unwrap();
                }
                let new_node = self.arena.new_node(ty);
                node.append(new_node, &mut self.arena).unwrap();
-                pos += off;
+                pos += off + skip_empty_lines(&text[pos + off..]);
                last_end = pos;
            } else {
                pos += i + 1;
@ -755,9 +763,8 @@ impl<'a> Org<'a> {
    }
 }

-fn skip_empty_lines(text: &str) -> (usize, usize) {
+fn skip_empty_lines(text: &str) -> usize {
    let mut i = 0;
-    let mut j = text.len();
    for pos in memchr_iter(b'\n', text.as_bytes()) {
        if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) {
            i = pos + 1;
@ -765,14 +772,15 @@ fn skip_empty_lines(text: &str) -> (usize, usize) {
            break;
        }
    }
-
-    for pos in memrchr_iter(b'\n', text.as_bytes()) {
-        if text.as_bytes()[pos..j].iter().all(u8::is_ascii_whitespace) {
-            j = pos;
-        } else {
-            break;
-        }
-    }
-
-    (i, j)
+    i
+}
+
+#[test]
+fn test_skip_empty_lines() {
+    assert_eq!(skip_empty_lines("foo"), 0);
+    assert_eq!(skip_empty_lines(" foo"), 0);
+    assert_eq!(skip_empty_lines(" \nfoo\n"), " \n".len());
+    assert_eq!(skip_empty_lines(" \n\n\nfoo\n"), " \n\n\n".len());
+    assert_eq!(skip_empty_lines(" \n  \n\nfoo\n"), " \n  \n\n".len());
+    assert_eq!(skip_empty_lines(" \n  \n\n   foo\n"), " \n  \n\n".len());
 }