From b446471535c9ed8863275b0c4e77acbd44c04097 Mon Sep 17 00:00:00 2001 From: PoiScript Date: Wed, 30 Oct 2019 14:43:55 +0800 Subject: [PATCH] feat(parser): table blank lines --- src/elements/block.rs | 8 ++- src/elements/clock.rs | 1 + src/elements/comment.rs | 5 +- src/elements/drawer.rs | 2 + src/elements/dyn_block.rs | 3 +- src/elements/fixed_width.rs | 5 +- src/elements/keyword.rs | 2 + src/elements/rule.rs | 5 +- src/elements/table.rs | 119 ++++++++++++++++++++---------------- src/parsers.rs | 115 ++++++++++++++++++---------------- 10 files changed, 151 insertions(+), 114 deletions(-) diff --git a/src/elements/block.rs b/src/elements/block.rs index d664ced..fe2b5b3 100644 --- a/src/elements/block.rs +++ b/src/elements/block.rs @@ -1,8 +1,11 @@ use std::borrow::Cow; use nom::{ - bytes::complete::tag_no_case, character::complete::alpha1, error::ParseError, - sequence::preceded, IResult, + bytes::complete::tag_no_case, + character::complete::{alpha1, space0}, + error::ParseError, + sequence::preceded, + IResult, }; use crate::parsers::{blank_lines, line, take_lines_while}; @@ -226,6 +229,7 @@ pub fn parse_block_element(input: &str) -> Option<(&str, (&str, Option<&str>, &s fn parse_block_element_internal<'a, E: ParseError<&'a str>>( input: &'a str, ) -> IResult<&str, (&str, Option<&str>, &str, usize), E> { + let (input, _) = space0(input)?; let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?; let (input, args) = line(input)?; let end_line = format!("#+END_{}", name); diff --git a/src/elements/clock.rs b/src/elements/clock.rs index 407d245..e024a74 100644 --- a/src/elements/clock.rs +++ b/src/elements/clock.rs @@ -139,6 +139,7 @@ impl Clock<'_> { } fn parse_clock<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Clock, E> { + let (input, _) = space0(input)?; let (input, _) = tag("CLOCK:")(input)?; let (input, _) = space0(input)?; let (input, timestamp) = parse_inactive(input)?; diff --git a/src/elements/comment.rs b/src/elements/comment.rs index 2f029ac..fda1a80 100644 --- a/src/elements/comment.rs +++ b/src/elements/comment.rs @@ -14,7 +14,10 @@ pub struct Comment<'a> { impl Comment<'_> { pub(crate) fn parse(input: &str) -> Option<(&str, Comment<'_>)> { - let (input, value) = take_lines_while(|line| line == "#" || line.starts_with("# "))(input); + let (input, value) = take_lines_while(|line| { + let line = line.trim_start(); + line == "#" || line.starts_with("# ") + })(input); let (input, blank) = blank_lines(input); if value.is_empty() { diff --git a/src/elements/drawer.rs b/src/elements/drawer.rs index bbe57f2..9b40a7b 100644 --- a/src/elements/drawer.rs +++ b/src/elements/drawer.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use nom::{ bytes::complete::{tag, take_while1}, + character::complete::space0, error::ParseError, sequence::delimited, IResult, @@ -56,6 +57,7 @@ pub fn parse_drawer<'a, E: ParseError<&'a str>>( pub fn parse_drawer_without_blank<'a, E: ParseError<&'a str>>( input: &'a str, ) -> IResult<&str, (Drawer, &str), E> { + let (input, _) = space0(input)?; let (input, name) = delimited( tag(":"), take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'), diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs index e919450..ab96cd0 100644 --- a/src/elements/dyn_block.rs +++ b/src/elements/dyn_block.rs @@ -2,7 +2,7 @@ use std::borrow::Cow; use nom::{ bytes::complete::tag_no_case, - character::complete::{alpha1, space1}, + character::complete::{alpha1, space0, space1}, error::ParseError, IResult, }; @@ -46,6 +46,7 @@ impl DynBlock<'_> { fn parse_dyn_block<'a, E: ParseError<&'a str>>( input: &'a str, ) -> IResult<&str, (DynBlock, &str), E> { + let (input, _) = space0(input)?; let (input, _) = tag_no_case("#+BEGIN:")(input)?; let (input, _) = space1(input)?; let (input, name) = alpha1(input)?; diff --git a/src/elements/fixed_width.rs b/src/elements/fixed_width.rs index 7e29ec3..f47b437 100644 --- a/src/elements/fixed_width.rs +++ b/src/elements/fixed_width.rs @@ -15,7 +15,10 @@ pub struct FixedWidth<'a> { impl FixedWidth<'_> { pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth<'_>)> { - let (input, value) = take_lines_while(|line| line == ":" || line.starts_with(": "))(input); + let (input, value) = take_lines_while(|line| { + let line = line.trim_start(); + line == ":" || line.starts_with(": ") + })(input); let (input, blank) = blank_lines(input); if value.is_empty() { diff --git a/src/elements/keyword.rs b/src/elements/keyword.rs index 7c7dcdc..b3adc3a 100644 --- a/src/elements/keyword.rs +++ b/src/elements/keyword.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use nom::{ bytes::complete::{tag, take_till}, + character::complete::space0, combinator::opt, error::ParseError, sequence::delimited, @@ -67,6 +68,7 @@ pub fn parse_keyword(input: &str) -> Option<(&str, (&str, Option<&str>, &str, us fn parse_keyword_internal<'a, E: ParseError<&'a str>>( input: &'a str, ) -> IResult<&str, (&str, Option<&str>, &str, usize), E> { + let (input, _) = space0(input)?; let (input, _) = tag("#+")(input)?; let (input, key) = take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?; let (input, optional) = opt(delimited( diff --git a/src/elements/rule.rs b/src/elements/rule.rs index 1b85762..530545d 100644 --- a/src/elements/rule.rs +++ b/src/elements/rule.rs @@ -1,4 +1,6 @@ -use nom::{bytes::complete::take_while_m_n, error::ParseError, IResult}; +use nom::{ + bytes::complete::take_while_m_n, character::complete::space0, error::ParseError, IResult, +}; use crate::parsers::{blank_lines, eol}; @@ -18,6 +20,7 @@ impl Rule { } fn parse_rule<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Rule, E> { + let (input, _) = space0(input)?; let (input, _) = take_while_m_n(5, usize::max_value(), |c| c == '-')(input)?; let (input, _) = eol(input)?; let (input, blank) = blank_lines(input); diff --git a/src/elements/table.rs b/src/elements/table.rs index d41d6a5..5fe8878 100644 --- a/src/elements/table.rs +++ b/src/elements/table.rs @@ -1,12 +1,8 @@ use std::borrow::Cow; -use nom::{ - combinator::{peek, verify}, - error::ParseError, - IResult, -}; +use memchr::memchr; -use crate::parsers::{line, take_lines_while}; +use crate::parsers::{blank_lines, take_lines_while}; /// Table Elemenet #[derive(Debug)] @@ -16,20 +12,64 @@ use crate::parsers::{line, take_lines_while}; pub enum Table<'a> { /// "org" type table #[cfg_attr(feature = "ser", serde(rename = "org"))] - Org { tblfm: Option> }, + Org { + #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] + tblfm: Option>, + /// Numbers of blank lines between last table's line and next non-blank + /// line or buffer's end + post_blank: usize, + }, /// "table.el" type table #[cfg_attr(feature = "ser", serde(rename = "table.el"))] - TableEl { value: Cow<'a, str> }, + TableEl { + value: Cow<'a, str>, + /// Numbers of blank lines between last table's line and next non-blank + /// line or buffer's end + post_blank: usize, + }, } impl Table<'_> { + pub fn parse_table_el(input: &str) -> Option<(&str, Table<'_>)> { + let first_line = memchr(b'\n', input.as_bytes()) + .map(|i| input[0..i].trim()) + .unwrap_or_else(|| input.trim()); + + // first line must be the "+-" string and followed by plus or minus signs + if !first_line.starts_with("+-") + || first_line + .as_bytes() + .iter() + .any(|&c| c != b'+' && c != b'-') + { + return None; + } + + let (input, content) = take_lines_while(|line| { + let line = line.trim_start(); + line.starts_with('|') || line.starts_with('+') + })(input); + + let (input, blank) = blank_lines(input); + + Some(( + input, + Table::TableEl { + value: content.into(), + post_blank: blank, + }, + )) + } + pub fn into_owned(self) -> Table<'static> { match self { - Table::Org { tblfm } => Table::Org { + Table::Org { tblfm, post_blank } => Table::Org { tblfm: tblfm.map(Into::into).map(Cow::Owned), + post_blank: post_blank, }, - Table::TableEl { value } => Table::TableEl { + Table::TableEl { value, post_blank } => Table::TableEl { value: value.into_owned().into(), + post_blank: post_blank, }, } } @@ -46,57 +86,28 @@ pub enum TableRow { Rule, } -impl TableRow { - pub(crate) fn parse(input: &str) -> Option { - if input.starts_with("|-") { - Some(TableRow::Rule) - } else if input.starts_with('|') { - Some(TableRow::Standard) - } else { - None - } - } -} - -pub fn parse_table_el(input: &str) -> Option<(&str, &str)> { - parse_table_el_internal::<()>(input).ok() -} - -fn parse_table_el_internal<'a, E: ParseError<&'a str>>( - input: &'a str, -) -> IResult<&'a str, &'a str, E> { - let (input, _) = peek(verify(line, |s: &str| { - let s = s.trim(); - s.starts_with("+-") && s.as_bytes().iter().all(|&c| c == b'+' || c == b'-') - }))(input)?; - - let (input, content) = - take_lines_while(|line| line.starts_with('|') || line.starts_with('+'))(input); - - Ok((input, content)) -} - #[test] fn parse_table_el_() { - use nom::error::VerboseError; - assert_eq!( - parse_table_el_internal::>( - r#"+---+ -| | -+---+ + Table::parse_table_el( + r#" +---+ + | | + +---+ "# ), - Ok(( - r#" -"#, - r#"+---+ -| | -+---+ + Some(( + "", + Table::TableEl { + value: r#" +---+ + | | + +---+ "# + .into(), + post_blank: 1 + } )) ); - assert!(parse_table_el_internal::>("").is_err()); - assert!(parse_table_el_internal::>("+----|---").is_err()); + assert!(Table::parse_table_el("").is_none()); + assert!(Table::parse_table_el("+----|---").is_none()); } diff --git a/src/parsers.rs b/src/parsers.rs index 38c4c87..462aed3 100644 --- a/src/parsers.rs +++ b/src/parsers.rs @@ -12,11 +12,10 @@ use nom::{bytes::complete::take_while1, combinator::verify, error::ParseError, I use crate::config::ParseConfig; use crate::elements::{ block::parse_block_element, emphasis::parse_emphasis, keyword::parse_keyword, - radio_target::parse_radio_target, table::parse_table_el, BabelCall, CenterBlock, Clock, - Comment, CommentBlock, Cookie, Drawer, DynBlock, Element, ExampleBlock, ExportBlock, - FixedWidth, FnDef, FnRef, InlineCall, InlineSrc, Keyword, Link, List, ListItem, Macros, - QuoteBlock, Rule, Snippet, SourceBlock, SpecialBlock, Table, TableRow, Target, Timestamp, - Title, VerseBlock, + radio_target::parse_radio_target, BabelCall, CenterBlock, Clock, Comment, CommentBlock, Cookie, + Drawer, DynBlock, Element, ExampleBlock, ExportBlock, FixedWidth, FnDef, FnRef, InlineCall, + InlineSrc, Keyword, Link, List, ListItem, Macros, QuoteBlock, Rule, Snippet, SourceBlock, + SpecialBlock, Table, TableRow, Target, Timestamp, Title, VerseBlock, }; pub trait ElementArena<'a> { @@ -268,20 +267,21 @@ pub fn parse_block<'a, T: ElementArena<'a>>( parent: NodeId, containers: &mut Vec>, ) -> Option<&'a str> { - // footnote definitions must be start at column 0 - if let Some((tail, (fn_def, content))) = FnDef::parse(contents) { - let node = arena.append(fn_def, parent); - containers.push(Container::Block { content, node }); - return Some(tail); - } - - if let Some(tail) = parse_list(arena, contents, parent, containers) { - return Some(tail); - } - - let contents = contents.trim_start(); - - match contents.as_bytes().get(0)? { + match contents + .as_bytes() + .iter() + .find(|c| !c.is_ascii_whitespace())? + { + b'[' => { + let (tail, (fn_def, content)) = FnDef::parse(contents)?; + let node = arena.append(fn_def, parent); + containers.push(Container::Block { content, node }); + Some(tail) + } + b'0'..=b'9' | b'*' => { + let tail = parse_list(arena, contents, parent, containers)?; + Some(tail) + } b'C' => { let (tail, clock) = Clock::parse(contents)?; arena.append(clock, parent); @@ -292,9 +292,13 @@ pub fn parse_block<'a, T: ElementArena<'a>>( None } b'-' => { - let (tail, rule) = Rule::parse(contents)?; - arena.append(rule, parent); - Some(tail) + if let Some((tail, rule)) = Rule::parse(contents) { + arena.append(rule, parent); + Some(tail) + } else { + let tail = parse_list(arena, contents, parent, containers)?; + Some(tail) + } } b':' => { if let Some((tail, (drawer, content))) = Drawer::parse(contents) { @@ -308,9 +312,18 @@ pub fn parse_block<'a, T: ElementArena<'a>>( } } b'|' => { - let tail = parse_table(arena, contents, containers, parent)?; + let tail = parse_org_table(arena, contents, containers, parent); Some(tail) } + b'+' => { + if let Some((tail, table)) = Table::parse_table_el(contents) { + arena.append(table, parent); + Some(tail) + } else { + let tail = parse_list(arena, contents, parent, containers)?; + Some(tail) + } + } b'#' => { if let Some((tail, (name, args, content, blank))) = parse_block_element(contents) { match_block( @@ -687,45 +700,39 @@ pub fn parse_list<'a, T: ElementArena<'a>>( Some(tail) } -pub fn parse_table<'a, T: ElementArena<'a>>( +pub fn parse_org_table<'a, T: ElementArena<'a>>( arena: &mut T, contents: &'a str, containers: &mut Vec>, parent: NodeId, -) -> Option<&'a str> { - if contents.trim_start().starts_with('|') { - let table_node = arena.append(Table::Org { tblfm: None }, parent); +) -> &'a str { + let (tail, contents) = take_lines_while(|line| line.trim_start().starts_with('|'))(contents); + let (tail, blank) = blank_lines(tail); - let mut last_end = 0; - for start in memchr_iter(b'\n', contents.as_bytes()).chain(once(contents.len())) { - let line = contents[last_end..start].trim(); - match TableRow::parse(line) { - Some(TableRow::Standard) => { - let row_node = arena.append(TableRow::Standard, table_node); - for cell in line[1..].split_terminator('|') { - let cell_node = arena.append(Element::TableCell, row_node); - containers.push(Container::Inline { - content: cell.trim(), - node: cell_node, - }); - } - } - Some(TableRow::Rule) => { - arena.append(TableRow::Rule, table_node); - } - None => return Some(&contents[last_end..]), + let parent = arena.append( + Table::Org { + tblfm: None, + post_blank: blank, + }, + parent, + ); + + let mut last_end = 0; + for start in memchr_iter(b'\n', contents.as_bytes()).chain(once(contents.len())) { + let line = contents[last_end..start].trim_start(); + if line.starts_with("|-") { + arena.append(TableRow::Rule, parent); + } else { + let parent = arena.append(TableRow::Standard, parent); + for content in line.split_terminator('|').skip(1) { + let node = arena.append(Element::TableCell, parent); + containers.push(Container::Inline { content, node }); } - last_end = start + 1; } - - Some("") - } else { - let (tail, value) = parse_table_el(contents)?; - let value = value.into(); - arena.append(Table::TableEl { value }, parent); - - Some(tail) + last_end = start + 1; } + + tail } pub fn line<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E> {