From 763ec00434abd6e7c2a6cab7f783797a454e17e7 Mon Sep 17 00:00:00 2001 From: PoiScript Date: Wed, 23 Jan 2019 00:11:09 +0800 Subject: [PATCH] refactor: objects parse --- src/elements/mod.rs | 67 +++++++++-------- src/lib.rs | 1 + src/objects/mod.rs | 177 ++++++++++++++++++++++++-------------------- src/parser.rs | 32 -------- 4 files changed, 133 insertions(+), 144 deletions(-) diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 208bf58..64404a5 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -97,7 +97,7 @@ impl<'a> Element<'a> { // Unlike other element, footnote definition must starts at column 0 if bytes[pos] == b'[' { if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) { - return if pos == start { + break if pos == start { (off + 1, Some(Element::FnDef { label, cont }), None) } else { ( @@ -116,9 +116,9 @@ impl<'a> Element<'a> { pos = skip_space!(src, pos); if pos <= src.len() { - macro_rules! ret { + macro_rules! brk { ($ele:expr, $off:expr) => { - return if pos == start { + break if pos == start { ($off, Some($ele), None) } else { ( @@ -145,7 +145,7 @@ impl<'a> Element<'a> { cont_end, end: list_end, }; - return if pos == start { + break if pos == start { (1, Some(list), None) } else { ( @@ -161,7 +161,7 @@ impl<'a> Element<'a> { } if bytes[pos] == b'\n' { - return ( + break ( start, Some(Element::Paragraph { cont_end: end, @@ -178,73 +178,76 @@ impl<'a> Element<'a> { if bytes[pos] == b'-' { let off = Rule::parse(&src[pos..]); if off != 0 { - ret!(Element::Rule, off); + brk!(Element::Rule, off); } } // TODO: multiple lines fixed width area - if bytes[pos] == b':' && bytes.get(pos + 1).map(|&b| b == b' ').unwrap_or(false) { + if bytes[pos] == b':' + && bytes + .get(pos + 1) + .map(|&b| b == b' ' || b == b'\n') + .unwrap_or(false) + { let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..]) .map(|i| i + 1) .unwrap_or_else(|| src.len() - pos); - ret!(Element::FixedWidth(&src[pos + 1..pos + eol]), eol); + brk!(Element::FixedWidth(&src[pos + 1..pos + eol]), eol); } if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b'+').unwrap_or(false) { - if let Some((name, args, contents_beg, cont_end, end)) = - Block::parse(&src[pos..]) - { - let cont = &src[pos + contents_beg + 1..pos + cont_end - 1]; + if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) { + let cont = &src[pos + cont_beg + 1..pos + cont_end - 1]; match name.to_uppercase().as_str() { - "COMMENT" => ret!(Element::CommentBlock { args, cont }, pos + end), - "EXAMPLE" => ret!(Element::ExampleBlock { args, cont }, pos + end), - "EXPORT" => ret!(Element::ExportBlock { args, cont }, pos + end), - "SRC" => ret!(Element::SrcBlock { args, cont }, pos + end), - "VERSE" => ret!(Element::VerseBlock { args, cont }, pos + end), - "CENTER" => ret!( + "COMMENT" => brk!(Element::CommentBlock { args, cont }, end), + "EXAMPLE" => brk!(Element::ExampleBlock { args, cont }, end), + "EXPORT" => brk!(Element::ExportBlock { args, cont }, end), + "SRC" => brk!(Element::SrcBlock { args, cont }, end), + "VERSE" => brk!(Element::VerseBlock { args, cont }, end), + "CENTER" => brk!( Element::CtrBlock { args, cont_end, end, }, - pos + contents_beg + cont_beg ), - "QUOTE" => ret!( + "QUOTE" => brk!( Element::QteBlock { args, cont_end, end, }, - pos + contents_beg + cont_beg ), - _ => ret!( + _ => brk!( Element::SplBlock { name, args, cont_end, - end, + end }, - pos + contents_beg + cont_beg ), }; } - if let Some((name, args, contents_beg, cont_end, end)) = + if let Some((name, args, cont_beg, cont_end, end)) = DynBlock::parse(&src[pos..]) { - ret!( + brk!( Element::DynBlock { name, args, cont_end, end, }, - pos + contents_beg + cont_beg ) } if let Some((key, value, off)) = Keyword::parse(&src[pos..]) { - ret!( + brk!( if let Key::Call = key { Element::Call { value } } else { @@ -261,15 +264,15 @@ impl<'a> Element<'a> { let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..]) .map(|i| i + 1) .unwrap_or_else(|| src.len() - pos); - ret!(Element::Comment(&src[pos + 1..pos + eol]), eol); + brk!(Element::Comment(&src[pos + 1..pos + eol]), eol); } } - if let Some(off) = &src[pos..].find('\n') { + if let Some(off) = memchr::memchr(b'\n', &src.as_bytes()[pos..]) { pos += off + 1; // last char if pos == src.len() { - return ( + break ( start, Some(Element::Paragraph { cont_end: pos - 1, @@ -279,7 +282,7 @@ impl<'a> Element<'a> { ); } } else { - return ( + break ( start, Some(Element::Paragraph { cont_end: src.len(), diff --git a/src/lib.rs b/src/lib.rs index 2d07887..d168696 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#[macro_use] extern crate jetscii; extern crate memchr; diff --git a/src/objects/mod.rs b/src/objects/mod.rs index dea4901..adb7fa6 100644 --- a/src/objects/mod.rs +++ b/src/objects/mod.rs @@ -47,111 +47,128 @@ impl<'a> Object<'a> { pub fn next_2(src: &'a str) -> (Object<'a>, usize, Option<(Object<'a>, usize)>) { let bytes = src.as_bytes(); - if src.len() < 2 { + if src.len() <= 2 { return (Object::Text(src), src.len(), None); } - // TODO: refactor with src[..].find(..) - for pos in 0..src.len() - 2 { - macro_rules! ret { - ($obj:expr, $off:expr) => { - return if pos == 0 { + let chars = ascii_chars!('@', ' ', '"', '(', '\n', '{', '<', '['); + + let mut pos = 0; + loop { + macro_rules! brk { + ($obj:expr, $off:expr, $pos:expr) => { + break if pos == 0 { ($obj, $off, None) } else { - (Object::Text(&src[0..pos]), pos, Some(($obj, $off))) + (Object::Text(&src[0..$pos]), $pos, Some(($obj, $off))) }; }; } - let first = bytes[pos]; - let second = bytes[pos + 1]; - let third = bytes[pos + 2]; + let mut pre = pos; - if first == b'@' && second == b'@' { - if let Some((snippet, off)) = Snippet::parse(&src[pos..]) { - ret!(Object::Snippet(snippet), off); - } - } - - if first == b'[' { - if second == b'f' && third == b'n' { - if let Some((fn_ref, off)) = FnRef::parse(&src[pos..]) { - ret!(Object::FnRef(fn_ref), off); + match (bytes[pos], bytes[pos + 1], bytes[pos + 2]) { + (b'@', b'@', _) => { + if let Some((snippet, off)) = Snippet::parse(&src[pos..]) { + brk!(Object::Snippet(snippet), off, pos); } - } else if second == b'[' { - if let Some((link, off)) = Link::parse(&src[pos..]) { - ret!(Object::Link(link), off); - } - } else { - if let Some((cookie, off)) = Cookie::parse(&src[pos..]) { - ret!(Object::Cookie(cookie), off); - } - // TODO: Timestamp } - } - - if first == b'{' && second == b'{' && third == b'{' { - if let Some((macros, off)) = Macros::parse(&src[pos..]) { - ret!(Object::Macros(macros), off); + (b'{', b'{', b'{') => { + if let Some((macros, off)) = Macros::parse(&src[pos..]) { + brk!(Object::Macros(macros), off, pos); + } } - } - - if first == b'<' && second == b'<' { - if third == b'<' { + (b'<', b'<', b'<') => { if let Some((target, off)) = RadioTarget::parse(&src[pos..]) { - ret!(Object::RadioTarget(target), off); - } - } else if third != b'<' && third != b'\n' { - if let Some((target, off)) = Target::parse(&src[pos..]) { - ret!(Object::Target(target), off); + brk!(Object::RadioTarget(target), off, pos); } } - } - - if pos == 0 - || bytes[pos - 1] == b' ' - || bytes[pos - 1] == b'"' - || bytes[pos - 1] == b'(' - || bytes[pos - 1] == b',' - || bytes[pos - 1] == b'\n' - || bytes[pos - 1] == b'{' - { - if (first == b'*' - || first == b'+' - || first == b'/' - || first == b'=' - || first == b'_' - || first == b'~') - && !second.is_ascii_whitespace() - { - if let Some(end) = Emphasis::parse(&src[pos..], first) { - match first { - b'*' => ret!(Object::Bold { end }, 1), - b'+' => ret!(Object::Strike { end }, 1), - b'/' => ret!(Object::Italic { end }, 1), - b'_' => ret!(Object::Underline { end }, 1), - b'~' => ret!(Object::Code(&src[pos + 1..pos + end]), end + 1), - b'=' => ret!(Object::Verbatim(&src[pos + 1..pos + end]), end + 1), - _ => unreachable!(), + (b'<', b'<', third) => { + if third != b'\n' { + if let Some((target, off)) = Target::parse(&src[pos..]) { + brk!(Object::Target(target), off, pos); } } } - - if first == b'c' && second == b'a' && third == b'l' { - if let Some((call, off)) = InlineCall::parse(&src[pos..]) { - ret!(Object::InlineCall(call), off); + (b'[', b'f', b'n') => { + if let Some((fn_ref, off)) = FnRef::parse(&src[pos..]) { + brk!(Object::FnRef(fn_ref), off, pos); } } - - if first == b's' && second == b'r' && third == b'c' { - if let Some((src, off)) = InlineSrc::parse(&src[pos..]) { - ret!(Object::InlineSrc(src), off); + (b'[', b'[', _) => { + if let Some((link, off)) = Link::parse(&src[pos..]) { + brk!(Object::Link(link), off, pos); } } + (b'[', _, _) => { + if let Some((cookie, off)) = Cookie::parse(&src[pos..]) { + brk!(Object::Cookie(cookie), off, pos); + } + // TODO: Timestamp + } + (b'{', _, _) + | (b' ', _, _) + | (b'"', _, _) + | (b',', _, _) + | (b'(', _, _) + | (b'\n', _, _) => pre += 1, + _ => (), + } + + match bytes[pre] { + b'*' => { + if let Some(end) = Emphasis::parse(&src[pre..], b'*') { + brk!(Object::Bold { end }, 1, pre); + } + } + b'+' => { + if let Some(end) = Emphasis::parse(&src[pre..], b'+') { + brk!(Object::Strike { end }, 1, pre); + } + } + b'/' => { + if let Some(end) = Emphasis::parse(&src[pre..], b'/') { + brk!(Object::Italic { end }, 1, pre); + } + } + b'_' => { + if let Some(end) = Emphasis::parse(&src[pre..], b'_') { + brk!(Object::Underline { end }, 1, pre); + } + } + b'=' => { + if let Some(end) = Emphasis::parse(&src[pre..], b'=') { + brk!(Object::Verbatim(&src[pre + 1..pre + end]), end + 1, pre); + } + } + b'~' => { + if let Some(end) = Emphasis::parse(&src[pre..], b'~') { + brk!(Object::Code(&src[pre + 1..pre + end]), end + 1, pre); + } + } + b'c' => { + if let Some((call, off)) = InlineCall::parse(&src[pre..]) { + brk!(Object::InlineCall(call), off, pre); + } + } + b's' => { + if let Some((src, off)) = InlineSrc::parse(&src[pre..]) { + brk!(Object::InlineSrc(src), off, pre); + } + } + _ => (), + } + + if let Some(off) = chars + .find(&src[pos + 1..]) + .map(|i| i + pos + 1) + .filter(|&i| i < src.len() - 2) + { + pos = off; + } else { + break (Object::Text(src), src.len(), None); } } - - (Object::Text(src), src.len(), None) } } diff --git a/src/parser.rs b/src/parser.rs index 558bbec..81f0905 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -350,44 +350,12 @@ impl<'a> Parser<'a> { Container::Underline { .. } => Event::UnderlineEnd, } } - - fn check_off(&self) { - use self::Container::*; - - if let Some(container) = self.stack.last() { - match *container { - Headline { end, .. } - | Section { end } - | List { end, .. } - | ListItem { end } - | Italic { end } - | Strike { end } - | Bold { end } - | Underline { end } => { - debug_assert!(self.off <= end); - } - Paragraph { cont_end, end } => { - debug_assert!(self.off <= end); - debug_assert!(self.off <= cont_end); - } - CtrBlock { cont_end, end } - | QteBlock { cont_end, end } - | SplBlock { cont_end, end } - | DynBlock { cont_end, end } => { - debug_assert!(self.off <= cont_end); - debug_assert!(self.off <= end); - } - } - } - } } impl<'a> Iterator for Parser<'a> { type Item = Event<'a>; fn next(&mut self) -> Option> { - // self.check_off(); - if self.stack.is_empty() { if self.off >= self.text.len() { None