fix: a single \r will be considered a blank line

This commit is contained in:
PoiScript 2023-11-14 11:46:26 +08:00
parent b6e86a128a
commit a269f2f258
No known key found for this signature in database
GPG key ID: 22C2B1249D99985E
23 changed files with 101 additions and 305 deletions

View file

@ -13,9 +13,6 @@ exclude = ["/wasm", "/.github"]
[package.metadata.docs.rs]
all-features = true
[badges]
travis-ci = { repository = "PoiScript/orgize" }
[features]
default = []
indexmap = ["dep:indexmap"]
@ -36,7 +33,6 @@ tracing = "0.1"
[dev-dependencies]
criterion = "0.4"
pretty_assertions = "1.3"
insta = "1.29"
serde_json = "1.0"
slugify = "0.1"

View file

@ -3,9 +3,9 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use orgize::Org;
const INPUT: &[(&str, &str)] = &[
// ("org-syntax.org", include_str!("./org-syntax.org")),
("doc.org", include_str!("./doc.org")),
("org-faq.org", include_str!("./org-faq.org")),
("org-syntax.org", include_str!("./org-syntax.org")),
];
pub fn bench_parse(c: &mut Criterion) {

View file

@ -1,11 +1,7 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use orgize::syntax::{HtmlHandler, Org};
use std::str;
fuzz_target!(|data: &[u8]| {
if let Ok(utf8) = str::from_utf8(data) {
let _ = Org::parse(utf8);
libfuzzer_sys::fuzz_target!(|data: &[u8]| {
if let Ok(utf8) = std::str::from_utf8(data) {
let _ = orgize::Org::parse(utf8);
}
});

View file

@ -19,7 +19,7 @@ use rowan::{ast::AstNode, Language, NodeOrToken};
pub fn blank_lines(parent: &SyntaxNode) -> usize {
parent
.children()
.children_with_tokens()
.filter(|n| n.kind() == SyntaxKind::BLANK_LINE)
.count()
}

View file

@ -168,10 +168,8 @@ r#"#+BEGIN_SRC
TEXT@8..11 "SRC"
TEXT@11..11 ""
NEW_LINE@11..12 "\n"
BLANK_LINE@12..13
NEW_LINE@12..13 "\n"
BLANK_LINE@13..14
NEW_LINE@13..14 "\n"
BLANK_LINE@12..13 "\n"
BLANK_LINE@13..14 "\n"
BLOCK_CONTENT@14..14
BLOCK_END@14..27
WHITESPACE@14..18 " "
@ -222,10 +220,8 @@ alert('Hello World!');
TEXT@54..60 "#+END_"
TEXT@60..63 "SRC"
NEW_LINE@63..64 "\n"
BLANK_LINE@64..65
NEW_LINE@64..65 "\n"
BLANK_LINE@65..69
WHITESPACE@65..69 " "
BLANK_LINE@64..65 "\n"
BLANK_LINE@65..69 " "
"###
);

View file

@ -130,8 +130,7 @@ fn parse() {
COLON@59..60 ":"
TEXT@60..62 "00"
NEW_LINE@62..63 "\n"
BLANK_LINE@63..64
NEW_LINE@63..64 "\n"
BLANK_LINE@63..64 "\n"
"###
);
}

View file

@ -1,13 +1,9 @@
use std::iter::once;
use memchr::{memchr, memchr_iter};
use memchr::{memchr, memchr2_iter, memchr_iter};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{line_ending, space0},
combinator::eof,
sequence::tuple,
AsBytes, IResult, InputLength, InputTake, Parser,
bytes::complete::tag, character::complete::space0, AsBytes, IResult, InputLength, InputTake,
Parser,
};
use rowan::{GreenNode, GreenToken, Language, NodeOrToken};
@ -97,23 +93,30 @@ where
/// Takes all blank lines
pub fn blank_lines(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
let mut lines = vec![];
let mut i = input;
if input.is_empty() {
return Ok((input, vec![]));
}
while !i.is_empty() {
match tuple::<_, _, (), _>((space0, alt((line_ending, eof))))(i) {
Ok((input, (ws, nl))) => {
let mut b = NodeBuilder::new();
b.ws(ws);
b.nl(nl);
lines.push(b.finish(BLANK_LINE));
i = input;
}
_ => break,
let mut lines = vec![];
let mut start = 0;
let bytes = input.as_bytes();
for index in memchr2_iter(b'\r', b'\n', bytes)
.map(|i| i + 1)
.chain(once(bytes.len()))
{
if bytes.get(index - 1) == Some(&b'\r') && bytes.get(index) == Some(&b'\n') {
continue;
}
if start != index && bytes[start..index].iter().all(|b| b.is_ascii_whitespace()) {
lines.push(token(BLANK_LINE, &input.as_str()[start..index]));
start = index;
} else {
break;
}
}
Ok((i, lines))
Ok((input.take_split(start).0, lines))
}
#[test]
@ -124,6 +127,11 @@ fn test_blank_lines() {
assert_eq!(input.as_str(), "");
assert_eq!(output, vec![]);
let (input, output) = blank_lines(("\n", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.len(), 1);
assert_eq!(output[0].to_string(), "\n");
let (input, output) = blank_lines((" t", config).into()).unwrap();
assert_eq!(input.as_str(), " t");
assert_eq!(output, vec![]);
@ -138,13 +146,15 @@ fn test_blank_lines() {
assert_eq!(output[4].to_string(), " ");
let (input, output) =
blank_lines((" \r\n\n\t\t\r\n \n t\n \r\n\n\t\t\r\n \n", config).into()).unwrap();
assert_eq!(input.as_str(), " t\n \r\n\n\t\t\r\n \n");
assert_eq!(output.len(), 4);
assert_eq!(output[0].to_string(), " \r\n");
blank_lines(("\r\n\n\t\t\r\n \n\r \r t\n ", config).into()).unwrap();
assert_eq!(input.as_str(), " t\n ");
assert_eq!(output.len(), 6);
assert_eq!(output[0].to_string(), "\r\n");
assert_eq!(output[1].to_string(), "\n");
assert_eq!(output[2].to_string(), "\t\t\r\n");
assert_eq!(output[3].to_string(), " \n");
assert_eq!(output[4].to_string(), "\r");
assert_eq!(output[5].to_string(), " \r");
}
/// Returns 1. anything before the trailing whitespace, 2. the whitespace itself, 3. the line ending

View file

@ -70,8 +70,7 @@ fn parse() {
@r###"
COMMENT@0..12
TEXT@0..11 "#\n # a\n #\n"
BLANK_LINE@11..12
NEW_LINE@11..12 "\n"
BLANK_LINE@11..12 "\n"
"###
);

View file

@ -53,13 +53,9 @@ fn parse() {
to_document("\n \n\n").syntax,
@r###"
DOCUMENT@0..5
BLANK_LINE@0..1
NEW_LINE@0..1 "\n"
BLANK_LINE@1..4
WHITESPACE@1..3 " "
NEW_LINE@3..4 "\n"
BLANK_LINE@4..5
NEW_LINE@4..5 "\n"
BLANK_LINE@0..1 "\n"
BLANK_LINE@1..4 " \n"
BLANK_LINE@4..5 "\n"
"###
);
@ -77,8 +73,7 @@ fn parse() {
to_document("\n* section").syntax,
@r###"
DOCUMENT@0..10
BLANK_LINE@0..1
NEW_LINE@0..1 "\n"
BLANK_LINE@0..1 "\n"
HEADLINE@1..10
HEADLINE_STARS@1..2 "*"
WHITESPACE@2..3 " "
@ -91,8 +86,7 @@ fn parse() {
to_document("\n** heading 2\n* heading 1").syntax,
@r###"
DOCUMENT@0..25
BLANK_LINE@0..1
NEW_LINE@0..1 "\n"
BLANK_LINE@0..1 "\n"
HEADLINE@1..14
HEADLINE_STARS@1..3 "**"
WHITESPACE@3..4 " "

View file

@ -179,8 +179,7 @@ fn parse() {
TEXT@1..7 "DRAWER"
COLON@7..8 ":"
NEW_LINE@8..9 "\n"
BLANK_LINE@9..10
NEW_LINE@9..10 "\n"
BLANK_LINE@9..10 "\n"
TEXT@10..10 ""
DRAWER_END@10..18
WHITESPACE@10..12 " "
@ -188,8 +187,7 @@ fn parse() {
TEXT@13..16 "END"
COLON@16..17 ":"
NEW_LINE@17..18 "\n"
BLANK_LINE@18..19
NEW_LINE@18..19 "\n"
BLANK_LINE@18..19 "\n"
"###
);

View file

@ -99,14 +99,12 @@ CONTENTS
TEXT@9..19 "clocktable"
TEXT@19..31 " :scope file"
NEW_LINE@31..32 "\n"
BLANK_LINE@32..33
NEW_LINE@32..33 "\n"
BLANK_LINE@32..33 "\n"
TEXT@33..42 "CONTENTS\n"
DYN_BLOCK_END@42..49
TEXT@42..48 "#+END:"
NEW_LINE@48..49 "\n"
BLANK_LINE@49..53
WHITESPACE@49..53 " "
BLANK_LINE@49..53 " "
"###
);
}

View file

@ -208,8 +208,7 @@ b"#),
SECTION@0..4
PARAGRAPH@0..3
TEXT@0..2 "a\n"
BLANK_LINE@2..3
NEW_LINE@2..3 "\n"
BLANK_LINE@2..3 "\n"
PARAGRAPH@3..4
TEXT@3..4 "b"
"###

View file

@ -55,10 +55,8 @@ fn parse() {
@r###"
FIXED_WIDTH@0..19
TEXT@0..14 ": A\n:\n: B\n: C\n"
BLANK_LINE@14..15
NEW_LINE@14..15 "\n"
BLANK_LINE@15..19
WHITESPACE@15..19 " "
BLANK_LINE@14..15 "\n"
BLANK_LINE@15..19 " "
"###
);
}

View file

@ -84,7 +84,6 @@ pub fn affiliated_keyword_nodes(input: Input) -> IResult<Input, Vec<GreenElement
// find the first text token in children
let Some(NodeOrToken::Token(token)) = node
.children()
.into_iter()
.find(|t| t.kind() == SyntaxKind::TEXT.into())
else {
return Err(nom::Err::Error(()));

View file

@ -328,8 +328,7 @@ fn parse() {
LIST_ITEM_CONTENT@7..11
PARAGRAPH@7..10
TEXT@7..10 "1\n\n"
BLANK_LINE@10..11
NEW_LINE@10..11 "\n"
BLANK_LINE@10..11 "\n"
LIST_ITEM@11..14
LIST_ITEM_INDENT@11..11 ""
LIST_ITEM_BULLET@11..13 "+ "

View file

@ -99,10 +99,10 @@ pub enum SyntaxKind {
DOUBLE_ARROW, // '=>'
PIPE, // '|'
COMMA, // ','
TEXT,
NEW_LINE, // '\n' or '\r\n' or '\r'
WHITESPACE, // ' ' or '\t'
BLANK_LINE,
WHITESPACE,
NEW_LINE,
TEXT,
DOCUMENT,
SECTION,

View file

@ -65,8 +65,7 @@ fn parse() {
@r###"
PARAGRAPH@0..6
TEXT@0..2 "a\n"
BLANK_LINE@2..6
WHITESPACE@2..6 " "
BLANK_LINE@2..6 " "
"###
);
@ -89,8 +88,7 @@ c
@r###"
PARAGRAPH@0..3
TEXT@0..2 "a\n"
BLANK_LINE@2..3
NEW_LINE@2..3 "\n"
BLANK_LINE@2..3 "\n"
"###
);
}

View file

@ -62,10 +62,8 @@ fn parse() {
RULE@0..8
TEXT@0..5 "-----"
NEW_LINE@5..6 "\n"
BLANK_LINE@6..7
NEW_LINE@6..7 "\n"
BLANK_LINE@7..8
NEW_LINE@7..8 "\n"
BLANK_LINE@6..7 "\n"
BLANK_LINE@7..8 "\n"
"###
);

View file

@ -195,10 +195,8 @@ fn parse_table_el() {
@r###"
TABLE_EL@0..37
TEXT@0..32 " +---+\n | |\n ..."
BLANK_LINE@32..33
NEW_LINE@32..33 "\n"
BLANK_LINE@33..37
WHITESPACE@33..37 " "
BLANK_LINE@32..33 "\n"
BLANK_LINE@33..37 " "
"###
);

View file

@ -1,18 +0,0 @@
use orgize::Org;
#[test]
fn can_handle_empty_emphasis() {
let cases = &[
"* / // a",
"\"* / // a\"",
"* * ** a",
"* 2020\n** December\n*** Experiment\nType A is marked with * and type B is marked with **.\n",
"* 2020\n:DRAWER:\n* ** a\n:END:",
"* * ** :a:",
"* * ** "
];
for case in cases {
let _ = Org::parse(case);
}
}

View file

@ -1,21 +0,0 @@
use orgize::Org;
#[test]
fn can_handle_empty_list_item() {
let cases = &[
"0. ",
"* \n0. ",
" * ",
" 0. ",
"\t* ",
"- ",
"- hello\n- ",
"- \n- hello",
"- hello\n- \n- world",
"* world\n- ",
];
for case in cases {
let _ = Org::parse(case);
}
}

View file

@ -1,8 +0,0 @@
use orgize::{ast::Paragraph, rowan::ast::AstNode, Org};
#[test]
fn whitespaces() {
let case = "\u{000b}\u{0085}\u{00a0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200a}\u{2028}\u{2029}\u{202f}\u{205f}\u{3000}";
let paragraph = Org::parse(case).first_node::<Paragraph>().unwrap();
assert_eq!(&paragraph.syntax().to_string(), case);
}

View file

@ -1,168 +1,36 @@
use orgize::Org;
use pretty_assertions::assert_eq;
const INPUT: &[&str] = &[
// issue 10
"* / // a",
"\"* / // a\"",
"* * ** a",
"* 2020\n** December\n*** Experiment\nType A is marked with * and type B is marked with **.\n",
"* 2020\n:DRAWER:\n* ** a\n:END:",
"* * ** :a:",
"* * ** ",
// issue 11
"0. ",
"* \n0. ",
" * ",
" 0. ",
"\t* ",
"- ",
"- hello\n- ",
"- \n- hello",
"- hello\n- \n- world",
"* world\n- ",
// issue 22
"\u{000b}\u{0085}\u{00a0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200a}\u{2028}\u{2029}\u{202f}\u{205f}\u{3000}",
// fuzz test
"___\n",
"\n\n\n",
"\n\n\n",
"\n*",
"\r-"
];
macro_rules! test_suite {
($name:ident, $content:expr, $expected:expr) => {
#[test]
fn $name() {
assert_eq!(Org::parse($content).to_html(), $expected);
}
};
#[test]
fn parse() {
for input in INPUT {
let _ = orgize::Org::parse(input);
}
}
test_suite!(
emphasis,
"*bold*, /italic/,\n_underlined_, =verbatim= and ~code~",
"<main><section><p><b>bold</b>, <i>italic</i>,\n<u>underlined</u>, \
<code>verbatim</code> and <code>code</code></p></section></main>"
);
test_suite!(
link,
"Visit[[http://example.com][link1]]or[[http://example.com][link1]].",
r#"<main><section><p>Visit<a href="http://example.com">link1</a>or<a href="http://example.com">link1</a>.</p></section></main>"#
);
test_suite!(
section_and_headline,
r#"
* title 1
section 1
** title 2
section 2
* title 3
section 3
* title 4
section 4
"#,
"<main><h1>title 1</h1><section><p>section 1</p></section>\
<h2>title 2</h2><section><p>section 2</p></section>\
<h1>title 3</h1><section><p>section 3</p></section>\
<h1>title 4</h1><section><p>section 4</p></section></main>"
);
test_suite!(
list,
r#"
+ 1
+ 2
- 3
- 4
+ 5
"#,
"<main><section><ul>\
<li><p>1</p></li>\
<li><p>2</p><ul><li><p>3</p></li><li><p>4</p></li></ul></li>\
<li><p>5</p></li>\
</ul></section></main>"
);
test_suite!(
snippet,
"@@html:<del>@@delete this@@html:</del>@@",
"<main><section><p><del>delete this</del></p></section></main>"
);
test_suite!(
paragraphs,
r#"
* title
paragraph 1
paragraph 2
paragraph 3
paragraph 4
"#,
"<main><h1>title</h1><section>\
<p>paragraph 1</p><p>paragraph 2</p>\
<p>paragraph 3</p><p>paragraph 4</p>\
</section></main>"
);
test_suite!(
table1,
r#"
|-----+-----+-----|
| 0 | 1 | 2 |
|-----+-----+-----|
| 4 | 5 | 6 |
"#,
"<main><section><table>\
<thead><tr><th>0</th><th>1</th><th>2</th></tr></thead>\
<tbody><tr><td>4</td><td>5</td><td>6</td></tr></tbody>\
</table></section></main>"
);
test_suite!(
table2,
r#"
|-----+-----+-----|
| 0 | 1 | 2 |
| 4 | 5 | 6 |
|-----+-----+-----|
"#,
"<main><section><table>\
<tbody><tr><td>0</td><td>1</td><td>2</td></tr>\
<tr><td>4</td><td>5</td><td>6</td></tr></tbody>\
</table></section></main>"
);
test_suite!(
table3,
r#"
|-----+-----+-----|
|-----+-----+-----|
| 0 | 1 | 2 |
| 4 | 5 | 6 |
"#,
"<main><section><table><thead></thead>\
<tbody><tr><td>0</td><td>1</td><td>2</td></tr>\
<tr><td>4</td><td>5</td><td>6</td></tr></tbody>\
</table></section></main>"
);
test_suite!(
table4,
r#"
| 0 | 1 | 2 |
| 4 | 5 | 6 |
|-----+-----+-----|
|-----+-----+-----|
"#,
"<main><section><table>\
<thead><tr><th>0</th><th>1</th><th>2</th></tr>\
<tr><th>4</th><th>5</th><th>6</th></tr></thead>\
<tbody></tbody></table></section></main>"
);
test_suite!(
table5,
r#"
|-----+-----+-----|
|-----+-----+-----|
"#,
"<main><section><table><tbody></tbody></table></section></main>"
);
test_suite!(
table6,
r#"
|
|-
|
|-
|
"#,
"<main><section><table>\
<thead><tr></tr></thead>\
<tbody><tr></tr></tbody>\
<tbody><tr></tr></tbody>\
</table></section></main>"
);