fix: single \r will be considered as blank line

This commit is contained in:
PoiScript 2023-11-14 11:46:26 +08:00
parent b6e86a128a
commit a269f2f258
No known key found for this signature in database
GPG key ID: 22C2B1249D99985E
23 changed files with 101 additions and 305 deletions

View file

@ -19,7 +19,7 @@ use rowan::{ast::AstNode, Language, NodeOrToken};
pub fn blank_lines(parent: &SyntaxNode) -> usize {
parent
.children()
.children_with_tokens()
.filter(|n| n.kind() == SyntaxKind::BLANK_LINE)
.count()
}

View file

@ -168,10 +168,8 @@ r#"#+BEGIN_SRC
TEXT@8..11 "SRC"
TEXT@11..11 ""
NEW_LINE@11..12 "\n"
BLANK_LINE@12..13
NEW_LINE@12..13 "\n"
BLANK_LINE@13..14
NEW_LINE@13..14 "\n"
BLANK_LINE@12..13 "\n"
BLANK_LINE@13..14 "\n"
BLOCK_CONTENT@14..14
BLOCK_END@14..27
WHITESPACE@14..18 " "
@ -222,10 +220,8 @@ alert('Hello World!');
TEXT@54..60 "#+END_"
TEXT@60..63 "SRC"
NEW_LINE@63..64 "\n"
BLANK_LINE@64..65
NEW_LINE@64..65 "\n"
BLANK_LINE@65..69
WHITESPACE@65..69 " "
BLANK_LINE@64..65 "\n"
BLANK_LINE@65..69 " "
"###
);

View file

@ -130,8 +130,7 @@ fn parse() {
COLON@59..60 ":"
TEXT@60..62 "00"
NEW_LINE@62..63 "\n"
BLANK_LINE@63..64
NEW_LINE@63..64 "\n"
BLANK_LINE@63..64 "\n"
"###
);
}

View file

@ -1,13 +1,9 @@
use std::iter::once;
use memchr::{memchr, memchr_iter};
use memchr::{memchr, memchr2_iter, memchr_iter};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{line_ending, space0},
combinator::eof,
sequence::tuple,
AsBytes, IResult, InputLength, InputTake, Parser,
bytes::complete::tag, character::complete::space0, AsBytes, IResult, InputLength, InputTake,
Parser,
};
use rowan::{GreenNode, GreenToken, Language, NodeOrToken};
@ -97,23 +93,30 @@ where
/// Takes all blank lines
pub fn blank_lines(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
let mut lines = vec![];
let mut i = input;
if input.is_empty() {
return Ok((input, vec![]));
}
while !i.is_empty() {
match tuple::<_, _, (), _>((space0, alt((line_ending, eof))))(i) {
Ok((input, (ws, nl))) => {
let mut b = NodeBuilder::new();
b.ws(ws);
b.nl(nl);
lines.push(b.finish(BLANK_LINE));
i = input;
}
_ => break,
let mut lines = vec![];
let mut start = 0;
let bytes = input.as_bytes();
for index in memchr2_iter(b'\r', b'\n', bytes)
.map(|i| i + 1)
.chain(once(bytes.len()))
{
if bytes.get(index - 1) == Some(&b'\r') && bytes.get(index) == Some(&b'\n') {
continue;
}
if start != index && bytes[start..index].iter().all(|b| b.is_ascii_whitespace()) {
lines.push(token(BLANK_LINE, &input.as_str()[start..index]));
start = index;
} else {
break;
}
}
Ok((i, lines))
Ok((input.take_split(start).0, lines))
}
#[test]
@ -124,6 +127,11 @@ fn test_blank_lines() {
assert_eq!(input.as_str(), "");
assert_eq!(output, vec![]);
let (input, output) = blank_lines(("\n", config).into()).unwrap();
assert_eq!(input.as_str(), "");
assert_eq!(output.len(), 1);
assert_eq!(output[0].to_string(), "\n");
let (input, output) = blank_lines((" t", config).into()).unwrap();
assert_eq!(input.as_str(), " t");
assert_eq!(output, vec![]);
@ -138,13 +146,15 @@ fn test_blank_lines() {
assert_eq!(output[4].to_string(), " ");
let (input, output) =
blank_lines((" \r\n\n\t\t\r\n \n t\n \r\n\n\t\t\r\n \n", config).into()).unwrap();
assert_eq!(input.as_str(), " t\n \r\n\n\t\t\r\n \n");
assert_eq!(output.len(), 4);
assert_eq!(output[0].to_string(), " \r\n");
blank_lines(("\r\n\n\t\t\r\n \n\r \r t\n ", config).into()).unwrap();
assert_eq!(input.as_str(), " t\n ");
assert_eq!(output.len(), 6);
assert_eq!(output[0].to_string(), "\r\n");
assert_eq!(output[1].to_string(), "\n");
assert_eq!(output[2].to_string(), "\t\t\r\n");
assert_eq!(output[3].to_string(), " \n");
assert_eq!(output[4].to_string(), "\r");
assert_eq!(output[5].to_string(), " \r");
}
/// Returns 1. anything before the trailing whitespace, 2. the whitespace itself, 3. the line ending

View file

@ -70,8 +70,7 @@ fn parse() {
@r###"
COMMENT@0..12
TEXT@0..11 "#\n # a\n #\n"
BLANK_LINE@11..12
NEW_LINE@11..12 "\n"
BLANK_LINE@11..12 "\n"
"###
);

View file

@ -53,13 +53,9 @@ fn parse() {
to_document("\n \n\n").syntax,
@r###"
DOCUMENT@0..5
BLANK_LINE@0..1
NEW_LINE@0..1 "\n"
BLANK_LINE@1..4
WHITESPACE@1..3 " "
NEW_LINE@3..4 "\n"
BLANK_LINE@4..5
NEW_LINE@4..5 "\n"
BLANK_LINE@0..1 "\n"
BLANK_LINE@1..4 " \n"
BLANK_LINE@4..5 "\n"
"###
);
@ -77,8 +73,7 @@ fn parse() {
to_document("\n* section").syntax,
@r###"
DOCUMENT@0..10
BLANK_LINE@0..1
NEW_LINE@0..1 "\n"
BLANK_LINE@0..1 "\n"
HEADLINE@1..10
HEADLINE_STARS@1..2 "*"
WHITESPACE@2..3 " "
@ -91,8 +86,7 @@ fn parse() {
to_document("\n** heading 2\n* heading 1").syntax,
@r###"
DOCUMENT@0..25
BLANK_LINE@0..1
NEW_LINE@0..1 "\n"
BLANK_LINE@0..1 "\n"
HEADLINE@1..14
HEADLINE_STARS@1..3 "**"
WHITESPACE@3..4 " "

View file

@ -179,8 +179,7 @@ fn parse() {
TEXT@1..7 "DRAWER"
COLON@7..8 ":"
NEW_LINE@8..9 "\n"
BLANK_LINE@9..10
NEW_LINE@9..10 "\n"
BLANK_LINE@9..10 "\n"
TEXT@10..10 ""
DRAWER_END@10..18
WHITESPACE@10..12 " "
@ -188,8 +187,7 @@ fn parse() {
TEXT@13..16 "END"
COLON@16..17 ":"
NEW_LINE@17..18 "\n"
BLANK_LINE@18..19
NEW_LINE@18..19 "\n"
BLANK_LINE@18..19 "\n"
"###
);

View file

@ -99,14 +99,12 @@ CONTENTS
TEXT@9..19 "clocktable"
TEXT@19..31 " :scope file"
NEW_LINE@31..32 "\n"
BLANK_LINE@32..33
NEW_LINE@32..33 "\n"
BLANK_LINE@32..33 "\n"
TEXT@33..42 "CONTENTS\n"
DYN_BLOCK_END@42..49
TEXT@42..48 "#+END:"
NEW_LINE@48..49 "\n"
BLANK_LINE@49..53
WHITESPACE@49..53 " "
BLANK_LINE@49..53 " "
"###
);
}

View file

@ -208,8 +208,7 @@ b"#),
SECTION@0..4
PARAGRAPH@0..3
TEXT@0..2 "a\n"
BLANK_LINE@2..3
NEW_LINE@2..3 "\n"
BLANK_LINE@2..3 "\n"
PARAGRAPH@3..4
TEXT@3..4 "b"
"###

View file

@ -55,10 +55,8 @@ fn parse() {
@r###"
FIXED_WIDTH@0..19
TEXT@0..14 ": A\n:\n: B\n: C\n"
BLANK_LINE@14..15
NEW_LINE@14..15 "\n"
BLANK_LINE@15..19
WHITESPACE@15..19 " "
BLANK_LINE@14..15 "\n"
BLANK_LINE@15..19 " "
"###
);
}

View file

@ -84,7 +84,6 @@ pub fn affiliated_keyword_nodes(input: Input) -> IResult<Input, Vec<GreenElement
// find the first text token in children
let Some(NodeOrToken::Token(token)) = node
.children()
.into_iter()
.find(|t| t.kind() == SyntaxKind::TEXT.into())
else {
return Err(nom::Err::Error(()));

View file

@ -328,8 +328,7 @@ fn parse() {
LIST_ITEM_CONTENT@7..11
PARAGRAPH@7..10
TEXT@7..10 "1\n\n"
BLANK_LINE@10..11
NEW_LINE@10..11 "\n"
BLANK_LINE@10..11 "\n"
LIST_ITEM@11..14
LIST_ITEM_INDENT@11..11 ""
LIST_ITEM_BULLET@11..13 "+ "

View file

@ -99,10 +99,10 @@ pub enum SyntaxKind {
DOUBLE_ARROW, // '=>'
PIPE, // '|'
COMMA, // ','
TEXT,
NEW_LINE, // '\n' or '\r\n' or '\r'
WHITESPACE, // ' ' or '\t'
BLANK_LINE,
WHITESPACE,
NEW_LINE,
TEXT,
DOCUMENT,
SECTION,

View file

@ -65,8 +65,7 @@ fn parse() {
@r###"
PARAGRAPH@0..6
TEXT@0..2 "a\n"
BLANK_LINE@2..6
WHITESPACE@2..6 " "
BLANK_LINE@2..6 " "
"###
);
@ -89,8 +88,7 @@ c
@r###"
PARAGRAPH@0..3
TEXT@0..2 "a\n"
BLANK_LINE@2..3
NEW_LINE@2..3 "\n"
BLANK_LINE@2..3 "\n"
"###
);
}

View file

@ -62,10 +62,8 @@ fn parse() {
RULE@0..8
TEXT@0..5 "-----"
NEW_LINE@5..6 "\n"
BLANK_LINE@6..7
NEW_LINE@6..7 "\n"
BLANK_LINE@7..8
NEW_LINE@7..8 "\n"
BLANK_LINE@6..7 "\n"
BLANK_LINE@7..8 "\n"
"###
);

View file

@ -195,10 +195,8 @@ fn parse_table_el() {
@r###"
TABLE_EL@0..37
TEXT@0..32 " +---+\n | |\n ..."
BLANK_LINE@32..33
NEW_LINE@32..33 "\n"
BLANK_LINE@33..37
WHITESPACE@33..37 " "
BLANK_LINE@32..33 "\n"
BLANK_LINE@33..37 " "
"###
);