fix: consider single '\r' as newline
This commit is contained in:
parent
8fcfd60712
commit
471a23c958
13 changed files with 138 additions and 110 deletions
|
|
@ -1,15 +1,14 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag, tag_no_case},
|
||||
character::complete::{alpha1, line_ending, space0},
|
||||
combinator::eof,
|
||||
character::complete::{alpha1, space0},
|
||||
sequence::tuple,
|
||||
IResult, InputTake,
|
||||
};
|
||||
|
||||
use super::{
|
||||
combinator::{
|
||||
blank_lines, line_starts_iter, node, token, trim_line_end, GreenElement, NodeBuilder,
|
||||
blank_lines, eol_or_eof, line_starts_iter, node, token, trim_line_end, GreenElement,
|
||||
NodeBuilder,
|
||||
},
|
||||
element::element_nodes,
|
||||
input::Input,
|
||||
|
|
@ -67,13 +66,8 @@ fn block_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
|
|||
}
|
||||
|
||||
fn block_end_node<'a>(input: Input<'a>, name: &str) -> IResult<Input<'a>, GreenElement, ()> {
|
||||
let (input, (ws, end, name, ws_, nl)) = tuple((
|
||||
space0,
|
||||
tag_no_case("#+END_"),
|
||||
tag(name),
|
||||
space0,
|
||||
alt((line_ending, eof)),
|
||||
))(input)?;
|
||||
let (input, (ws, end, name, ws_, nl)) =
|
||||
tuple((space0, tag_no_case("#+END_"), tag(name), space0, eol_or_eof))(input)?;
|
||||
|
||||
let mut b = NodeBuilder::new();
|
||||
b.ws(ws);
|
||||
|
|
|
|||
|
|
@ -1,14 +1,16 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::tag,
|
||||
character::complete::{digit1, line_ending, space0},
|
||||
combinator::{eof, map, opt, recognize},
|
||||
character::complete::{digit1, space0},
|
||||
combinator::{map, opt, recognize},
|
||||
sequence::tuple,
|
||||
IResult,
|
||||
};
|
||||
|
||||
use super::{
|
||||
combinator::{blank_lines, colon_token, double_arrow_token, GreenElement, NodeBuilder},
|
||||
combinator::{
|
||||
blank_lines, colon_token, double_arrow_token, eol_or_eof, GreenElement, NodeBuilder,
|
||||
},
|
||||
input::Input,
|
||||
timestamp::{timestamp_active_node, timestamp_inactive_node},
|
||||
SyntaxKind,
|
||||
|
|
@ -29,7 +31,7 @@ pub fn clock_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
recognize(tuple((digit1, colon_token, digit1))),
|
||||
))),
|
||||
space0,
|
||||
alt((line_ending, eof)),
|
||||
eol_or_eof,
|
||||
blank_lines,
|
||||
)),
|
||||
|(ws, clock, ws_, timestamp, duration, ws__, nl, post_blank)| {
|
||||
|
|
|
|||
|
|
@ -1,10 +1,7 @@
|
|||
use std::iter::once;
|
||||
|
||||
use memchr::{memchr, memchr2_iter, memchr_iter};
|
||||
use nom::{
|
||||
bytes::complete::tag, character::complete::space0, AsBytes, IResult, InputLength, InputTake,
|
||||
};
|
||||
use memchr::{memchr2, memchr2_iter, Memchr2};
|
||||
use nom::{bytes::complete::tag, AsBytes, IResult, InputTake, Slice};
|
||||
use rowan::{GreenNode, GreenToken, Language, NodeOrToken};
|
||||
use std::iter::once;
|
||||
|
||||
use super::{input::Input, OrgLanguage, SyntaxKind, SyntaxKind::*};
|
||||
|
||||
|
|
@ -101,13 +98,7 @@ pub fn blank_lines(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
|
|||
let mut start = 0;
|
||||
let bytes = input.as_bytes();
|
||||
|
||||
for index in memchr2_iter(b'\r', b'\n', bytes)
|
||||
.map(|i| i + 1)
|
||||
.chain(once(bytes.len()))
|
||||
{
|
||||
if bytes.get(index - 1) == Some(&b'\r') && bytes.get(index) == Some(&b'\n') {
|
||||
continue;
|
||||
}
|
||||
for index in line_ends_iter(input.as_str()) {
|
||||
if start != index && bytes[start..index].iter().all(|b| b.is_ascii_whitespace()) {
|
||||
lines.push(token(BLANK_LINE, &input.as_str()[start..index]));
|
||||
start = index;
|
||||
|
|
@ -116,7 +107,7 @@ pub fn blank_lines(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
|
|||
}
|
||||
}
|
||||
|
||||
Ok((input.take_split(start).0, lines))
|
||||
Ok((input.slice(start..), lines))
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -159,21 +150,26 @@ fn test_blank_lines() {
|
|||
|
||||
/// Returns 1. anything before trailing whitespace, 2. whitespace itself, 3. line feeding
|
||||
pub fn trim_line_end(input: Input) -> IResult<Input, (Input, Input, Input), ()> {
|
||||
let (input, line) = input.take_split(
|
||||
memchr(b'\n', input.as_bytes())
|
||||
.map(|i| i + 1)
|
||||
.unwrap_or(input.input_len()),
|
||||
);
|
||||
let bytes = input.as_bytes();
|
||||
|
||||
let (ws_and_nl, contents) = line.take_split(
|
||||
line.as_bytes()
|
||||
.iter()
|
||||
.rposition(|u| !u.is_ascii_whitespace())
|
||||
.map(|i| i + 1)
|
||||
.unwrap_or(0),
|
||||
);
|
||||
let (input, contents, nl) = match memchr2(b'\r', b'\n', bytes) {
|
||||
Some(i) if bytes[i] == b'\r' && matches!(bytes.get(i + 1), Some(b'\n')) => (
|
||||
input.slice(i + 2..),
|
||||
input.slice(0..i),
|
||||
input.slice(i..i + 2),
|
||||
),
|
||||
Some(i) => (
|
||||
input.slice(i + 1..),
|
||||
input.slice(0..i),
|
||||
input.slice(i..i + 1),
|
||||
),
|
||||
_ => (input.of(""), input, input.of("")),
|
||||
};
|
||||
|
||||
let (nl, ws) = space0(ws_and_nl)?;
|
||||
let (contents, ws) = match contents.bytes().rposition(|u| !u.is_ascii_whitespace()) {
|
||||
Some(i) => (contents.slice(0..i + 1), contents.slice(i + 1..)),
|
||||
None => (contents.of(""), contents),
|
||||
};
|
||||
|
||||
Ok((input, (contents, ws, nl)))
|
||||
}
|
||||
|
|
@ -200,18 +196,72 @@ fn test_trim_line_end() {
|
|||
assert_eq!(output.0.as_str(), "* hello, world :abc:");
|
||||
assert_eq!(output.1.as_str(), " ");
|
||||
assert_eq!(output.2.as_str(), "\r\n");
|
||||
|
||||
let (input, output) = trim_line_end((" \rr", config).into()).unwrap();
|
||||
assert_eq!(input.as_str(), "r");
|
||||
assert_eq!(output.0.as_str(), "");
|
||||
assert_eq!(output.1.as_str(), " ");
|
||||
assert_eq!(output.2.as_str(), "\r");
|
||||
}
|
||||
|
||||
/// Recognizes a line ending \r, \n, \r\n or end of file
|
||||
pub fn eol_or_eof(input: Input) -> IResult<Input, Input, ()> {
|
||||
let mut bytes = input.bytes();
|
||||
|
||||
let count = match bytes.next() {
|
||||
Some(b'\n') => 1,
|
||||
Some(b'\r') => {
|
||||
if matches!(bytes.next(), Some(b'\n')) {
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
None => 0,
|
||||
_ => return Err(nom::Err::Error(())),
|
||||
};
|
||||
|
||||
Ok(input.take_split(count))
|
||||
}
|
||||
|
||||
struct LineStart<'a> {
|
||||
bytes: &'a [u8],
|
||||
iter: Memchr2<'a>,
|
||||
}
|
||||
|
||||
impl<'a> LineStart<'a> {
|
||||
fn new(input: &'a str) -> Self {
|
||||
let bytes = input.as_bytes();
|
||||
LineStart {
|
||||
bytes,
|
||||
iter: memchr2_iter(b'\r', b'\n', bytes),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for LineStart<'a> {
|
||||
type Item = usize;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let i = self.iter.next()?;
|
||||
if self.bytes[i] == b'\r' && self.bytes.get(i + 1) == Some(&b'\n') {
|
||||
let ii = self.iter.next();
|
||||
debug_assert_eq!(i + 1, ii.unwrap());
|
||||
Some(i + 2)
|
||||
} else {
|
||||
Some(i + 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator of positions of line start, including zero
|
||||
pub fn line_starts_iter(s: &str) -> impl Iterator<Item = usize> + '_ {
|
||||
once(0).chain(memchr_iter(b'\n', s.as_bytes()).map(|i| i + 1))
|
||||
once(0).chain(LineStart::new(s))
|
||||
}
|
||||
|
||||
/// Returns an iterator of positions of line end, including eof
|
||||
pub fn line_ends_iter(s: &str) -> impl Iterator<Item = usize> + '_ {
|
||||
memchr_iter(b'\n', s.as_bytes())
|
||||
.map(|i| i + 1)
|
||||
.chain(once(s.len()))
|
||||
LineStart::new(s).chain(once(s.len()))
|
||||
}
|
||||
|
||||
pub struct NodeBuilder {
|
||||
|
|
@ -233,7 +283,7 @@ impl NodeBuilder {
|
|||
pub fn nl(&mut self, i: Input) {
|
||||
if !i.is_empty() {
|
||||
debug_assert!(
|
||||
i.s == "\n" || i.s == "\r\n",
|
||||
i.s == "\n" || i.s == "\r\n" || i.s == "\r",
|
||||
"{:?} should be a new line",
|
||||
i.s
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use nom::{IResult, InputTake};
|
||||
use nom::{AsBytes, IResult, InputTake};
|
||||
|
||||
use super::{
|
||||
combinator::{blank_lines, line_ends_iter, node, GreenElement},
|
||||
|
|
@ -9,10 +9,13 @@ use super::{
|
|||
fn comment_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let mut start = 0;
|
||||
for i in line_ends_iter(input.as_str()) {
|
||||
let line = &input.as_str()[start..i];
|
||||
let trimmed = line.trim_start();
|
||||
let mut iter = input.as_bytes()[start..]
|
||||
.iter()
|
||||
.skip_while(|&&b| b == b' ' || b == b'\t');
|
||||
|
||||
if trimmed == "#" || trimmed == "#\n" || trimmed == "#\r\n" || trimmed.starts_with("# ") {
|
||||
if matches!(iter.next(), Some(b'#'))
|
||||
&& matches!(iter.next(), None | Some(b'\n') | Some(b'\r') | Some(b' '))
|
||||
{
|
||||
start = i;
|
||||
} else {
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1,16 +1,15 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag_no_case, take_while1},
|
||||
character::complete::{line_ending, space0, space1},
|
||||
combinator::{eof, iterator, map, opt},
|
||||
character::complete::{space0, space1},
|
||||
combinator::{iterator, map, opt},
|
||||
sequence::tuple,
|
||||
IResult, InputTake,
|
||||
};
|
||||
|
||||
use super::{
|
||||
combinator::{
|
||||
blank_lines, colon_token, line_starts_iter, node, plus_token, trim_line_end, GreenElement,
|
||||
NodeBuilder,
|
||||
blank_lines, colon_token, eol_or_eof, line_starts_iter, node, plus_token, trim_line_end,
|
||||
GreenElement, NodeBuilder,
|
||||
},
|
||||
input::Input,
|
||||
SyntaxKind::*,
|
||||
|
|
@ -25,7 +24,7 @@ fn drawer_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
|
|||
take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'),
|
||||
colon_token,
|
||||
space0,
|
||||
alt((line_ending, eof)),
|
||||
eol_or_eof,
|
||||
))(input)?;
|
||||
|
||||
b.ws(ws);
|
||||
|
|
@ -45,7 +44,7 @@ fn drawer_end_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
tag_no_case("END"),
|
||||
colon_token,
|
||||
space0,
|
||||
alt((line_ending, eof)),
|
||||
eol_or_eof,
|
||||
))(input)?;
|
||||
|
||||
let mut b = NodeBuilder::new();
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::tag_no_case,
|
||||
character::complete::{alpha1, line_ending, space0, space1},
|
||||
combinator::eof,
|
||||
character::complete::{alpha1, space0, space1},
|
||||
sequence::tuple,
|
||||
IResult, InputTake,
|
||||
};
|
||||
|
||||
use super::{
|
||||
combinator::{blank_lines, line_starts_iter, node, trim_line_end, GreenElement, NodeBuilder},
|
||||
combinator::{
|
||||
blank_lines, eol_or_eof, line_starts_iter, node, trim_line_end, GreenElement, NodeBuilder,
|
||||
},
|
||||
input::Input,
|
||||
SyntaxKind::*,
|
||||
};
|
||||
|
|
@ -55,12 +55,8 @@ fn dyn_block_begin_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
}
|
||||
|
||||
fn dyn_block_end_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let (input, (ws, end, ws_, nl)) = tuple((
|
||||
space0,
|
||||
tag_no_case("#+END:"),
|
||||
space0,
|
||||
alt((line_ending, eof)),
|
||||
))(input)?;
|
||||
let (input, (ws, end, ws_, nl)) =
|
||||
tuple((space0, tag_no_case("#+END:"), space0, eol_or_eof))(input)?;
|
||||
|
||||
let mut b = NodeBuilder::new();
|
||||
b.ws(ws);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use nom::{IResult, InputTake};
|
||||
use nom::{AsBytes, IResult, InputTake};
|
||||
|
||||
use super::{
|
||||
combinator::{blank_lines, line_ends_iter, node, GreenElement},
|
||||
|
|
@ -9,10 +9,13 @@ use super::{
|
|||
fn fixed_width_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
let mut start = 0;
|
||||
for i in line_ends_iter(input.as_str()) {
|
||||
let line = &input.s[start..i];
|
||||
let trimmed = line.trim_start();
|
||||
let mut iter = input.as_bytes()[start..]
|
||||
.iter()
|
||||
.skip_while(|&&b| b == b' ' || b == b'\t');
|
||||
|
||||
if trimmed == ":" || trimmed == ":\n" || trimmed == ":\r\n" || trimmed.starts_with(": ") {
|
||||
if matches!(iter.next(), Some(b':'))
|
||||
&& matches!(iter.next(), None | Some(b'\n') | Some(b'\r') | Some(b' '))
|
||||
{
|
||||
start = i;
|
||||
} else {
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag, take_while1},
|
||||
character::complete::{line_ending, space0},
|
||||
combinator::eof,
|
||||
character::complete::space0,
|
||||
sequence::tuple,
|
||||
IResult, InputTake,
|
||||
};
|
||||
|
|
@ -10,7 +8,7 @@ use nom::{
|
|||
use crate::SyntaxKind;
|
||||
|
||||
use super::{
|
||||
combinator::{l_curly_token, line_starts_iter, node, r_curly_token, GreenElement},
|
||||
combinator::{eol_or_eof, l_curly_token, line_starts_iter, node, r_curly_token, GreenElement},
|
||||
input::Input,
|
||||
};
|
||||
|
||||
|
|
@ -36,7 +34,7 @@ fn latex_environment_node_base(input: Input) -> IResult<Input, GreenElement, ()>
|
|||
tag(name1.s),
|
||||
r_curly_token,
|
||||
space0,
|
||||
alt((line_ending, eof)),
|
||||
eol_or_eof,
|
||||
))(input)
|
||||
{
|
||||
return Ok((
|
||||
|
|
|
|||
|
|
@ -1,13 +1,7 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
character::complete::{line_ending, space0},
|
||||
combinator::{eof, map},
|
||||
sequence::tuple,
|
||||
IResult,
|
||||
};
|
||||
use nom::{character::complete::space0, combinator::map, sequence::tuple, IResult};
|
||||
|
||||
use crate::{
|
||||
syntax::combinator::{backslash_token, node},
|
||||
syntax::combinator::{backslash_token, eol_or_eof, node},
|
||||
SyntaxKind,
|
||||
};
|
||||
|
||||
|
|
@ -16,12 +10,7 @@ use super::{combinator::GreenElement, input::Input};
|
|||
pub fn line_break_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
debug_assert!(input.s.starts_with('\\'));
|
||||
let mut parser = map(
|
||||
tuple((
|
||||
backslash_token,
|
||||
backslash_token,
|
||||
space0,
|
||||
alt((line_ending, eof)),
|
||||
)),
|
||||
tuple((backslash_token, backslash_token, space0, eol_or_eof)),
|
||||
|(b1, b2, ws, nl)| {
|
||||
node(
|
||||
SyntaxKind::LINE_BREAK,
|
||||
|
|
|
|||
|
|
@ -1,14 +1,10 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::tag,
|
||||
character::complete::{line_ending, space0},
|
||||
combinator::{eof, iterator},
|
||||
sequence::tuple,
|
||||
IResult,
|
||||
branch::alt, bytes::complete::tag, character::complete::space0, combinator::iterator,
|
||||
sequence::tuple, IResult,
|
||||
};
|
||||
|
||||
use super::{
|
||||
combinator::{GreenElement, NodeBuilder},
|
||||
combinator::{eol_or_eof, GreenElement, NodeBuilder},
|
||||
input::Input,
|
||||
timestamp::{timestamp_active_node, timestamp_inactive_node},
|
||||
SyntaxKind::*,
|
||||
|
|
@ -54,7 +50,7 @@ fn planning_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
|
||||
let (input, _) = it.finish()?;
|
||||
let (input, ws) = space0(input)?;
|
||||
let (input, nl) = alt((line_ending, eof))(input)?;
|
||||
let (input, nl) = eol_or_eof(input)?;
|
||||
|
||||
b.ws(ws);
|
||||
b.nl(nl);
|
||||
|
|
|
|||
|
|
@ -1,14 +1,10 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::take_while_m_n,
|
||||
character::complete::{line_ending, space0},
|
||||
combinator::{eof, map},
|
||||
sequence::tuple,
|
||||
bytes::complete::take_while_m_n, character::complete::space0, combinator::map, sequence::tuple,
|
||||
IResult,
|
||||
};
|
||||
|
||||
use super::{
|
||||
combinator::{blank_lines, GreenElement, NodeBuilder},
|
||||
combinator::{blank_lines, eol_or_eof, GreenElement, NodeBuilder},
|
||||
input::Input,
|
||||
SyntaxKind::*,
|
||||
};
|
||||
|
|
@ -19,7 +15,7 @@ pub fn rule_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
space0,
|
||||
take_while_m_n(5, usize::max_value(), |c| c == '-'),
|
||||
space0,
|
||||
alt((line_ending, eof)),
|
||||
eol_or_eof,
|
||||
blank_lines,
|
||||
)),
|
||||
|(ws, dashes, ws_, nl, post_blank)| {
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ fn org_table_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
let mut start = 0;
|
||||
for i in line_ends_iter(input.as_str()) {
|
||||
let line = input.slice(start..i);
|
||||
let trimmed = line.as_str().trim_start();
|
||||
let trimmed = line.as_str().trim_start_matches([' ', '\t']);
|
||||
|
||||
// Org tables end at the first line not starting with a vertical bar.
|
||||
if !trimmed.starts_with('|') {
|
||||
|
|
@ -81,7 +81,8 @@ fn table_standard_row_node(input: Input) -> Result<GreenElement, nom::Err<()>> {
|
|||
}
|
||||
}
|
||||
});
|
||||
it.finish()?;
|
||||
let (input, _) = it.finish()?;
|
||||
debug_assert!(input.is_empty());
|
||||
|
||||
Ok(b.finish(ORG_TABLE_STANDARD_ROW))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,9 +23,10 @@ const INPUT: &[&str] = &[
|
|||
// fuzz test
|
||||
"___\n",
|
||||
"\n\n\n",
|
||||
"\n\n\n",
|
||||
"\n*",
|
||||
"\r-"
|
||||
"\r-",
|
||||
"6\r\n",
|
||||
"|\n\u{b}|"
|
||||
];
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue