diff --git a/examples/html-slugify.rs b/examples/html-slugify.rs index f1b7595..ecf8b5c 100644 --- a/examples/html-slugify.rs +++ b/examples/html-slugify.rs @@ -50,7 +50,7 @@ impl Traverser for MyHtmlHandler { special_block quote_block center_block verse_block comment_block example_block export_block source_block babel_call clock cookie radio_target drawer dyn_block fn_def fn_ref macros snippet timestamp target fixed_width org_table org_table_row org_table_cell latex_fragment - latex_environment entity + latex_environment entity line_break } } diff --git a/src/ast/generate.js b/src/ast/generate.js index b0e6ea3..b81e51c 100644 --- a/src/ast/generate.js +++ b/src/ast/generate.js @@ -260,6 +260,10 @@ const nodes = [ struct: "Entity", kind: ["ENTITY"], }, + { + struct: "LineBreak", + kind: ["LINE_BREAK"], + }, ]; let content = `//! generated file, do not modify it directly diff --git a/src/ast/generated.rs b/src/ast/generated.rs index 8a32652..344903e 100644 --- a/src/ast/generated.rs +++ b/src/ast/generated.rs @@ -1691,3 +1691,28 @@ impl Entity { self.syntax.text_range().end().into() } } + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct LineBreak { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for LineBreak { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == LINE_BREAK + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| LineBreak { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl LineBreak { + pub fn begin(&self) -> u32 { + self.syntax.text_range().start().into() + } + pub fn end(&self) -> u32 { + self.syntax.text_range().end().into() + } +} diff --git a/src/export/forward.rs b/src/export/forward.rs index f27bfd7..268f962 100644 --- a/src/export/forward.rs +++ b/src/export/forward.rs @@ -49,7 +49,7 @@ /// special_block quote_block center_block verse_block comment_block example_block export_block /// source_block babel_call clock cookie radio_target drawer dyn_block fn_def fn_ref macros /// snippet timestamp target fixed_width org_table org_table_row org_table_cell latex_fragment -/// latex_environment entity +/// latex_environment entity line_break /// } /// } /// @@ -200,6 +200,9 @@ macro_rules! forward_handler { (@method $handler:ty, entity) => { forward_handler!(@method $handler, entity, WalkEvent<&$crate::ast::Entity>); }; + (@method $handler:ty, line_break) => { + forward_handler!(@method $handler, line_break, WalkEvent<&$crate::ast::LineBreak>); + }; (@method $handler:ty, $x:ident) => { std::compile_error!(std::concat!(std::stringify!($x), " is not a method")); }; diff --git a/src/export/html.rs b/src/export/html.rs index 286f534..b422b02 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -510,4 +510,12 @@ impl Traverser for HtmlExport { ctx.skip(); } } + + #[tracing::instrument(skip(self, ctx))] + fn line_break(&mut self, event: WalkEvent<&LineBreak>, ctx: &mut TraversalContext) { + if let WalkEvent::Enter(_) = event { + self.output += "
"; + ctx.skip(); + } + } } diff --git a/src/export/traverse.rs b/src/export/traverse.rs index bb25a7a..be2efd0 100644 --- a/src/export/traverse.rs +++ b/src/export/traverse.rs @@ -140,6 +140,7 @@ pub trait Traverser { LATEX_FRAGMENT => traverse!(LatexFragment, latex_fragment), LATEX_ENVIRONMENT => traverse!(LatexEnvironment, latex_environment), ENTITY => traverse!(Entity, entity), + LINE_BREAK => traverse!(LineBreak, line_break), BLOCK_CONTENT | LIST_ITEM_CONTENT => traverse_children!(node), @@ -156,97 +157,99 @@ pub trait Traverser { } /// Called when visiting `Text` token - fn text(&mut self, _token: SyntaxToken, _ctx: &mut TraversalContext); + fn text(&mut self, token: SyntaxToken, ctx: &mut TraversalContext); /// Called when entering or leaving `Document` node - fn document(&mut self, _event: WalkEvent<&Document>, _ctx: &mut TraversalContext); + fn document(&mut self, event: WalkEvent<&Document>, ctx: &mut TraversalContext); /// Called when entering or leaving `Headline` node - fn headline(&mut self, _event: WalkEvent<&Headline>, _ctx: &mut TraversalContext); + fn headline(&mut self, event: WalkEvent<&Headline>, ctx: &mut TraversalContext); /// Called when entering or leaving `Paragraph` node - fn paragraph(&mut self, _event: WalkEvent<&Paragraph>, _ctx: &mut TraversalContext); + fn paragraph(&mut self, event: WalkEvent<&Paragraph>, ctx: &mut TraversalContext); /// Called when entering or leaving `Section` node - fn section(&mut self, _event: WalkEvent<&Section>, _ctx: &mut TraversalContext); + fn section(&mut self, event: WalkEvent<&Section>, ctx: &mut TraversalContext); /// Called when entering or leaving `Rule` node - fn rule(&mut self, _event: WalkEvent<&Rule>, _ctx: &mut TraversalContext); + fn rule(&mut self, event: WalkEvent<&Rule>, ctx: &mut TraversalContext); /// Called when entering or leaving `Comment` node - fn comment(&mut self, _event: WalkEvent<&Comment>, _ctx: &mut TraversalContext); + fn comment(&mut self, event: WalkEvent<&Comment>, ctx: &mut TraversalContext); /// Called when entering or leaving `InlineSrc` node - fn inline_src(&mut self, _event: WalkEvent<&InlineSrc>, _ctx: &mut TraversalContext); + fn inline_src(&mut self, event: WalkEvent<&InlineSrc>, ctx: &mut TraversalContext); /// Called when entering or leaving `InlineCall` node - fn inline_call(&mut self, _event: WalkEvent<&InlineCall>, _ctx: &mut TraversalContext); + fn inline_call(&mut self, event: WalkEvent<&InlineCall>, ctx: &mut TraversalContext); /// Called when entering or leaving `Code` node - fn code(&mut self, _event: WalkEvent<&Code>, _ctx: &mut TraversalContext); + fn code(&mut self, event: WalkEvent<&Code>, ctx: &mut TraversalContext); /// Called when entering or leaving `Bold` node - fn bold(&mut self, _event: WalkEvent<&Bold>, _ctx: &mut TraversalContext); + fn bold(&mut self, event: WalkEvent<&Bold>, ctx: &mut TraversalContext); /// Called when entering or leaving `Verbatim` node - fn verbatim(&mut self, _event: WalkEvent<&Verbatim>, _ctx: &mut TraversalContext); + fn verbatim(&mut self, event: WalkEvent<&Verbatim>, ctx: &mut TraversalContext); /// Called when entering or leaving `Italic` node - fn italic(&mut self, _event: WalkEvent<&Italic>, _ctx: &mut TraversalContext); + fn italic(&mut self, event: WalkEvent<&Italic>, ctx: &mut TraversalContext); /// Called when entering or leaving `Strike` node - fn strike(&mut self, _event: WalkEvent<&Strike>, _ctx: &mut TraversalContext); + fn strike(&mut self, event: WalkEvent<&Strike>, ctx: &mut TraversalContext); /// Called when entering or leaving `Underline` node - fn underline(&mut self, _event: WalkEvent<&Underline>, _ctx: &mut TraversalContext); + fn underline(&mut self, event: WalkEvent<&Underline>, ctx: &mut TraversalContext); /// Called when entering or leaving `List` node - fn list(&mut self, _event: WalkEvent<&List>, _ctx: &mut TraversalContext); + fn list(&mut self, event: WalkEvent<&List>, ctx: &mut TraversalContext); /// Called when entering or leaving `ListItem` node - fn list_item(&mut self, _event: WalkEvent<&ListItem>, _ctx: &mut TraversalContext); + fn list_item(&mut self, event: WalkEvent<&ListItem>, ctx: &mut TraversalContext); /// Called when entering or leaving `SpecialBlock` node - fn special_block(&mut self, _event: WalkEvent<&SpecialBlock>, _ctx: &mut TraversalContext); + fn special_block(&mut self, event: WalkEvent<&SpecialBlock>, ctx: &mut TraversalContext); /// Called when entering or leaving `QuoteBlock` node - fn quote_block(&mut self, _event: WalkEvent<&QuoteBlock>, _ctx: &mut TraversalContext); + fn quote_block(&mut self, event: WalkEvent<&QuoteBlock>, ctx: &mut TraversalContext); /// Called when entering or leaving `CenterBlock` node - fn center_block(&mut self, _event: WalkEvent<&CenterBlock>, _ctx: &mut TraversalContext); + fn center_block(&mut self, event: WalkEvent<&CenterBlock>, ctx: &mut TraversalContext); /// Called when entering or leaving `VerseBlock` node - fn verse_block(&mut self, _event: WalkEvent<&VerseBlock>, _ctx: &mut TraversalContext); + fn verse_block(&mut self, event: WalkEvent<&VerseBlock>, ctx: &mut TraversalContext); /// Called when entering or leaving `CommentBlock` node - fn comment_block(&mut self, _event: WalkEvent<&CommentBlock>, _ctx: &mut TraversalContext); + fn comment_block(&mut self, event: WalkEvent<&CommentBlock>, ctx: &mut TraversalContext); /// Called when entering or leaving `ExampleBlock` node - fn example_block(&mut self, _event: WalkEvent<&ExampleBlock>, _ctx: &mut TraversalContext); + fn example_block(&mut self, event: WalkEvent<&ExampleBlock>, ctx: &mut TraversalContext); /// Called when entering or leaving `ExportBlock` node - fn export_block(&mut self, _event: WalkEvent<&ExportBlock>, _ctx: &mut TraversalContext); + fn export_block(&mut self, event: WalkEvent<&ExportBlock>, ctx: &mut TraversalContext); /// Called when entering or leaving `SourceBlock` node - fn source_block(&mut self, _event: WalkEvent<&SourceBlock>, _ctx: &mut TraversalContext); + fn source_block(&mut self, event: WalkEvent<&SourceBlock>, ctx: &mut TraversalContext); /// Called when entering or leaving `BabelCall` node - fn babel_call(&mut self, _event: WalkEvent<&BabelCall>, _ctx: &mut TraversalContext); + fn babel_call(&mut self, event: WalkEvent<&BabelCall>, ctx: &mut TraversalContext); /// Called when entering or leaving `Clock` node - fn clock(&mut self, _event: WalkEvent<&Clock>, _ctx: &mut TraversalContext); + fn clock(&mut self, event: WalkEvent<&Clock>, ctx: &mut TraversalContext); /// Called when entering or leaving `Cookie` node - fn cookie(&mut self, _event: WalkEvent<&Cookie>, _ctx: &mut TraversalContext); + fn cookie(&mut self, event: WalkEvent<&Cookie>, ctx: &mut TraversalContext); /// Called when entering or leaving `RadioTarget` node - fn radio_target(&mut self, _event: WalkEvent<&RadioTarget>, _ctx: &mut TraversalContext); + fn radio_target(&mut self, event: WalkEvent<&RadioTarget>, ctx: &mut TraversalContext); /// Called when entering or leaving `Drawer` node - fn drawer(&mut self, _event: WalkEvent<&Drawer>, _ctx: &mut TraversalContext); + fn drawer(&mut self, event: WalkEvent<&Drawer>, ctx: &mut TraversalContext); /// Called when entering or leaving `DynBlock` node - fn dyn_block(&mut self, _event: WalkEvent<&DynBlock>, _ctx: &mut TraversalContext); + fn dyn_block(&mut self, event: WalkEvent<&DynBlock>, ctx: &mut TraversalContext); /// Called when entering or leaving `FnDef` node - fn fn_def(&mut self, _event: WalkEvent<&FnDef>, _ctx: &mut TraversalContext); + fn fn_def(&mut self, event: WalkEvent<&FnDef>, ctx: &mut TraversalContext); /// Called when entering or leaving `FnRef` node - fn fn_ref(&mut self, _event: WalkEvent<&FnRef>, _ctx: &mut TraversalContext); + fn fn_ref(&mut self, event: WalkEvent<&FnRef>, ctx: &mut TraversalContext); /// Called when entering or leaving `Macros` node - fn macros(&mut self, _event: WalkEvent<&Macros>, _ctx: &mut TraversalContext); + fn macros(&mut self, event: WalkEvent<&Macros>, ctx: &mut TraversalContext); /// Called when entering or leaving `Snippet` node - fn snippet(&mut self, _event: WalkEvent<&Snippet>, _ctx: &mut TraversalContext); + fn snippet(&mut self, event: WalkEvent<&Snippet>, ctx: &mut TraversalContext); /// Called when entering or leaving `Timestamp` node - fn timestamp(&mut self, _event: WalkEvent<&Timestamp>, _ctx: &mut TraversalContext); + fn timestamp(&mut self, event: WalkEvent<&Timestamp>, ctx: &mut TraversalContext); /// Called when entering or leaving `Target` node - fn target(&mut self, _event: WalkEvent<&Target>, _ctx: &mut TraversalContext); + fn target(&mut self, event: WalkEvent<&Target>, ctx: &mut TraversalContext); /// Called when entering or leaving `FixedWidth` node - fn fixed_width(&mut self, _event: WalkEvent<&FixedWidth>, _ctx: &mut TraversalContext); + fn fixed_width(&mut self, event: WalkEvent<&FixedWidth>, ctx: &mut TraversalContext); /// Called when entering or leaving `OrgTable` node - fn org_table(&mut self, _event: WalkEvent<&OrgTable>, _ctx: &mut TraversalContext); + fn org_table(&mut self, event: WalkEvent<&OrgTable>, ctx: &mut TraversalContext); /// Called when entering or leaving `OrgTableRow` node - fn org_table_row(&mut self, _event: WalkEvent<&OrgTableRow>, _ctx: &mut TraversalContext); + fn org_table_row(&mut self, event: WalkEvent<&OrgTableRow>, ctx: &mut TraversalContext); /// Called when entering or leaving `OrgTableCell` node - fn org_table_cell(&mut self, _event: WalkEvent<&OrgTableCell>, _ctx: &mut TraversalContext); + fn org_table_cell(&mut self, event: WalkEvent<&OrgTableCell>, ctx: &mut TraversalContext); /// Called when entering or leaving `Link` node - fn link(&mut self, _event: WalkEvent<&Link>, _ctx: &mut TraversalContext); + fn link(&mut self, event: WalkEvent<&Link>, ctx: &mut TraversalContext); /// Called when entering or leaving `LatexFragment` node - fn latex_fragment(&mut self, _event: WalkEvent<&LatexFragment>, _ctx: &mut TraversalContext); + fn latex_fragment(&mut self, event: WalkEvent<&LatexFragment>, ctx: &mut TraversalContext); /// Called when entering or leaving `LatexEnvironment` node fn latex_environment( &mut self, - _event: WalkEvent<&LatexEnvironment>, - _ctx: &mut TraversalContext, + event: WalkEvent<&LatexEnvironment>, + ctx: &mut TraversalContext, ); /// Called when entering or leaving `Entity` node - fn entity(&mut self, _event: WalkEvent<&Entity>, _ctx: &mut TraversalContext); + fn entity(&mut self, event: WalkEvent<&Entity>, ctx: &mut TraversalContext); + /// Called when entering or leaving `LineBreak` node + fn line_break(&mut self, event: WalkEvent<&LineBreak>, ctx: &mut TraversalContext); } diff --git a/src/syntax/line_break.rs b/src/syntax/line_break.rs new file mode 100644 index 0000000..019fab5 --- /dev/null +++ b/src/syntax/line_break.rs @@ -0,0 +1,82 @@ +use nom::{ + branch::alt, + character::complete::{line_ending, space0}, + combinator::{eof, map}, + sequence::tuple, + IResult, +}; + +use crate::{ + syntax::combinator::{backslash_token, node}, + SyntaxKind, +}; + +use super::{combinator::GreenElement, input::Input}; + +pub fn line_break_node(input: Input) -> IResult { + debug_assert!(input.s.starts_with('\\')); + let mut parser = map( + tuple(( + backslash_token, + backslash_token, + space0, + alt((line_ending, eof)), + )), + |(b1, b2, ws, nl)| { + node( + SyntaxKind::LINE_BREAK, + [b1, b2, ws.ws_token(), nl.nl_token()], + ) + }, + ); + crate::lossless_parser!(parser, input) +} + +#[test] +fn parse() { + use crate::ast::LineBreak; + use crate::tests::to_ast; + + let to_line_break = to_ast::(line_break_node); + + insta::assert_debug_snapshot!( + to_line_break("\\\\\n").syntax, + @r###" + LINE_BREAK@0..3 + BACKSLASH@0..1 "\\" + BACKSLASH@1..2 "\\" + WHITESPACE@2..2 "" + NEW_LINE@2..3 "\n" + "### + ); + insta::assert_debug_snapshot!( + to_line_break("\\\\ \n").syntax, + @r###" + LINE_BREAK@0..6 + BACKSLASH@0..1 "\\" + BACKSLASH@1..2 "\\" + WHITESPACE@2..5 " " + NEW_LINE@5..6 "\n" + "### + ); + insta::assert_debug_snapshot!( + to_line_break("\\\\\r\n").syntax, + @r###" + LINE_BREAK@0..4 + BACKSLASH@0..1 "\\" + BACKSLASH@1..2 "\\" + WHITESPACE@2..2 "" + NEW_LINE@2..4 "\r\n" + "### + ); + insta::assert_debug_snapshot!( + to_line_break("\\\\ ").syntax, + @r###" + LINE_BREAK@0..6 + BACKSLASH@0..1 "\\" + BACKSLASH@1..2 "\\" + WHITESPACE@2..6 " " + NEW_LINE@6..6 "" + "### + ); +} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index a77423f..5a60467 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -21,6 +21,7 @@ pub mod input; pub mod keyword; pub mod latex_environment; pub mod latex_fragment; +pub mod line_break; pub mod link; pub mod list; pub mod macros; @@ -184,6 +185,7 @@ pub enum SyntaxKind { INLINE_SRC, LINK, LINK_PATH, + LINE_BREAK, COOKIE, RADIO_TARGET, FN_REF, @@ -234,6 +236,7 @@ impl SyntaxKind { | SyntaxKind::FN_REF | SyntaxKind::INLINE_CALL | SyntaxKind::INLINE_SRC + | SyntaxKind::LINE_BREAK | SyntaxKind::LINK | SyntaxKind::MACROS | SyntaxKind::RADIO_TARGET diff --git a/src/syntax/object.rs b/src/syntax/object.rs index 961ad05..071b42f 100644 --- a/src/syntax/object.rs +++ b/src/syntax/object.rs @@ -10,6 +10,7 @@ use super::{ inline_src::inline_src_node, input::Input, latex_fragment::latex_fragment_node, + line_break::line_break_node, link::link_node, macros::macros_node, radio_target::radio_target_node, @@ -110,10 +111,10 @@ impl<'a> Iterator for ObjectPositions<'a> { /// - Statistics Cookies /// - Timestamps /// - Text Markup (bold code strike verbatim underline italic) +/// - Line Breaks /// /// // todo: /// - Citations -/// - Line Breaks /// - Subscript and Superscript pub fn object_nodes(input: Input) -> Vec { // TODO: @@ -124,11 +125,6 @@ pub fn object_nodes(input: Input) -> Vec { 'l: while !i.is_empty() { for (input, head) in ObjectPositions::standard(i) { - debug_assert!( - input.s.len() >= 2, - "object must have at least two characters: {:?}", - input.s - ); if let Ok((input, node)) = standard_object_node(input) { if !head.is_empty() { nodes.push(head.text_token()) @@ -170,11 +166,6 @@ pub fn minimal_object_nodes(input: Input) -> Vec { 'l: while !i.is_empty() { for (input, head) in ObjectPositions::minimal(i) { - debug_assert!( - input.s.len() >= 2, - "object must have at least two characters: {:?}", - input.s - ); if let Ok((input, node)) = minimal_object_node(input) { if !head.is_empty() { nodes.push(head.text_token()) @@ -205,6 +196,12 @@ pub fn minimal_object_nodes(input: Input) -> Vec { /// parse an object from standard sets fn standard_object_node(i: Input) -> IResult { + debug_assert!( + i.s.len() >= 2, + "object must have at least two characters: {:?}", + i.s + ); + match &i.as_bytes()[0] { b'*' => bold_node(i), b'+' => strike_node(i), @@ -225,7 +222,13 @@ fn standard_object_node(i: Input) -> IResult { b'c' => inline_call_node(i), b's' => inline_src_node(i), b'$' => latex_fragment_node(i), - b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)), + b'\\' => { + if i.as_bytes()[1] == b'\\' { + line_break_node(i) + } else { + entity_node(i).or_else(|_| latex_fragment_node(i)) + } + } _ => Err(nom::Err::Error(())), } } diff --git a/tests/html.rs b/tests/html.rs index 82181b7..ab1a60e 100644 --- a/tests/html.rs +++ b/tests/html.rs @@ -166,3 +166,11 @@ fn table() { @"
" ); } + +#[test] +fn line_break() { + insta::assert_debug_snapshot!( + Org::parse("aa\\\\\nbb").to_html(), + @r###""

aa
bb

""### + ); +}