From de4ff9aa618d45befc7f1051d5384e1227382e15 Mon Sep 17 00:00:00 2001 From: PoiScript Date: Thu, 21 Dec 2023 04:47:49 +0800 Subject: [PATCH] feat(common): list formatting --- orgize-common/src/formatting.rs | 133 -------------------- orgize-common/src/formatting/blank_lines.rs | 65 ++++++++++ orgize-common/src/formatting/list.rs | 102 +++++++++++++++ orgize-common/src/formatting/mod.rs | 117 +++++++++++++++++ orgize-common/src/formatting/rule.rs | 37 ++++++ orgize/src/ast/mod.rs | 16 +++ 6 files changed, 337 insertions(+), 133 deletions(-) delete mode 100644 orgize-common/src/formatting.rs create mode 100644 orgize-common/src/formatting/blank_lines.rs create mode 100644 orgize-common/src/formatting/list.rs create mode 100644 orgize-common/src/formatting/mod.rs create mode 100644 orgize-common/src/formatting/rule.rs diff --git a/orgize-common/src/formatting.rs b/orgize-common/src/formatting.rs deleted file mode 100644 index d37ef4c..0000000 --- a/orgize-common/src/formatting.rs +++ /dev/null @@ -1,133 +0,0 @@ -use orgize::{ - ast::Rule, - export::{Container, Event, TraversalContext, Traverser}, - rowan::ast::AstNode, - Org, SyntaxKind, SyntaxNode, -}; - -pub fn formatting(org: &Org) -> Vec<(usize, usize, String)> { - let mut format = FormattingTraverser { edits: vec![] }; - - org.traverse(&mut format); - - format.edits -} - -struct FormattingTraverser { - edits: Vec<(usize, usize, String)>, -} - -impl Traverser for FormattingTraverser { - fn event(&mut self, event: Event, _: &mut TraversalContext) { - match event { - Event::Rule(rule) => { - format_rule(&rule, &mut self.edits); - format_blank_lines(rule.syntax(), &mut self.edits); - } - Event::Clock(clock) => { - format_blank_lines(clock.syntax(), &mut self.edits); - } - - Event::Enter(Container::Document(document)) => { - format_blank_lines(document.syntax(), &mut self.edits); - } - Event::Enter(Container::Paragraph(paragraph)) => { - format_blank_lines(paragraph.syntax(), &mut self.edits); - } - Event::Enter(Container::List(list)) => { - format_blank_lines(list.syntax(), &mut self.edits); - } - Event::Enter(Container::OrgTable(table)) => { - format_blank_lines(table.syntax(), &mut self.edits); - } - Event::Enter(Container::SpecialBlock(block)) => { - format_blank_lines(block.syntax(), &mut self.edits); - } - Event::Enter(Container::QuoteBlock(block)) => { - format_blank_lines(block.syntax(), &mut self.edits); - } - Event::Enter(Container::CenterBlock(block)) => { - format_blank_lines(block.syntax(), &mut self.edits); - } - Event::Enter(Container::VerseBlock(block)) => { - format_blank_lines(block.syntax(), &mut self.edits); - } - Event::Enter(Container::CommentBlock(block)) => { - format_blank_lines(block.syntax(), &mut self.edits); - } - Event::Enter(Container::ExampleBlock(block)) => { - format_blank_lines(block.syntax(), &mut self.edits); - } - Event::Enter(Container::ExportBlock(block)) => { - format_blank_lines(block.syntax(), &mut self.edits); - } - - _ => {} - } - } -} - -fn format_rule(rule: &Rule, edits: &mut Vec<(usize, usize, String)>) { - let node = rule.syntax(); - - for token in node.children_with_tokens().filter_map(|e| e.into_token()) { - if token.kind() == SyntaxKind::WHITESPACE && !token.text().is_empty() { - edits.push(( - token.text_range().start().into(), - token.text_range().end().into(), - "".into(), - )); - } - - if token.kind() == SyntaxKind::TEXT && token.text().len() != 5 { - edits.push(( - token.text_range().start().into(), - token.text_range().end().into(), - "-----".into(), - )); - } - - if token.kind() == SyntaxKind::NEW_LINE && token.text() != "\n" { - edits.push(( - token.text_range().start().into(), - token.text_range().end().into(), - "\n".into(), - )); - } - } -} - -fn format_blank_lines(node: &SyntaxNode, edits: &mut Vec<(usize, usize, String)>) { - let mut blank_lines = node - .children_with_tokens() - .filter_map(|e| e.into_token()) - .filter(|n| n.kind() == SyntaxKind::BLANK_LINE); - - if let Some(line) = blank_lines.next() { - if line.text() != "\n" { - edits.push(( - line.text_range().start().into(), - line.text_range().end().into(), - "\n".into(), - )); - } - } - - match (blank_lines.next(), blank_lines.last()) { - (Some(first), Some(last)) => { - edits.push(( - first.text_range().start().into(), - last.text_range().end().into(), - "".into(), - )); - } - (Some(first), None) => { - edits.push(( - first.text_range().start().into(), - first.text_range().end().into(), - "".into(), - )); - } - _ => {} - } -} diff --git a/orgize-common/src/formatting/blank_lines.rs b/orgize-common/src/formatting/blank_lines.rs new file mode 100644 index 0000000..e3d469a --- /dev/null +++ b/orgize-common/src/formatting/blank_lines.rs @@ -0,0 +1,65 @@ +use orgize::{SyntaxKind, SyntaxNode}; + +pub fn format(node: &SyntaxNode, edits: &mut Vec<(usize, usize, String)>) { + let mut blank_lines = node + .children_with_tokens() + .filter_map(|e| e.into_token()) + .filter(|n| n.kind() == SyntaxKind::BLANK_LINE); + + let Some(first_line) = blank_lines.next() else { + return; + }; + + if first_line.text() != "\n" { + edits.push(( + first_line.text_range().start().into(), + first_line.text_range().end().into(), + "\n".into(), + )); + } + + match (blank_lines.next(), blank_lines.last()) { + (Some(first), Some(last)) => { + edits.push(( + first.text_range().start().into(), + last.text_range().end().into(), + "".into(), + )); + } + (Some(first), None) => { + edits.push(( + first.text_range().start().into(), + first.text_range().end().into(), + "".into(), + )); + } + _ => {} + } +} + +#[test] +fn test() { + use crate::test_case; + use orgize::ast::SourceBlock; + + test_case!( + SourceBlock, + "#+begin_src\n#+end_src\n\r\n\n\r", + format, + "#+begin_src\n#+end_src\n\n" + ); + + test_case!( + SourceBlock, + "#+begin_src\n#+end_src\n", + format, + "#+begin_src\n#+end_src\n" + ); + + test_case!( + SourceBlock, + "#+begin_src\n#+end_src", + format, + "#+begin_src\n#+end_src" + ); +} diff --git a/orgize-common/src/formatting/list.rs b/orgize-common/src/formatting/list.rs new file mode 100644 index 0000000..69d401e --- /dev/null +++ b/orgize-common/src/formatting/list.rs @@ -0,0 +1,102 @@ +use std::iter::once; + +use orgize::{ast::ListItem, rowan::ast::AstNode, SyntaxNode}; + +pub fn format(node: &SyntaxNode, indent_level: usize, edits: &mut Vec<(usize, usize, String)>) { + let mut items = node.children().filter_map(ListItem::cast); + + let Some(first_item) = items.next() else { + return; + }; + + match first_item.bullet().trim_end() { + expected_bullet @ ("-" | "+" | "*") => { + if first_item.indent() != 3 * indent_level { + edits.push(( + first_item.begin() as usize, + first_item.begin() as usize + first_item.indent(), + " ".repeat(3 * indent_level), + )); + } + + for item in items { + if item.indent() != 3 * indent_level { + edits.push(( + item.begin() as usize, + item.begin() as usize + item.indent(), + " ".repeat(3 * indent_level), + )); + } + + let bullet = item.bullet(); + let s = bullet.trim_end(); + if s != expected_bullet { + edits.push(( + bullet.start() as usize, + bullet.start() as usize + s.len(), + expected_bullet.to_string(), + )); + } + } + } + b => { + let c = if b.ends_with(')') { ')' } else { '.' }; + + for (index, item) in once(first_item).chain(items).enumerate() { + if item.indent() != 3 * indent_level { + edits.push(( + item.begin() as usize, + item.begin() as usize + item.indent(), + " ".repeat(3 * indent_level), + )); + } + + let expected_bullet = format!("{}{c}", index + 1); + let bullet = item.bullet(); + let s = bullet.trim_end(); + if s != expected_bullet { + edits.push(( + bullet.start() as usize, + bullet.start() as usize + s.len(), + expected_bullet, + )); + } + } + } + } +} + +#[test] +fn test() { + use crate::test_case; + use orgize::ast::List; + + let format0 = + |node: &SyntaxNode, edits: &mut Vec<(usize, usize, String)>| format(node, 0, edits); + + let format2 = + |node: &SyntaxNode, edits: &mut Vec<(usize, usize, String)>| format(node, 2, edits); + + test_case!(List, "1. item", format0, "1. item"); + + test_case!( + List, + "0. item\n- item\n+ item", + format0, + "1. item\n2. item\n3. item" + ); + + test_case!( + List, + " + item\n - item\n 1. item", + format0, + "+ item\n+ item\n+ item" + ); + + test_case!( + List, + " + item\n - item\n 1. item", + format2, + " + item\n + item\n + item" + ); +} diff --git a/orgize-common/src/formatting/mod.rs b/orgize-common/src/formatting/mod.rs new file mode 100644 index 0000000..e21d9f2 --- /dev/null +++ b/orgize-common/src/formatting/mod.rs @@ -0,0 +1,117 @@ +use orgize::{ + export::{Container, Event, TraversalContext, Traverser}, + rowan::ast::AstNode, + Org, +}; + +mod blank_lines; +mod list; +mod rule; + +pub fn formatting(org: &Org) -> Vec<(usize, usize, String)> { + let mut format = FormattingTraverser::default(); + + org.traverse(&mut format); + + format.edits +} + +#[derive(Default)] +struct FormattingTraverser { + indent_level: usize, + edits: Vec<(usize, usize, String)>, +} + +impl Traverser for FormattingTraverser { + fn event(&mut self, event: Event, _: &mut TraversalContext) { + match event { + Event::Rule(rule) => { + rule::format(rule.syntax(), &mut self.edits); + blank_lines::format(rule.syntax(), &mut self.edits); + } + Event::Clock(clock) => { + blank_lines::format(clock.syntax(), &mut self.edits); + } + + Event::Enter(Container::Document(document)) => { + blank_lines::format(document.syntax(), &mut self.edits); + } + Event::Enter(Container::Paragraph(paragraph)) => { + blank_lines::format(paragraph.syntax(), &mut self.edits); + } + Event::Enter(Container::List(list)) => { + list::format(list.syntax(), self.indent_level, &mut self.edits); + blank_lines::format(list.syntax(), &mut self.edits); + self.indent_level += 1; + } + Event::Leave(Container::List(_)) => { + self.indent_level -= 1; + } + Event::Enter(Container::OrgTable(table)) => { + blank_lines::format(table.syntax(), &mut self.edits); + } + Event::Enter(Container::SpecialBlock(block)) => { + blank_lines::format(block.syntax(), &mut self.edits); + } + Event::Enter(Container::QuoteBlock(block)) => { + blank_lines::format(block.syntax(), &mut self.edits); + } + Event::Enter(Container::CenterBlock(block)) => { + blank_lines::format(block.syntax(), &mut self.edits); + } + Event::Enter(Container::VerseBlock(block)) => { + blank_lines::format(block.syntax(), &mut self.edits); + } + Event::Enter(Container::CommentBlock(block)) => { + blank_lines::format(block.syntax(), &mut self.edits); + } + Event::Enter(Container::ExampleBlock(block)) => { + blank_lines::format(block.syntax(), &mut self.edits); + } + Event::Enter(Container::ExportBlock(block)) => { + blank_lines::format(block.syntax(), &mut self.edits); + } + Event::Enter(Container::SourceBlock(block)) => { + blank_lines::format(block.syntax(), &mut self.edits); + } + + _ => {} + } + } +} + +#[cfg(test)] +#[macro_export] +macro_rules! test_case { + ( + $n:tt, + $input:expr, + $fn:expr, + $expected:expr + ) => {{ + use orgize::rowan::ast::AstNode; + + let org = orgize::Org::parse($input); + let node = org.first_node::<$n>().unwrap(); + let node = node.syntax(); + + let mut patches = vec![]; + + $fn(&node, &mut patches); + + let input = node.to_string(); + + patches.sort_by(|a, b| a.0.cmp(&b.0)); + + let mut i = 0; + let mut output = String::new(); + for (start, end, text) in patches { + output.push_str(&input[i..start]); + output.push_str(&text); + i = end; + } + output.push_str(&input[i..]); + + assert_eq!(output, $expected); + }}; +} diff --git a/orgize-common/src/formatting/rule.rs b/orgize-common/src/formatting/rule.rs new file mode 100644 index 0000000..4a51c31 --- /dev/null +++ b/orgize-common/src/formatting/rule.rs @@ -0,0 +1,37 @@ +use orgize::{SyntaxKind, SyntaxNode}; + +pub fn format(node: &SyntaxNode, edits: &mut Vec<(usize, usize, String)>) { + for token in node.children_with_tokens().filter_map(|e| e.into_token()) { + if token.kind() == SyntaxKind::WHITESPACE && !token.text().is_empty() { + edits.push(( + token.text_range().start().into(), + token.text_range().end().into(), + "".into(), + )); + } + + if token.kind() == SyntaxKind::TEXT && token.text().len() != 5 { + edits.push(( + token.text_range().start().into(), + token.text_range().end().into(), + "-----".into(), + )); + } + + if token.kind() == SyntaxKind::NEW_LINE && token.text() != "\n" { + edits.push(( + token.text_range().start().into(), + token.text_range().end().into(), + "\n".into(), + )); + } + } +} + +#[test] +fn test() { + use crate::test_case; + use orgize::ast::Rule; + + test_case!(Rule, " ------------\r\n", format, "-----\n"); +} diff --git a/orgize/src/ast/mod.rs b/orgize/src/ast/mod.rs index 57ee108..3f958ea 100644 --- a/orgize/src/ast/mod.rs +++ b/orgize/src/ast/mod.rs @@ -76,6 +76,22 @@ pub fn filter_token( #[derive(Default, Eq)] pub struct Token(pub(crate) Option); +impl Token { + pub fn start(&self) -> u32 { + match &self.0 { + Some(t) => t.text_range().start().into(), + None => 0, + } + } + + pub fn end(&self) -> u32 { + match &self.0 { + Some(t) => t.text_range().end().into(), + None => 0, + } + } +} + impl AsRef for Token { fn as_ref(&self) -> &str { match &self.0 {