chore: add orgize-{cli,common,lsp} package

This commit is contained in:
PoiScript 2023-12-20 21:56:10 +08:00
parent 6930640866
commit 4cc1130a17
No known key found for this signature in database
GPG key ID: 22C2B1249D99985E
131 changed files with 6577 additions and 56 deletions

View file

@ -1,366 +0,0 @@
use nom::{IResult, InputTake};
use super::{
combinator::GreenElement,
cookie::cookie_node,
emphasis::{
self, bold_node, code_node, italic_node, strike_node, underline_node, verbatim_node,
},
entity::entity_node,
fn_ref::fn_ref_node,
inline_call::inline_call_node,
inline_src::inline_src_node,
input::Input,
latex_fragment::latex_fragment_node,
line_break::line_break_node,
link::link_node,
macros::macros_node,
radio_target::radio_target_node,
snippet::snippet_node,
subscript_superscript::{self, subscript_node, superscript_node},
target::target_node,
timestamp::{timestamp_active_node, timestamp_diary_node, timestamp_inactive_node},
};
/// Iterator state used to locate bytes in `input` that may start an object.
///
/// The `Iterator` implementation searches `input` with `finder`, starting at
/// `pos`, and yields the two halves returned by `input.take_split(p)` for
/// every matching byte offset `p`.
struct ObjectPositions<'a> {
/// the text being scanned
input: Input<'a>,
/// byte offset into `input` at which the next search begins
pos: usize,
/// set of bytes that are searched for
finder: jetscii::BytesConst,
}
impl ObjectPositions<'_> {
    /// Builds a scanner over `input` that stops at every byte which may open
    /// an object of the standard set.
    fn standard(input: Input) -> ObjectPositions {
        // NOTE: `_` appears twice (underline and subscript); the duplicate is
        // harmless and kept so the byte list mirrors the dispatch tables.
        let finder: jetscii::BytesConst = jetscii::bytes!(
            // text markup
            b'*', b'+', b'/', b'_', b'=', b'~',
            // snippet
            b'@',
            // timestamp, target, radio target
            b'<',
            // link, cookie, fn_ref, timestamp
            b'[',
            // inline call
            b'c',
            // inline source
            b's',
            // latex & entity
            b'\\', b'$',
            // macros
            b'{',
            // superscript
            b'^',
            // subscript
            b'_'
        );

        ObjectPositions {
            input,
            pos: 0,
            finder,
        }
    }

    /// Builds a scanner over `input` that stops at every byte which may open
    /// an object of the minimal set.
    fn minimal(input: Input) -> ObjectPositions {
        let finder: jetscii::BytesConst = jetscii::bytes!(
            // text markup
            b'*', b'+', b'/', b'_', b'=', b'~',
            // latex & entity
            b'\\', b'$',
            // superscript
            b'^',
            // subscript
            b'_'
        );

        ObjectPositions {
            input,
            pos: 0,
            finder,
        }
    }

    /// Builds a scanner over `input` that stops at every byte which may open
    /// an object allowed inside a link description.
    fn link_description(input: Input) -> ObjectPositions {
        let finder: jetscii::BytesConst = jetscii::bytes!(
            // text markup
            b'*', b'+', b'/', b'_', b'=', b'~',
            // latex & entity
            b'\\', b'$',
            // snippet
            b'@',
            // inline call
            b'c',
            // inline source
            b's',
            // macros
            b'{',
            // cookie
            b'[',
            // superscript
            b'^',
            // subscript
            b'_'
        );

        ObjectPositions {
            input,
            pos: 0,
            finder,
        }
    }
}
impl<'a> Iterator for ObjectPositions<'a> {
    /// The pair produced by `take_split` at a matched byte offset.
    type Item = (Input<'a>, Input<'a>);

    /// Advances to the next byte matched by `finder` and splits the input
    /// there; stops as soon as fewer than two bytes remain at the match.
    fn next(&mut self) -> Option<Self::Item> {
        let exhausted = self.pos >= self.input.len();
        if exhausted || self.input.len() < 2 {
            return None;
        }

        let start = self.pos;
        let haystack = &self.input.as_bytes()[start..];
        // `find` reports an offset relative to `start`
        let found = start + self.finder.find(haystack)?;

        // resume right after this match on the following call
        self.pos = found + 1;

        debug_assert!(
            start < self.pos && self.pos <= self.input.s.len(),
            "{} < {} < {}",
            start,
            self.pos,
            self.input.s.len()
        );

        // a valid object requires at least two characters
        let remaining = self.input.s.len() - found;
        if remaining >= 2 {
            Some(self.input.take_split(found))
        } else {
            None
        }
    }
}
/// parse minimal sets of objects, including
/// - LaTeX fragments ('\\')
/// - Text markup (bold code strike verbatim underline italic) ('*', '~', '+', '=', '_', '/')
/// - Entities ('\\')
/// - Superscripts and Subscripts
pub fn minimal_object_nodes(input: Input) -> Vec<GreenElement> {
object_nodes(
ObjectPositions::minimal,
|i: Input, pre: Input| match &i.as_bytes()[0] {
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'$' => latex_fragment_node(i),
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
},
input,
)
}
/// parses standard sets of objects, including
///
/// - Entities
/// - LaTeX Fragments
/// - Export Snippets
/// - Footnote References
/// - Inline Babel Calls
/// - Inline Source Blocks
/// - Links
/// - Macros
/// - Targets and Radio Targets
/// - Statistics Cookies
/// - Timestamps
/// - Text Markup (bold code strike verbatim underline italic)
/// - Line Breaks
/// - Subscript and Superscript
///
/// // todo:
/// - Citations
pub fn standard_object_nodes(input: Input) -> Vec<GreenElement> {
object_nodes(
ObjectPositions::standard,
|i: Input, pre: Input| match &i.as_bytes()[0] {
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'@' => snippet_node(i),
b'{' => macros_node(i),
b'<' => radio_target_node(i)
.or_else(|_| target_node(i))
.or_else(|_| timestamp_diary_node(i))
.or_else(|_| timestamp_active_node(i)),
b'[' => cookie_node(i)
.or_else(|_| link_node(i))
.or_else(|_| fn_ref_node(i))
.or_else(|_| timestamp_inactive_node(i)),
// NOTE: although not specified in document, inline call and inline src follows the
// same pre tokens rule as text markup
b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
b'$' => latex_fragment_node(i),
b'\\' if !pre.s.ends_with('\\') && i.as_bytes()[1] == b'\\' => line_break_node(i),
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
},
input,
)
}
pub fn link_description_object_nodes(input: Input) -> Vec<GreenElement> {
object_nodes(
ObjectPositions::link_description,
|i: Input<'_>, pre: Input<'_>| match &i.as_bytes()[0] {
b'@' => snippet_node(i),
b'c' if emphasis::verify_pre(pre.s) => inline_call_node(i),
b's' if emphasis::verify_pre(pre.s) => inline_src_node(i),
b'{' => macros_node(i),
b'[' => cookie_node(i),
b'*' if emphasis::verify_pre(pre.s) => bold_node(i),
b'+' if emphasis::verify_pre(pre.s) => strike_node(i),
b'/' if emphasis::verify_pre(pre.s) => italic_node(i),
b'_' if emphasis::verify_pre(pre.s) => underline_node(i),
b'=' if emphasis::verify_pre(pre.s) => verbatim_node(i),
b'~' if emphasis::verify_pre(pre.s) => code_node(i),
b'$' => latex_fragment_node(i),
b'\\' => entity_node(i).or_else(|_| latex_fragment_node(i)),
b'^' if subscript_superscript::verify_pre(pre.s) => superscript_node(i),
b'_' if subscript_superscript::verify_pre(pre.s) => subscript_node(i),
_ => Err(nom::Err::Error(())),
},
input,
)
}
/// Shared driver for the `*_object_nodes` functions.
///
/// - `position` builds the candidate scanner for the not-yet-consumed input.
/// - `parse` receives the input starting at a candidate and the text in
///   front of it, and returns the remaining input plus the parsed node.
/// - `input` is the text to parse.
///
/// The first candidate that parses wins: the text in front of it (if any) is
/// pushed as a text token, followed by the node, and scanning restarts on the
/// remaining input. When no candidate parses, the rest becomes one text token.
fn object_nodes<'a, F, P>(position: F, parse: P, input: Input<'a>) -> Vec<GreenElement>
where
    F: Fn(Input) -> ObjectPositions,
    P: Fn(Input<'a>, Input<'a>) -> IResult<Input<'a>, GreenElement, ()>,
{
    let mut rest = input;
    let mut nodes = Vec::new();

    while !rest.is_empty() {
        // first candidate position at which `parse` succeeds
        let hit = position(rest).find_map(|(candidate, head)| {
            debug_assert!(
                candidate.s.len() >= 2,
                "object must have at least two characters: {:?}",
                candidate.s
            );
            parse(candidate, head)
                .ok()
                .map(|(tail, node)| (head, node, tail))
        });

        match hit {
            Some((head, node, tail)) => {
                if !head.is_empty() {
                    nodes.push(head.text_token());
                }
                nodes.push(node);
                debug_assert!(tail.len() < rest.len(), "{} < {}", tail.len(), rest.len());
                rest = tail;
            }
            None => {
                // nothing left to recognise: keep the remainder verbatim
                nodes.push(rest.text_token());
                break;
            }
        }
    }

    debug_assert_eq!(
        input.as_str(),
        nodes.iter().map(|node| node.to_string()).collect::<String>(),
        "parser must be lossless"
    );

    nodes
}
/// Checks which split points `ObjectPositions::standard` yields, by looking
/// at the text that starts at each candidate.
#[test]
fn positions() {
    let config = crate::ParseConfig::default();

    // a single byte can never start an object
    let lone = ObjectPositions::standard(("*", &config).into());
    assert_eq!(lone.count(), 0);

    // the trailing `{` has fewer than two bytes left and is not reported
    let found: Vec<&str> = ObjectPositions::standard(("*{", &config).into())
        .map(|(rest, _)| rest.s)
        .collect();
    assert_eq!(found, ["*{"]);

    // https://github.com/PoiScript/orgize/issues/69
    let found: Vec<&str> = ObjectPositions::standard(("{3}", &config).into())
        .map(|(rest, _)| rest.s)
        .collect();
    assert_eq!(found, ["{3}"]);

    let found: Vec<&str> = ObjectPositions::standard(("*{()}//s\nc<<", &config).into())
        .map(|(rest, _)| rest.s)
        .collect();
    assert_eq!(
        found,
        [
            "*{()}//s\nc<<",
            "{()}//s\nc<<",
            "//s\nc<<",
            "/s\nc<<",
            "s\nc<<",
            "c<<",
            "<<",
        ]
    );
}
// Snapshot tests: parse a few inputs with `standard_object_nodes`, wrap the
// result in a PARAGRAPH node and compare its debug output with inline snapshots.
// NOTE(review): the expected trees below are written flush-left; the debug
// output of a syntax tree is presumably indented per nesting level — confirm
// the snapshot indentation against the upstream file before relying on them.
#[test]
fn parse() {
use crate::{
syntax::{combinator::node, SyntaxKind, SyntaxNode},
ParseConfig,
};
// helper: parse `input` with the default config and build a root PARAGRAPH node
let t = |input: &str| {
let config = &ParseConfig::default();
let children = standard_object_nodes((input, config).into());
SyntaxNode::new_root(node(SyntaxKind::PARAGRAPH, children).into_node().unwrap())
};
// code markup directly followed by a footnote reference whose definition
// itself contains code and verbatim markup
insta::assert_debug_snapshot!(
t("~org-inlinetask-min-level~[fn:oiml:The default value of \n~org-inlinetask-min-level~ is =15=.]"),
@r###"
PARAGRAPH@0..93
CODE@0..26
TILDE@0..1 "~"
TEXT@1..25 "org-inlinetask-min-level"
TILDE@25..26 "~"
FN_REF@26..93
L_BRACKET@26..27 "["
TEXT@27..29 "fn"
COLON@29..30 ":"
TEXT@30..34 "oiml"
COLON@34..35 ":"
TEXT@35..57 "The default value of \n"
CODE@57..83
TILDE@57..58 "~"
TEXT@58..82 "org-inlinetask-min-level"
TILDE@82..83 "~"
TEXT@83..87 " is "
VERBATIM@87..91
EQUAL@87..88 "="
TEXT@88..90 "15"
EQUAL@90..91 "="
TEXT@91..92 "."
R_BRACKET@92..93 "]"
"###
);
// multi-line paragraph mixing italic, bold, verbatim and code markup with plain text
insta::assert_debug_snapshot!(
t(r#"Org is a /plaintext markup syntax/ developed with *Emacs* in 2003.
The canonical parser is =org-element.el=, which provides a number of
functions starting with ~org-element-~."#),
@r###"
PARAGRAPH@0..175
TEXT@0..9 "Org is a "
ITALIC@9..34
SLASH@9..10 "/"
TEXT@10..33 "plaintext markup syntax"
SLASH@33..34 "/"
TEXT@34..50 " developed with "
BOLD@50..57
STAR@50..51 "*"
TEXT@51..56 "Emacs"
STAR@56..57 "*"
TEXT@57..91 " in 2003.\nThe canonic ..."
VERBATIM@91..107
EQUAL@91..92 "="
TEXT@92..106 "org-element.el"
EQUAL@106..107 "="
TEXT@107..160 ", which provides a nu ..."
CODE@160..174
TILDE@160..161 "~"
TEXT@161..173 "org-element-"
TILDE@173..174 "~"
TEXT@174..175 "."
"###
);
// superscript directly following a plain character
insta::assert_debug_snapshot!(
t("a^abc"),
@r###"
PARAGRAPH@0..5
TEXT@0..1 "a"
SUPERSCRIPT@1..5
CARET@1..2 "^"
TEXT@2..5 "abc"
"###
);
}