feat: block parsing
This commit is contained in:
parent
b2123d1acd
commit
27de7ee68c
10 changed files with 208 additions and 168 deletions
153
src/ast/block.rs
153
src/ast/block.rs
|
|
@ -1,112 +1,89 @@
|
|||
use crate::{SyntaxKind, SyntaxNode};
|
||||
use crate::SyntaxKind;
|
||||
|
||||
use super::{filter_token, SourceBlock, Token};
|
||||
|
||||
fn argument(node: &SyntaxNode, name: &str) -> Option<Token> {
|
||||
node.children()
|
||||
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
|
||||
.and_then(|n| {
|
||||
let mut iter = n
|
||||
.children_with_tokens()
|
||||
.filter_map(filter_token(SyntaxKind::TEXT))
|
||||
.skip_while(|n| n != name);
|
||||
|
||||
iter.next()?;
|
||||
|
||||
Some(iter.next().unwrap_or_default())
|
||||
})
|
||||
}
|
||||
use super::{filter_token, ExportBlock, SourceBlock, Token};
|
||||
|
||||
impl SourceBlock {
|
||||
/// ```rust
|
||||
/// use orgize::{Org, ast::SourceBlock};
|
||||
///
|
||||
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.language(), "c");
|
||||
/// assert_eq!(block.language().unwrap(), "c");
|
||||
/// let block = Org::parse("#+begin_src javascript \n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.language(), "javascript");
|
||||
/// assert_eq!(block.language().unwrap(), "javascript");
|
||||
///
|
||||
/// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.language(), "");
|
||||
/// assert!(block.language().is_none());
|
||||
/// ````
|
||||
pub fn language(&self) -> Token {
|
||||
pub fn language(&self) -> Option<Token> {
|
||||
self.syntax
|
||||
.children()
|
||||
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
|
||||
.and_then(|n| {
|
||||
n.children_with_tokens()
|
||||
.filter_map(filter_token(SyntaxKind::TEXT))
|
||||
.nth(2)
|
||||
})
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.flat_map(|n| n.children_with_tokens())
|
||||
.find_map(filter_token(SyntaxKind::SRC_BLOCK_LANGUAGE))
|
||||
}
|
||||
|
||||
/// ```rust
|
||||
/// use orgize::{Org, ast::SourceBlock};
|
||||
///
|
||||
/// let block = Org::parse("#+begin_src emacs-lisp -n 20\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.switches().unwrap(), "-n 20");
|
||||
/// let block = Org::parse("#+begin_src emacs-lisp -n 20 -r :tangle yes \n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.switches().unwrap(), "-n 20 -r");
|
||||
///
|
||||
/// let block = Org::parse("#+begin_src emacs-lisp\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert!(block.switches().is_none());
|
||||
/// let block = Org::parse("#+begin_src\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert!(block.switches().is_none());
|
||||
/// let block = Org::parse("#+begin_src :tangle yes\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert!(block.switches().is_none());
|
||||
/// ````
|
||||
pub fn switches(&self) -> Option<Token> {
|
||||
self.syntax
|
||||
.children()
|
||||
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
|
||||
.into_iter()
|
||||
.flat_map(|n| n.children_with_tokens())
|
||||
.find_map(filter_token(SyntaxKind::SRC_BLOCK_SWITCHES))
|
||||
}
|
||||
|
||||
/// ```rust
|
||||
/// use orgize::{Org, ast::SourceBlock};
|
||||
///
|
||||
/// let block = Org::parse("#+begin_src c :tangle yes\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.tangle().unwrap(), "yes");
|
||||
/// let block = Org::parse("#+begin_src c :tangle\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.tangle().unwrap(), "");
|
||||
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert!(block.tangle().is_none());
|
||||
/// ````
|
||||
/// Returns the token that follows `:tangle` on the begin line, as resolved by
/// `argument`; `None` when `argument` finds no `:tangle` key.
pub fn tangle(&self) -> Option<Token> {
|
||||
argument(&self.syntax, ":tangle")
|
||||
}
|
||||
|
||||
/// ```rust
|
||||
/// use orgize::{Org, ast::SourceBlock};
|
||||
/// assert_eq!(block.parameters().unwrap(), ":tangle yes");
|
||||
/// let block = Org::parse("#+begin_src c :tangle \n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.parameters().unwrap(), ":tangle");
|
||||
///
|
||||
/// let block = Org::parse("#+begin_src c :mkdir yes\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.mkdir().unwrap(), "yes");
|
||||
/// let block = Org::parse("#+begin_src c :mkdir\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.mkdir().unwrap(), "");
|
||||
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert!(block.mkdir().is_none());
|
||||
/// assert!(block.parameters().is_none());
|
||||
/// ````
|
||||
/// Returns the token that follows `:mkdir` on the begin line, as resolved by
/// `argument`; `None` when `argument` finds no `:mkdir` key.
pub fn mkdir(&self) -> Option<Token> {
|
||||
argument(&self.syntax, ":mkdir")
|
||||
}
|
||||
|
||||
/// ```rust
|
||||
/// use orgize::{Org, ast::SourceBlock};
|
||||
///
|
||||
/// let block = Org::parse("#+begin_src c :comments both\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.comments().unwrap(), "both");
|
||||
/// let block = Org::parse("#+begin_src c :comments\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.comments().unwrap(), "");
|
||||
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert!(block.comments().is_none());
|
||||
/// ````
|
||||
/// Returns the token that follows `:comments` on the begin line, as resolved
/// by `argument`; `None` when `argument` finds no `:comments` key.
pub fn comments(&self) -> Option<Token> {
|
||||
argument(&self.syntax, ":comments")
|
||||
}
|
||||
|
||||
/// ```rust
|
||||
/// use orgize::{Org, ast::SourceBlock};
|
||||
///
|
||||
/// let block = Org::parse("#+begin_src c :padline yes\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.padline().unwrap(), "yes");
|
||||
/// let block = Org::parse("#+begin_src c :padline\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.padline().unwrap(), "");
|
||||
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert!(block.padline().is_none());
|
||||
/// ````
|
||||
/// Returns the token that follows `:padline` on the begin line, as resolved
/// by `argument`; `None` when `argument` finds no `:padline` key.
pub fn padline(&self) -> Option<Token> {
|
||||
argument(&self.syntax, ":padline")
|
||||
}
|
||||
|
||||
/// ```rust
|
||||
/// use orgize::{Org, ast::SourceBlock};
|
||||
///
|
||||
/// let block = Org::parse("#+begin_src c :tangle-mode o444\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.tangle_mode().unwrap(), "o444");
|
||||
/// let block = Org::parse("#+begin_src c :tangle-mode\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert_eq!(block.tangle_mode().unwrap(), "");
|
||||
/// let block = Org::parse("#+begin_src c\n#+end_src").first_node::<SourceBlock>().unwrap();
|
||||
/// assert!(block.tangle_mode().is_none());
|
||||
/// ````
|
||||
pub fn tangle_mode(&self) -> Option<Token> {
|
||||
argument(&self.syntax, ":tangle-mode")
|
||||
pub fn parameters(&self) -> Option<Token> {
|
||||
self.syntax
|
||||
.children()
|
||||
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
|
||||
.into_iter()
|
||||
.flat_map(|n| n.children_with_tokens())
|
||||
.find_map(filter_token(SyntaxKind::SRC_BLOCK_PARAMETERS))
|
||||
}
|
||||
}
|
||||
|
||||
impl ExportBlock {
|
||||
/// ```rust
|
||||
/// use orgize::{Org, ast::ExportBlock};
|
||||
///
|
||||
/// let block = Org::parse("#+begin_export html\n#+end_export").first_node::<ExportBlock>().unwrap();
|
||||
/// assert_eq!(block.ty().unwrap(), "html");
|
||||
///
|
||||
/// let block = Org::parse("#+begin_export\n#+end_export").first_node::<ExportBlock>().unwrap();
|
||||
/// assert!(block.ty().is_none());
|
||||
/// ````
|
||||
pub fn ty(&self) -> Option<Token> {
|
||||
self.syntax
|
||||
.children()
|
||||
.find(|e| e.kind() == SyntaxKind::BLOCK_BEGIN)
|
||||
.into_iter()
|
||||
.flat_map(|n| n.children_with_tokens())
|
||||
.find_map(filter_token(SyntaxKind::EXPORT_BLOCK_TYPE))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ const nodes = [
|
|||
first_child: [
|
||||
["section", "Section"],
|
||||
["planning", "Planning"],
|
||||
["properties", "PropertyDrawer"],
|
||||
],
|
||||
children: [["headlines", "Headline"]],
|
||||
post_blank: true,
|
||||
|
|
|
|||
|
|
@ -160,6 +160,9 @@ impl Headline {
|
|||
/// Looks up a `Planning` node under this node's syntax via `support::child`.
pub fn planning(&self) -> Option<Planning> {
|
||||
support::child(&self.syntax)
|
||||
}
|
||||
/// Looks up a `PropertyDrawer` node under this node's syntax via `support::child`.
pub fn properties(&self) -> Option<PropertyDrawer> {
|
||||
support::child(&self.syntax)
|
||||
}
|
||||
/// Returns the `Headline` nodes under this node's syntax via `support::children`.
pub fn headlines(&self) -> AstChildren<Headline> {
|
||||
support::children(&self.syntax)
|
||||
}
|
||||
|
|
|
|||
39
src/ast/keyword.rs
Normal file
39
src/ast/keyword.rs
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
use crate::SyntaxKind;
|
||||
|
||||
use super::{filter_token, Keyword, Token};
|
||||
|
||||
impl Keyword {
|
||||
///
|
||||
/// ```rust
|
||||
/// use orgize::{Org, ast::Keyword};
|
||||
///
|
||||
/// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::<Keyword>().unwrap();
|
||||
/// assert_eq!(keyword.key(), "KEY");
|
||||
/// ```
|
||||
pub fn key(&self) -> Token {
|
||||
self.syntax
|
||||
.children_with_tokens()
|
||||
.find_map(filter_token(SyntaxKind::TEXT))
|
||||
.unwrap_or_else(|| {
|
||||
debug_assert!(false, "keyword must contains TEXT");
|
||||
Token::default()
|
||||
})
|
||||
}
|
||||
|
||||
///
|
||||
/// ```rust
|
||||
/// use orgize::{Org, ast::Keyword};
|
||||
///
|
||||
/// let keyword = Org::parse("#+KEY: VALUE\nabc").first_node::<Keyword>().unwrap();
|
||||
/// assert_eq!(keyword.value(), " VALUE");
|
||||
/// let keyword = Org::parse("#+KEY:").first_node::<Keyword>().unwrap();
|
||||
/// assert_eq!(keyword.value(), "");
|
||||
/// ```
|
||||
pub fn value(&self) -> Token {
|
||||
self.syntax
|
||||
.children_with_tokens()
|
||||
.filter_map(filter_token(SyntaxKind::TEXT))
|
||||
.nth(1)
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
|
@ -10,6 +10,7 @@ mod entity;
|
|||
mod headline;
|
||||
mod inline_call;
|
||||
mod inline_src;
|
||||
mod keyword;
|
||||
mod link;
|
||||
mod list;
|
||||
mod macros;
|
||||
|
|
|
|||
|
|
@ -199,6 +199,7 @@ pub trait Traverser {
|
|||
SUBSCRIPT => walk!(Subscript),
|
||||
KEYWORD => walk!(Keyword),
|
||||
PROPERTY_DRAWER => walk!(PropertyDrawer),
|
||||
NODE_PROPERTY => {}
|
||||
BLOCK_CONTENT | LIST_ITEM_CONTENT => {
|
||||
for child in node.children_with_tokens() {
|
||||
self.element(child, ctx);
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
use jetscii::Substring;
|
||||
use nom::{
|
||||
bytes::complete::{tag, tag_no_case, take_while1},
|
||||
character::complete::{space0, space1},
|
||||
sequence::tuple,
|
||||
IResult, InputTake,
|
||||
branch::alt,
|
||||
bytes::complete::{tag, tag_no_case, take_while, take_while1},
|
||||
character::complete::{alpha1, space0, space1},
|
||||
combinator::{cond, opt},
|
||||
sequence::{separated_pair, tuple},
|
||||
IResult, InputLength, InputTake,
|
||||
};
|
||||
|
||||
use super::{
|
||||
|
|
@ -52,22 +53,91 @@ fn block_node_base(input: Input) -> IResult<Input, GreenElement, ()> {
|
|||
}
|
||||
|
||||
fn block_begin_node(input: Input) -> IResult<Input, (GreenElement, &str), ()> {
|
||||
let (input, (ws, start, name, (argument, ws_, nl))) = tuple((
|
||||
space0,
|
||||
tag_no_case("#+BEGIN_"),
|
||||
take_while1(|c| c != ' ' && c != '\t' && c != '\r' && c != '\n'),
|
||||
trim_line_end,
|
||||
))(input)?;
|
||||
let (input, (ws1, begin, name)) = tuple((space0, tag_no_case("#+BEGIN_"), alpha1))(input)?;
|
||||
|
||||
let mut b = NodeBuilder::new();
|
||||
b.ws(ws);
|
||||
b.text(start);
|
||||
b.ws(ws1);
|
||||
b.text(begin);
|
||||
b.text(name);
|
||||
b.children.extend(block_argument(argument)?.1);
|
||||
b.ws(ws_);
|
||||
b.nl(nl);
|
||||
|
||||
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
|
||||
if name.s.eq_ignore_ascii_case("SRC") {
|
||||
let (input, language) = opt(tuple((
|
||||
space1,
|
||||
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
|
||||
)))(input)?;
|
||||
let (input, switches) = opt(tuple((space1, source_block_switches)))(input)?;
|
||||
let (input, ws1) = space0(input)?;
|
||||
let (input, (parameters, ws2, nl)) = trim_line_end(input)?;
|
||||
|
||||
if let Some((ws, language)) = language {
|
||||
b.ws(ws);
|
||||
b.token(SRC_BLOCK_LANGUAGE, language);
|
||||
}
|
||||
if let Some((ws, switches)) = switches {
|
||||
b.ws(ws);
|
||||
b.token(SRC_BLOCK_SWITCHES, switches);
|
||||
}
|
||||
b.ws(ws1);
|
||||
if !parameters.is_empty() {
|
||||
b.token(SRC_BLOCK_PARAMETERS, parameters);
|
||||
}
|
||||
b.ws(ws2);
|
||||
b.nl(nl);
|
||||
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
|
||||
} else if name.s.eq_ignore_ascii_case("EXPORT") {
|
||||
let (input, ty) = opt(tuple((
|
||||
space1,
|
||||
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
|
||||
)))(input)?;
|
||||
let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?;
|
||||
let (input, nl) = eol_or_eof(input)?;
|
||||
|
||||
if let Some((ws, ty)) = ty {
|
||||
b.ws(ws);
|
||||
b.token(EXPORT_BLOCK_TYPE, ty);
|
||||
}
|
||||
b.text(data);
|
||||
b.nl(nl);
|
||||
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
|
||||
} else {
|
||||
let (input, data) = take_while(|c: char| c != '\n' && c != '\r')(input)?;
|
||||
let (input, nl) = eol_or_eof(input)?;
|
||||
|
||||
b.text(data);
|
||||
b.nl(nl);
|
||||
Ok((input, (b.finish(BLOCK_BEGIN), name.as_str())))
|
||||
}
|
||||
}
|
||||
|
||||
fn source_block_switches(input: Input) -> IResult<Input, Input, ()> {
|
||||
let mut i = input;
|
||||
|
||||
while !i.is_empty() {
|
||||
match tuple::<_, _, (), _>((
|
||||
cond(i.input_len() != input.input_len(), space1),
|
||||
alt((
|
||||
separated_pair(
|
||||
alt((tag("-l"), tag("-n"))),
|
||||
space1,
|
||||
take_while1(|c: char| c != ' ' && c != '\t' && c != '\n' && c != '\r'),
|
||||
),
|
||||
tuple((tag("+"), alpha1)),
|
||||
tuple((tag("-"), alpha1)),
|
||||
)),
|
||||
))(i)
|
||||
{
|
||||
Ok((i_, _)) => i = i_,
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
let len = input.input_len() - i.input_len();
|
||||
|
||||
if len == 0 {
|
||||
Err(nom::Err::Error(()))
|
||||
} else {
|
||||
Ok(input.take_split(len))
|
||||
}
|
||||
}
|
||||
|
||||
fn block_end_node<'a>(input: Input<'a>, name: &str) -> IResult<Input<'a>, GreenElement, ()> {
|
||||
|
|
@ -112,55 +182,6 @@ fn comma_quoted_text_nodes(input: Input) -> Vec<GreenElement> {
|
|||
nodes
|
||||
}
|
||||
|
||||
/// Tokenizes the argument part of a block's begin line into green elements.
///
/// Emits alternating whitespace (`b.ws`) and text (`b.text`) children: every
/// blank-separated word becomes a text token, and a word starting with `:` is
/// additionally followed by its value, which runs up to the next ` :` / `\t:`
/// or to the end of `input`.
///
/// Returns the value of `i` at loop exit together with the collected
/// children; fails when a word is not preceded by whitespace.
fn block_argument(input: Input) -> IResult<Input, Vec<GreenElement>, ()> {
|
||||
let mut b = NodeBuilder::new();
|
||||
|
||||
let mut i = input;
|
||||
|
||||
while !i.is_empty() {
|
||||
// Every word must be preceded by at least one blank.
let (input, ws) = space1(i)?;
|
||||
b.ws(ws);
|
||||
let (input, name) = take_while1(|c| c != ' ' && c != '\t')(input)?;
|
||||
b.text(name);
|
||||
// A word that is not a `:key`, or a key that ends the input, has no value to collect.
if !name.s.starts_with(':') || input.is_empty() {
|
||||
// Each iteration must consume input, otherwise the loop would never end.
debug_assert!(
|
||||
input.s.len() < i.s.len(),
|
||||
"{} < {}",
|
||||
input.s.len(),
|
||||
i.s.len()
|
||||
);
|
||||
i = input;
|
||||
continue;
|
||||
}
|
||||
let (input, ws) = space1(input)?;
|
||||
b.ws(ws);
|
||||
|
||||
// Find where the next key starts: the first " :", falling back to "\t:"
// only when no " :" exists in the remaining input.
if let Some(idx) = Substring::new(" :")
|
||||
.find(input.s)
|
||||
.or_else(|| Substring::new("\t:").find(input.s))
|
||||
{
|
||||
// Cut the value right after its last non-blank char so the trailing
// blanks are picked up as whitespace by the next iteration.
// NOTE(review): `i + 1` assumes that char is one byte wide — confirm
// behavior for multi-byte text.
let idx = input.s[0..idx]
|
||||
.rfind(|c| c != ' ' && c != '\t')
|
||||
.map(|i| i + 1)
|
||||
.unwrap_or(idx);
|
||||
let (input, argument) = input.take_split(idx);
|
||||
b.text(argument);
|
||||
debug_assert!(
|
||||
input.s.len() < i.s.len(),
|
||||
"{} < {}",
|
||||
input.s.len(),
|
||||
i.s.len()
|
||||
);
|
||||
i = input;
|
||||
} else {
|
||||
// No further key: the rest of the input is the value. `i` is not
// advanced here, so the returned remainder still points before it.
b.text(input);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok((i, b.children))
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug", skip(input), fields(input = input.s))]
|
||||
pub fn block_node(input: Input) -> IResult<Input, GreenElement, ()> {
|
||||
crate::lossless_parser!(block_node_base, input)
|
||||
|
|
@ -255,19 +276,11 @@ alert('Hello World!');
|
|||
TEXT@0..8 "#+BEGIN_"
|
||||
TEXT@8..11 "SRC"
|
||||
WHITESPACE@11..12 " "
|
||||
TEXT@12..22 "javascript"
|
||||
SRC_BLOCK_LANGUAGE@12..22 "javascript"
|
||||
WHITESPACE@22..24 " "
|
||||
TEXT@24..26 "-n"
|
||||
WHITESPACE@26..27 " "
|
||||
TEXT@27..29 "20"
|
||||
WHITESPACE@29..30 " "
|
||||
TEXT@30..32 "-r"
|
||||
SRC_BLOCK_SWITCHES@24..32 "-n 20 -r"
|
||||
WHITESPACE@32..34 " "
|
||||
TEXT@34..38 ":var"
|
||||
WHITESPACE@38..39 " "
|
||||
TEXT@39..47 "n=0, l=2"
|
||||
WHITESPACE@47..49 " "
|
||||
TEXT@49..57 ":foo=bar"
|
||||
SRC_BLOCK_PARAMETERS@34..57 ":var n=0, l=2 :foo=bar"
|
||||
NEW_LINE@57..58 "\n"
|
||||
BLOCK_CONTENT@58..81
|
||||
TEXT@58..81 "alert('Hello World!');\n"
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ macro_rules! lossless_parser {
|
|||
($parser:expr, $input:expr) => {{
|
||||
let i_ = $input;
|
||||
let (i, o) = $parser($input)?;
|
||||
tracing::info!(consumed = o.to_string());
|
||||
tracing::trace!(consumed = o.to_string());
|
||||
debug_assert_eq!(
|
||||
&i_.as_str()[0..(i_.s.len() - i.s.len())],
|
||||
&o.to_string(),
|
||||
|
|
@ -292,7 +292,9 @@ impl NodeBuilder {
|
|||
}
|
||||
|
||||
pub fn text(&mut self, i: Input) {
|
||||
self.children.push(i.text_token())
|
||||
if !i.is_empty() {
|
||||
self.children.push(i.text_token())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn token(&mut self, kind: SyntaxKind, i: Input) {
|
||||
|
|
|
|||
|
|
@ -110,7 +110,6 @@ fn parse() {
|
|||
COLON@3..4 ":"
|
||||
TEXT@4..8 "WORD"
|
||||
R_BRACKET@8..9 "]"
|
||||
TEXT@9..9 ""
|
||||
"###
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -178,6 +178,10 @@ pub enum SyntaxKind {
|
|||
BLOCK_BEGIN,
|
||||
BLOCK_END,
|
||||
BLOCK_CONTENT,
|
||||
SRC_BLOCK_SWITCHES,
|
||||
SRC_BLOCK_LANGUAGE,
|
||||
SRC_BLOCK_PARAMETERS,
|
||||
EXPORT_BLOCK_TYPE,
|
||||
LATEX_ENVIRONMENT,
|
||||
|
||||
//
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue