diff --git a/Cargo.toml b/Cargo.toml
index 5cfcac2..8ca8bee 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,10 +13,11 @@ readme = "README.md"
 description = "Grammar framework."
 
 [dependencies]
+cyclotron = "0.0.3"
+elsa = "1.3.2"
 indexmap = "1"
 indexing = "0.3.2"
 proc-macro2 = "0.4.30"
-elsa = "1.3.2"
 
 [lib]
 doctest = false
diff --git a/src/forest.rs b/src/forest.rs
index b097f19..961a57b 100644
--- a/src/forest.rs
+++ b/src/forest.rs
@@ -5,6 +5,8 @@ use std::collections::{BTreeSet, HashMap, VecDeque};
 use std::fmt;
 use std::hash::Hash;
 use std::io::{self, Write};
+use std::iter;
+use std::rc::Rc;
 use std::str;
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -72,25 +74,25 @@ impl<P: fmt::Debug> fmt::Debug for Node<'_, P> {
     }
 }
 
 /// A parse forest, in SPPF (Shared Packed Parse Forest) representation.
-pub struct ParseForest<'i, G: GrammarReflector, I: Input> {
+pub struct ParseForest<'i, P, G, I: Input> {
     pub grammar: G,
     // HACK(eddyb) `pub(crate)` only for `parser`.
     pub(crate) input: Container<'i, I::Container>,
-    pub(crate) possible_choices: HashMap<Node<'i, G::NodeKind>, BTreeSet<G::NodeKind>>,
-    pub(crate) possible_splits: HashMap<Node<'i, G::NodeKind>, BTreeSet<usize>>,
+    pub(crate) possible_choices: HashMap<Node<'i, P>, BTreeSet<P>>,
+    pub(crate) possible_splits: HashMap<Node<'i, P>, BTreeSet<usize>>,
 }
 
 type_lambda! {
-    pub type<'i> ParseForestL<G, I> = ParseForest<'i, G, I>;
+    pub type<'i> ParseForestL<P, G, I> = ParseForest<'i, P, G, I>;
     pub type<'i> NodeL<P> = Node<'i, P>;
 }
 
-pub type OwnedParseForestAndNode<G, P, I> = ExistsL<PairL<ParseForestL<G, I>, NodeL<P>>>;
+pub type OwnedParseForestAndNode<G, P, I> = ExistsL<PairL<ParseForestL<P, G, I>, NodeL<P>>>;
 
 #[derive(Debug)]
 pub struct MoreThanOne;
 
-impl<'i, P, G, I: Input> ParseForest<'i, G, I>
+impl<'i, P, G, I: Input> ParseForest<'i, P, G, I>
 where
     // FIXME(eddyb) these shouldn't be needed, as they are bounds on
     // `GrammarReflector::NodeKind`, but that's ignored currently.
@@ -225,14 +227,17 @@ where
         }
     }
 
-    pub fn dump_graphviz(&self, out: &mut dyn Write) -> io::Result<()> {
+    pub fn dump_graphviz(&self, root: Option<Node<'i, P>>, out: &mut dyn Write) -> io::Result<()> {
         writeln!(out, "digraph forest {{")?;
-        let mut queue: VecDeque<_> = self
-            .possible_choices
-            .keys()
-            .chain(self.possible_splits.keys())
-            .cloned()
-            .collect();
+        let mut queue: VecDeque<_> = match root {
+            Some(root) => iter::once(root).collect(),
+            None => self
+                .possible_choices
+                .keys()
+                .chain(self.possible_splits.keys())
+                .cloned()
+                .collect(),
+        };
         let mut seen: BTreeSet<_> = queue.iter().cloned().collect();
         let mut p = 0;
         let node_name = |Node { kind, range }| {
@@ -293,6 +298,101 @@ where
     }
 }
 
+// TODO(eddyb) remove this entirely, only user of it left is `ListHandle`.
+#[derive(Clone, Debug)]
+pub struct DynExpandedTree<'i, P> {
+    pub node: Node<'i, P>,
+    pub kind: DynExpandedTreeKind<'i, P>,
+}
+
+#[derive(Clone, Debug)]
+pub enum DynExpandedTreeKind<'i, P> {
+    Leaf,
+    Or(P, Rc<DynExpandedTree<'i, P>>),
+    Opt(Option<Rc<DynExpandedTree<'i, P>>>),
+    Concat([Rc<DynExpandedTree<'i, P>>; 2]),
+}
+
+impl<'i, P> DynExpandedTree<'i, P>
+where
+    P: fmt::Debug + Ord + Hash + Copy,
+{
+    pub fn one_from_node<G, I>(
+        forest: &ParseForest<'i, P, G, I>,
+        node: Node<'i, P>,
+    ) -> Result<Rc<Self>, MoreThanOne>
+    where
+        G: GrammarReflector<NodeKind = P>,
+        I: Input,
+    {
+        let kind = match forest.grammar.node_shape(node.kind) {
+            NodeShape::Opaque | NodeShape::Alias(_) => DynExpandedTreeKind::Leaf,
+            NodeShape::Choice => {
+                let child = forest.one_choice(node)?;
+                DynExpandedTreeKind::Or(child.kind, Self::one_from_node(forest, child)?)
+            }
+            NodeShape::Opt(_) => DynExpandedTreeKind::Opt(match forest.unpack_opt(node) {
+                Some(child) => Some(Self::one_from_node(forest, child)?),
+                None => None,
+            }),
+            NodeShape::Split(..) => {
+                let (left, right) = forest.one_split(node)?;
+                DynExpandedTreeKind::Concat([
+                    Self::one_from_node(forest, left)?,
+                    Self::one_from_node(forest, right)?,
+                ])
+            }
+        };
+        Ok(Rc::new(DynExpandedTree { node, kind }))
+    }
+
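+    /// Enumerate *every* expansion of `node`: each ambiguous choice or split
+    /// multiplies the results (the two halves of a `Split` combine as a
+    /// cartesian product), so the returned `Vec` can grow combinatorially.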
+    pub fn all_from_node<G, I>(
+        forest: &ParseForest<'i, P, G, I>,
+        node: Node<'i, P>,
+    ) -> Vec<Rc<Self>>
+    where
+        G: GrammarReflector<NodeKind = P>,
+        I: Input,
+    {
+        let new = |kind| Rc::new(DynExpandedTree { node, kind });
+        match forest.grammar.node_shape(node.kind) {
+            NodeShape::Opaque | NodeShape::Alias(_) => vec![new(DynExpandedTreeKind::Leaf)],
+            NodeShape::Choice => forest
+                .all_choices(node)
+                .flat_map(|child| {
+                    Self::all_from_node(forest, child)
+                        .into_iter()
+                        .map(move |child_tree| new(DynExpandedTreeKind::Or(child.kind, child_tree)))
+                })
+                .collect(),
+            NodeShape::Opt(_) => match forest.unpack_opt(node) {
+                Some(child) => Self::all_from_node(forest, child)
+                    .into_iter()
+                    .map(|child_tree| new(DynExpandedTreeKind::Opt(Some(child_tree))))
+                    .collect(),
+                None => vec![new(DynExpandedTreeKind::Opt(None))],
+            },
+            NodeShape::Split(..) => forest
+                .all_splits(node)
+                .flat_map(|(left, right)| {
+                    Self::all_from_node(forest, left)
+                        .into_iter()
+                        .flat_map(move |left_tree| {
+                            Self::all_from_node(forest, right)
+                                .into_iter()
+                                .map(move |right_tree| {
+                                    new(DynExpandedTreeKind::Concat([
+                                        left_tree.clone(),
+                                        right_tree,
+                                    ]))
+                                })
+                        })
+                })
+                .collect(),
+        }
+    }
+}
+
 // FIXME(rust-lang/rust#54175) work around iterator adapter compile-time
 // blowup issues by using a makeshift "non-determinism arrow toolkit".
 pub mod nd {
diff --git a/src/lib.rs b/src/lib.rs
index 93fd2d8..11f7949 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,6 +14,8 @@ pub mod forest;
 #[forbid(unsafe_code)]
 pub mod input;
 #[forbid(unsafe_code)]
+pub mod lyg;
+#[forbid(unsafe_code)]
 pub mod parser;
 #[forbid(unsafe_code)]
 pub mod proc_macro;
@@ -21,6 +23,8 @@ pub mod proc_macro;
 pub mod rule;
 #[forbid(unsafe_code)]
 pub mod scannerless;
+#[forbid(unsafe_code)]
+pub mod slow_bruteforce_interpreter;
 
 // HACK(eddyb) this contains impls for types in `proc_macro`, which depend on
 // `input`, collapse this back into `proc_macro`.
@@ -78,101 +82,3 @@ impl<Pat: MatchesEmpty> Grammar<Pat> {
         }
     }
 }
-
-/// Construct a (meta-)grammar for parsing a grammar.
-pub fn grammar_grammar<Pat: Eq + Hash + From<SPat>>(cx: &Context<Pat>) -> Grammar<Pat> {
-    use crate::rule::*;
-
-    // HACK(eddyb) more explicit subset of the grammar, for bootstrapping.
-    macro_rules! rule {
-        ({ $start:tt ..= $end:tt }) => {
-            eat($start..=$end)
-        };
-        ({ ! $pat:tt }) => {
-            negative_lookahead($pat)
-        };
-        ({ ! $start:tt ..= $end:tt }) => {
-            negative_lookahead($start..=$end)
-        };
-        ($rule:ident) => {
-            call(stringify!($rule))
-        };
-        ({ $name:ident : $rule:tt }) => {
-            rule!($rule).field(stringify!($name))
-        };
-        ({ $rule:tt ? }) => {
-            rule!($rule).opt()
-        };
-        ({ $elem:tt * }) => {
-            rule!($elem).repeat_many()
-        };
-        ({ $elem:tt + }) => {
-            rule!($elem).repeat_more()
-        };
-        ({ $elem:tt + % $sep:tt }) => {
-            rule!($elem).repeat_more_sep(rule!($sep), SepKind::Simple)
-        };
-        ({ $rule0:tt $(| $rule:tt)+ }) => {
-            rule!($rule0) $(| rule!($rule))+
-        };
-        ({ $rule0:tt $($rule:tt)* }) => {
-            rule!($rule0) $(+ rule!($rule))*
-        };
-        ($pat:expr) => {
-            eat($pat)
-        };
-    }
-
-    macro_rules! grammar {
-        ($($rule_name:ident = $($rule:tt)|+;)*) => ({
-            let mut grammar = Grammar::new();
-            $(grammar.define(
-                cx.intern(stringify!($rule_name)),
-                rule!({ $($rule)|+ }).finish(cx),
-            );)*
-            grammar
-        })
-    }
-
-    // Main grammar.
-    let mut grammar = grammar! {
-        Grammar = { FileStart {rules:{RuleDef*}} FileEnd };
-        RuleDef = { {name:Ident} "=" {rule:Or} ";" };
-        Or = {{"|"?} {rules:{Concat+ % "|"}}};
-        Concat = {rules:{Rule+}};
-        Rule = { {{ {field:Ident} ":" }?} {rule:Primary} {{modifier:Modifier}?} };
-        Primary =
-            {Eat:Pattern} |
-            {Call:Ident} |
-            {Group:{ "{" {{or:Or}?} "}" }};
-        Modifier =
-            {Opt:"?"} |
-            {Repeat:{ {repeat:Repeat} {{ {kind:SepKind} {sep:Primary} }?} }};
-        Repeat =
-            {Many:"*"} |
-            {More:"+"};
-        SepKind =
-            {Simple:"%"} |
-            // HACK(eddyb) should be "%%", but `rustc`'s `proc_macro` server doesn't
-            // always preserve jointness, except within multi-character Rust operators.
-            {Trailing:{"%" "%"}};
-        Pattern =
-            {Str:StrLit} |
-            {CharRange:{ {{start:CharLit}?} ".." {{end:CharLit}?} }} |
-            {CharRangeInclusive:{ {{start:CharLit}?} "..=" {end:CharLit} }};
-    };
-
-    // Lexical fragment of the grammar.
-    grammar.extend(grammar! {
-        FileStart = "";
-        FileEnd = "";
-
-        Ident = IDENT;
-
-        // FIXME(eddyb) restrict literals, once `proc_macro` allows it.
-        StrLit = LITERAL;
-        CharLit = LITERAL;
-    });
-
-    grammar
-}
diff --git a/src/lyg.rs b/src/lyg.rs
new file mode 100644
index 0000000..8319084
--- /dev/null
+++ b/src/lyg.rs
@@ -0,0 +1,329 @@
+use crate::context::Context;
+use crate::parser::ParseError;
+use crate::proc_macro::{FlatToken, Pat as PMPat, Span, TokenStream};
+use crate::rule;
+use crate::scannerless::Pat as SPat;
+use crate::Grammar;
+use std::hash::Hash;
+use std::ops::Bound;
+
+/// Construct a (meta-)grammar for parsing a `lyg` grammar.
+pub fn grammar<Pat: Eq + Hash + From<SPat>>(cx: &Context<Pat>) -> Grammar<Pat> {
+    use crate::rule::*;
+
+    // HACK(eddyb) more explicit subset of the grammar, for bootstrapping.
+    macro_rules! rule {
+        ({ $start:tt ..= $end:tt }) => {
+            eat($start..=$end)
+        };
+        ({ ! $pat:tt }) => {
+            negative_lookahead($pat)
+        };
+        ({ ! $start:tt ..= $end:tt }) => {
+            negative_lookahead($start..=$end)
+        };
+        ($rule:ident) => {
+            call(stringify!($rule))
+        };
+        ({ $name:ident : $rule:tt }) => {
+            rule!($rule).field(stringify!($name))
+        };
+        ({ $rule:tt ? }) => {
+            rule!($rule).opt()
+        };
+        ({ $elem:tt * }) => {
+            rule!($elem).repeat_many()
+        };
+        ({ $elem:tt + }) => {
+            rule!($elem).repeat_more()
+        };
+        ({ $elem:tt + % $sep:tt }) => {
+            rule!($elem).repeat_more_sep(rule!($sep), SepKind::Simple)
+        };
+        ({ $rule0:tt $(| $rule:tt)+ }) => {
+            rule!($rule0) $(| rule!($rule))+
+        };
+        ({ $rule0:tt $($rule:tt)* }) => {
+            rule!($rule0) $(+ rule!($rule))*
+        };
+        ($pat:expr) => {
+            eat($pat)
+        };
+    }
+
+    macro_rules! grammar {
+        ($($rule_name:ident = $($rule:tt)|+;)*) => ({
+            let mut grammar = Grammar::new();
+            $(grammar.define(
+                cx.intern(stringify!($rule_name)),
+                rule!({ $($rule)|+ }).finish(cx),
+            );)*
+            grammar
+        })
+    }
+
+    // Main grammar.
+    let mut grammar = grammar! {
+        Grammar = { FileStart {rules:{RuleDef*}} FileEnd };
+        RuleDef = { {name:Ident} "=" {rule:Or} ";" };
+        Or = {{"|"?} {rules:{Concat+ % "|"}}};
+        Concat = {rules:{Rule+}};
+        Rule = { {{ {field:Ident} ":" }?} {rule:Primary} {{modifier:Modifier}?} };
+        Primary =
+            {Eat:Pattern} |
+            {Call:Ident} |
+            {Group:{ "{" {{or:Or}?} "}" }};
+        Modifier =
+            {Opt:"?"} |
+            {Repeat:{ {repeat:Repeat} {{ {kind:SepKind} {sep:Primary} }?} }};
+        Repeat =
+            {Many:"*"} |
+            {More:"+"};
+        SepKind =
+            {Simple:"%"} |
+            // HACK(eddyb) should be "%%", but `rustc`'s `proc_macro` server doesn't
+            // always preserve jointness, except within multi-character Rust operators.
+            {Trailing:{"%" "%"}};
+        Pattern =
+            {Str:StrLit} |
+            {CharRange:{ {{start:CharLit}?} ".." {{end:CharLit}?} }} |
+            {CharRangeInclusive:{ {{start:CharLit}?} "..=" {end:CharLit} }};
+    };
+
+    // Lexical fragment of the grammar.
+    grammar.extend(grammar! {
+        FileStart = "";
+        FileEnd = "";
+
+        Ident = IDENT;
+
+        // FIXME(eddyb) restrict literals, once `proc_macro` allows it.
+        StrLit = LITERAL;
+        CharLit = LITERAL;
+    });
+
+    grammar
+}
+
+type Handle<'a, 'i> = crate::slow_bruteforce_interpreter::Handle<'a, 'i, PMPat, TokenStream>;
+
+macro_rules! handle {
+    (let _ = $handle:expr) => {
+        let _ = $handle;
+    };
+    (let $x:ident = $handle:expr) => {
+        let $x = $handle;
+    };
+    (@rec let { $($field:ident),* $(,)? } = $rec:expr) => {
+        let rec = &$rec;
+        $(handle!(let $field = rec.get_by_str(stringify!($field)).unwrap());)*
+    };
+    (let { $($pat:tt)* } = $handle:expr) => {
+        handle!(@rec let { $($pat)* } = $handle.one_record().unwrap())
+    };
+
+    (if let _ = $handle:ident $body:block) => {
+        match $handle { _ => $body }
+    };
+    (if let $x:ident = $handle:ident $body:block) => {
+        match $handle { $x => $body }
+    };
+    (@rec if let {} = $rec:ident $body:block) => {
+        match $rec { _ => $body }
+    };
+    (@rec if let { $field:ident: $pat:tt $(, $($rest:tt)*)? } = $rec:ident $body:block) => {
+        if let Some(x) = $rec.get_by_str(stringify!($field)) {
+            handle!(if let $pat = x {
+                handle!(@rec if let { $($($rest)*)? } = $rec $body)
+            })
+        }
+    };
+    (@rec if let { $field:ident $(,)? $(, $($rest:tt)*)? } = $rec:ident $body:block) => {
+        handle!(@rec if let { $field: $field $(, $($rest)*)? } = $rec $body)
+    };
+
+    (if let { $($pat:tt)* } = $handle:ident $body:block) => {
+        match $handle.one_record().unwrap() {
+            rec => handle!(@rec if let { $($pat)* } = rec $body)
+        }
+    };
+    (match $rec:ident { $($pat:tt => $e:expr),* $(,)? }) => {
+        loop {
+            $(handle!(if let $pat = $rec {
+                break $e;
+            });)*
+            #[allow(unreachable_code)] {
+                unreachable!();
+            }
+        }
+    };
+}
+
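+// A minimal usage sketch of `handle!` (hypothetical handles, for illustration):
+//
+//     handle!(let { name, rule } = rule_def);       // destructure one record
+//     handle!(if let { modifier } = this { ... });  // only if the field is present
+//     handle!(match this { {Opt:_} => ..., {Repeat:{ repeat }} => ... });
+//
+// The `let` forms panic on ambiguity (`one_record().unwrap()`), while the
+// `if let`/`match` forms just test which fields are present.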
+pub fn parse<Pat: Eq + Hash + From<SPat>>(
+    cx: &Context<Pat>,
+    stream: TokenStream,
+) -> Result<Grammar<Pat>, ParseError<Span>> {
+    let lyg_cx = &crate::proc_macro::Context::new();
+    let mut lyg_grammar;
+
+    let g = {
+        let cx = lyg_cx;
+        lyg_grammar = crate::proc_macro::builtin(cx);
+        lyg_grammar.extend(grammar(cx));
+        crate::slow_bruteforce_interpreter::parse(
+            cx,
+            &lyg_grammar,
+            cx.intern("Grammar"),
+            stream.clone(),
+        )
+    };
+
+    let mut grammar = Grammar::new();
+    g?.with(|g| {
+        handle!(let { rules } = g);
+        for rule_def in rules.as_list() {
+            handle!(let { name, rule } = rule_def.unwrap());
+            let name = match name.source() {
+                [FlatToken::Ident(ident)] => ident.to_string(),
+                _ => unreachable!(),
+            };
+            grammar.define(cx.intern(&name[..]), lower_or(rule, cx));
+        }
+    });
+    Ok(grammar)
+}
+
+fn lower_or<Pat: Eq + Hash + From<SPat>>(
+    this: Handle<'_, '_>,
+    cx: &Context<Pat>,
+) -> rule::RuleWithFields {
+    handle!(let { rules } = this);
+    let mut rules = rules.as_list().map(|rule| rule.unwrap());
+    let first = lower_concat(rules.next().unwrap(), cx);
+    rules.fold(first, |a, b| (a | lower_concat(b, cx)).finish(cx))
+}
+
+fn lower_concat<Pat: Eq + Hash + From<SPat>>(
+    this: Handle<'_, '_>,
+    cx: &Context<Pat>,
+) -> rule::RuleWithFields {
+    handle!(let { rules } = this);
+    rules
+        .as_list()
+        .map(|rule| rule.unwrap())
+        .fold(rule::empty().finish(cx), |a, b| {
+            (a + lower_rule(b, cx)).finish(cx)
+        })
+}
+
+fn lower_rule<Pat: Eq + Hash + From<SPat>>(
+    this: Handle<'_, '_>,
+    cx: &Context<Pat>,
+) -> rule::RuleWithFields {
+    handle!(let { rule } = this);
+    let mut rule = lower_primary(rule, cx);
+    handle!(if let { modifier } = this {
+        rule = lower_modifier(modifier, cx, rule);
+    });
+    handle!(if let { field } = this {
+        let field = match field.source() {
+            [FlatToken::Ident(ident)] => ident.to_string(),
+            _ => unreachable!(),
+        };
+        rule = rule.field(&field).finish(cx);
+    });
+    rule
+}
+
+fn lower_primary<Pat: Eq + Hash + From<SPat>>(
+    this: Handle<'_, '_>,
+    cx: &Context<Pat>,
+) -> rule::RuleWithFields {
+    handle!(match this {
+        {Eat:pat} => rule::eat(lower_pattern(pat)).finish(cx),
+        {Call:name} => {
+            let name = match name.source() {
+                [FlatToken::Ident(ident)] => ident.to_string(),
+                _ => unreachable!(),
+            };
+            rule::call(&name).finish(cx)
+        },
+        {Group:{ or }} => lower_or(or, cx),
+        {Group:_} => rule::empty().finish(cx),
+    })
+}
+
+fn lower_modifier<Pat: Eq + Hash + From<SPat>>(
+    this: Handle<'_, '_>,
+    cx: &Context<Pat>,
+    rule: rule::RuleWithFields,
+) -> rule::RuleWithFields {
+    handle!(match this {
+        {Opt:_} => rule.opt().finish(cx),
+        {Repeat:{ repeat, sep, kind }} => {
+            let repeat = repeat;
+            let sep = lower_primary(sep, cx);
+            let kind = lower_sep_kind(kind);
+            handle!(match repeat {
+                {Many:_} => rule.repeat_many_sep(sep, kind).finish(cx),
+                {More:_} => rule.repeat_more_sep(sep, kind).finish(cx),
+            })
+        },
+        {Repeat:{ repeat }} => {
+            let repeat = repeat;
+            handle!(match repeat {
+                {Many:_} => rule.repeat_many().finish(cx),
+                {More:_} => rule.repeat_more().finish(cx),
+            })
+        }
+    })
+}
+
+fn lower_sep_kind(this: Handle<'_, '_>) -> rule::SepKind {
+    handle!(match this {
+        {Simple:_} => rule::SepKind::Simple,
+        {Trailing:_} => rule::SepKind::Trailing,
+    })
+}
+
+fn lower_pattern(this: Handle<'_, '_>) -> SPat {
+    fn unescape(handle: Handle<'_, '_>) -> String {
+        let mut out = String::new();
+        let s = match handle.source() {
+            [FlatToken::Literal(lit)] => lit.to_string(),
+            _ => unreachable!(),
+        };
+        let mut chars = s[1..s.len() - 1].chars();
+        while let Some(c) = chars.next() {
+            let c = match c {
+                '\\' => match chars.next().unwrap() {
+                    't' => '\t',
+                    'n' => '\n',
+                    'r' => '\r',
+                    c => c,
+                },
+                _ => c,
+            };
+            out.push(c);
+        }
+        out
+    }
+    let unescape_char = |c| unescape(c).parse::<char>().unwrap();
+    handle!(match this {
+        {Str:s} => SPat::from(unescape(s)),
+        {CharRange:_} => SPat::from((
+            handle!(match this { {CharRange:{ start }} => Some(start), _ => None })
+                .map(unescape_char)
+                .map_or(Bound::Unbounded, Bound::Included),
+            handle!(match this { {CharRange:{ end }} => Some(end), _ => None })
+                .map(unescape_char)
+                .map_or(Bound::Unbounded, Bound::Excluded),
+        )),
+        {CharRangeInclusive:{ end }} => SPat::from((
+            handle!(match this { {CharRangeInclusive:{ start }} => Some(start), _ => None })
+                .map(unescape_char)
+                .map_or(Bound::Unbounded, Bound::Included),
+            Bound::Included(unescape_char(end)),
+        )),
+    })
+}
diff --git a/src/parser.rs b/src/parser.rs
index 9204b2b..b5fe72e 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -13,7 +13,7 @@ pub struct Parser<'a, 'i, G: GrammarReflector, I: Input, Pat> {
 }
 
 struct ParserState<'i, G: GrammarReflector, I: Input, Pat> {
-    forest: ParseForest<'i, G, I>,
+    forest: ParseForest<'i, G::NodeKind, G, I>,
     last_input_pos: Index<'i, Unknown>,
     expected_pats: Vec<Pat>,
 }
diff --git a/src/proc_macro_input.rs b/src/proc_macro_input.rs
index f7a0c77..dcb63aa 100644
--- a/src/proc_macro_input.rs
+++ b/src/proc_macro_input.rs
@@ -1,7 +1,7 @@
 use crate::input::{Input, InputMatch, Range};
-use crate::proc_macro::{flatten, FlatToken, FlatTokenPat, Span, TokenStream};
+use crate::proc_macro::{flatten, FlatToken, FlatTokenPat, Pat, Span, TokenStream};
 use indexing::{proof::Provable, Container, Index, Unknown};
-use std::ops;
+use std::ops::{self, Deref};
 
 impl Input for TokenStream {
     type Container = Vec<FlatToken>;
@@ -52,8 +52,20 @@ impl Input for TokenStream {
     }
 }
 
-impl InputMatch<[FlatTokenPat<&'_ str>]> for [FlatToken] {
-    fn match_left(&self, pat: &[FlatTokenPat<&str>]) -> Option<usize> {
+// FIXME(eddyb) can't use `Pats: AsRef<[FlatTokenPat<S>]>` as it doesn't constrain `S`.
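+// (A `Deref<Target = [FlatTokenPat<S>]>` bound avoids that problem: a type has
+// at most one `Deref` impl, and its `Target` mentions `S`, so `S` is uniquely
+// determined.)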
+impl<S: AsRef<str>, Pats: Deref<Target = [FlatTokenPat<S>]>> InputMatch<Pat<Pats>>
+    for [FlatToken]
+{
+    fn match_left(&self, pat: &Pat<Pats>) -> Option<usize> {
+        self.match_left(&*pat.0)
+    }
+    fn match_right(&self, pat: &Pat<Pats>) -> Option<usize> {
+        self.match_right(&*pat.0)
+    }
+}
+
+impl<S: AsRef<str>> InputMatch<[FlatTokenPat<S>]> for [FlatToken] {
+    fn match_left(&self, pat: &[FlatTokenPat<S>]) -> Option<usize> {
         if self
             .iter()
             .zip(pat)
@@ -66,7 +78,7 @@ impl InputMatch<[FlatTokenPat<&'_ str>]> for [FlatToken] {
             None
         }
     }
-    fn match_right(&self, pat: &[FlatTokenPat<&str>]) -> Option<usize> {
+    fn match_right(&self, pat: &[FlatTokenPat<S>]) -> Option<usize> {
         if self
             .iter()
             .zip(pat)
diff --git a/src/rule.rs b/src/rule.rs
index 821e585..f83cead 100644
--- a/src/rule.rs
+++ b/src/rule.rs
@@ -418,21 +418,39 @@ impl IRule {
             Rule::Concat([left, right]) => NodeShape::Split(left, right),
             Rule::Or(_) => NodeShape::Choice,
             Rule::Opt(rule) => NodeShape::Opt(rule),
-            Rule::RepeatMany(elem, sep) => NodeShape::Opt(cx.intern(Rule::RepeatMore(elem, sep))),
-            Rule::RepeatMore(rule, None) => {
-                NodeShape::Split(rule, cx.intern(Rule::RepeatMany(rule, None)))
+            Rule::RepeatMany(..) | Rule::RepeatMore(..) => {
+                NodeShape::Alias(self.expand_repeats(cx))
             }
-            Rule::RepeatMore(elem, Some((sep, SepKind::Simple))) => NodeShape::Split(
-                elem,
-                cx.intern(Rule::Opt(cx.intern(Rule::Concat([sep, self])))),
-            ),
-            Rule::RepeatMore(elem, Some((sep, SepKind::Trailing))) => NodeShape::Split(
+        }
+    }
+
+    pub fn expand_repeats<Pat: Eq + Hash>(self, cx: &Context<Pat>) -> Self {
+        match cx[self] {
+            Rule::Empty
+            | Rule::Eat(_)
+            | Rule::Call(_)
+            | Rule::Concat(_)
+            | Rule::Or(_)
+            | Rule::Opt(_) => self,
+
+            Rule::RepeatMany(elem, sep) => {
+                cx.intern(Rule::Opt(cx.intern(Rule::RepeatMore(elem, sep))))
+            }
+            Rule::RepeatMore(elem, sep) => cx.intern(Rule::Concat([
                 elem,
-                cx.intern(Rule::Opt(cx.intern(Rule::Concat([
-                    sep,
-                    cx.intern(Rule::RepeatMany(elem, Some((sep, SepKind::Trailing)))),
-                ])))),
-            ),
+                match sep {
+                    None => cx.intern(Rule::RepeatMany(elem, None)),
+                    Some((sep, kind)) => cx.intern(Rule::Opt(cx.intern(Rule::Concat([
+                        sep,
+                        match kind {
+                            SepKind::Simple => self,
+                            SepKind::Trailing => {
+                                cx.intern(Rule::RepeatMany(elem, Some((sep, SepKind::Trailing))))
+                            }
+                        },
+                    ])))),
+                },
+            ])),
         }
     }
diff --git a/src/scannerless.rs b/src/scannerless.rs
index 4139177..9410a08 100644
--- a/src/scannerless.rs
+++ b/src/scannerless.rs
@@ -1,3 +1,4 @@
+use crate::input::InputMatch;
 use crate::rule::{MatchesEmpty, MaybeKnown};
 use std::char;
 use std::fmt;
@@ -98,3 +99,30 @@ impl<S: AsRef<str>> MatchesEmpty for Pat<S> {
         })
     }
 }
+
+impl<S> InputMatch<Pat<S>> for str
+where
+    str: InputMatch<S> + InputMatch<RangeInclusive<char>>,
+{
+    fn match_left(&self, pat: &Pat<S>) -> Option<usize> {
+        match pat {
+            Pat::String(s) => self.match_left(s),
+            &Pat::Range(start, end) => self.match_left(&(start..=end)),
+        }
+    }
+    fn match_right(&self, pat: &Pat<S>) -> Option<usize> {
+        match pat {
+            Pat::String(s) => self.match_right(s),
+            &Pat::Range(start, end) => self.match_right(&(start..=end)),
+        }
+    }
+}
+
+impl InputMatch<String> for str {
+    fn match_left(&self, pat: &String) -> Option<usize> {
+        self.match_left(&pat[..])
+    }
+    fn match_right(&self, pat: &String) -> Option<usize> {
+        self.match_right(&pat[..])
+    }
+}
diff --git a/src/slow_bruteforce_interpreter.rs b/src/slow_bruteforce_interpreter.rs
new file mode 100644
index 0000000..1cd1025
--- /dev/null
+++ b/src/slow_bruteforce_interpreter.rs
@@ -0,0 +1,580 @@
+use crate::context::{Context, IFields, IRule, IStr};
+use crate::forest::{
+    DynExpandedTree, DynExpandedTreeKind, GrammarReflector, Node, NodeShape,
+    OwnedParseForestAndNode, ParseForest,
+};
+use crate::input::{Input, InputMatch, Range};
+use crate::parser::{ParseResult, Parser};
+use crate::rule::{Fields, Rule};
+use cyclotron::bruteforce;
+use indexmap::IndexMap;
+use std::cell::RefCell;
+use std::collections::BTreeSet;
+use std::fmt;
+use std::hash::Hash;
+use std::iter;
+use std::rc::Rc;
+
+pub struct SlowBruteforceInterpreter<'a, Pat> {
+    pub cx: &'a Context<Pat>,
+    pub grammar: &'a crate::Grammar<Pat>,
+}
+
+impl<Pat: Eq + Hash + fmt::Debug> GrammarReflector for SlowBruteforceInterpreter<'_, Pat> {
+    type NodeKind = IRule;
+
+    fn node_shape(&self, rule: IRule) -> NodeShape<IRule> {
+        rule.node_shape(self.cx, Some(&self.grammar.rules))
+    }
+    fn node_desc(&self, rule: IRule) -> String {
+        rule.node_desc(self.cx)
+    }
+}
+
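+// Brute-force strategy: `parse_inner` returns the *set of input lengths* a
+// rule can match at a given `range`, and `bruteforce::memoize` (from the
+// `cyclotron` crate) caches each `(rule, range)` query, re-evaluating cyclic
+// queries until their result sets stop growing. That fixpoint is what lets
+// (left-)recursive grammars terminate here. A sketch, on a hypothetical rule
+// (not one of the tests):
+//
+//     A = x:A "x" | "x";
+//
+// on input "xx", the first pass can only use the base case, giving {1};
+// re-running the cyclic `A` query then extends the set to {1, 2}.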
+pub fn parse<'a, Pat: Clone + Ord + Hash + fmt::Debug, I: Input>(
+    cx: &'a Context<Pat>,
+    grammar: &'a crate::Grammar<Pat>,
+    named_rule: IStr,
+    input: I,
+) -> ParseResult<I::SourceInfoPoint, Pat, OwnedHandle<'a, Pat, I>>
+where
+    I::Slice: InputMatch<Pat>,
+{
+    fn parse_inner<'i, Pat: Clone + Ord + Hash + fmt::Debug, I: Input>(
+        cx: &Context<Pat>,
+        grammar: &crate::Grammar<Pat>,
+        parser: &RefCell<Parser<'_, 'i, SlowBruteforceInterpreter<'_, Pat>, I, Pat>>,
+        parse_cached: &mut dyn FnMut((IRule, Range<'i>)) -> BTreeSet<usize>,
+        rule: IRule,
+        range: Range<'i>,
+    ) -> BTreeSet<usize>
+    where
+        I::Slice: InputMatch<Pat>,
+    {
+        match cx[rule] {
+            Rule::Empty => iter::once(0).collect(),
+            // FIXME(eddyb) find a way to avoid cloning the pattern.
+            Rule::Eat(ref pat) => parser
+                .borrow_mut()
+                .with_result_and_remaining(Range(range.frontiers().0), range)
+                .input_consume_left(pat.clone())
+                .map(|parser| parser.result().len())
+                .into_iter()
+                .collect(),
+            Rule::Call(r) => parse_cached((grammar.rules[&r].rule, range)),
+            Rule::Concat([left, right]) => {
+                parse_inner(cx, grammar, parser, parse_cached, left, range)
+                    .into_iter()
+                    .flat_map(|left_len| {
+                        let (left_result, after_left, _) = range.split_at(left_len);
+                        parse_inner(cx, grammar, parser, parse_cached, right, Range(after_left))
+                            .into_iter()
+                            .map(move |right_len| {
+                                let (right_result, after_right, _) =
+                                    after_left.split_at(right_len);
+                                parser
+                                    .borrow_mut()
+                                    .with_result_and_remaining(
+                                        Range(right_result),
+                                        Range(after_right),
+                                    )
+                                    .forest_add_split(
+                                        rule,
+                                        Node {
+                                            kind: left,
+                                            range: Range(left_result),
+                                        },
+                                    );
+                                left_len + right_len
+                            })
+                    })
+                    .collect()
+            }
+            Rule::Or(ref cases) => cases
+                .iter()
+                .flat_map(|&case| {
+                    parse_inner(cx, grammar, parser, parse_cached, case, range)
+                        .into_iter()
+                        .map(move |len| {
+                            let (result, remaining, _) = range.split_at(len);
+                            parser
+                                .borrow_mut()
+                                .with_result_and_remaining(Range(result), Range(remaining))
+                                .forest_add_choice(rule, case);
+                            len
+                        })
+                })
+                .collect(),
+            Rule::Opt(rule) => iter::once(0)
+                .chain(parse_inner(cx, grammar, parser, parse_cached, rule, range))
+                .collect(),
+            Rule::RepeatMany(..) | Rule::RepeatMore(..) => {
+                parse_cached((rule.expand_repeats(cx), range))
+            }
+        }
+    }
+
+    Parser::parse_with(SlowBruteforceInterpreter { cx, grammar }, input, |parser| {
+        let full_input = parser.remaining();
+        let parser = &RefCell::new(parser);
+        let results = bruteforce::memoize(|parse_cached, (rule, range)| {
+            parse_inner(cx, grammar, parser, parse_cached, rule, range)
+        })((grammar.rules[&named_rule].rule, full_input));
+        results
+            .into_iter()
+            .map(|len| Node {
+                kind: cx.intern(Rule::Call(named_rule)),
+                range: Range(full_input.split_at(len).0),
+            })
+            .rev()
+            .next()
+    })
+    .map(|forest_and_node| OwnedHandle { forest_and_node })
+}
+
+#[derive(Debug)]
+pub struct Ambiguity<T>(T);
+
+pub struct OwnedHandle<'a, Pat: Eq + Hash + fmt::Debug, I: Input> {
+    forest_and_node: OwnedParseForestAndNode<SlowBruteforceInterpreter<'a, Pat>, IRule, I>,
+}
+
+impl<Pat: Eq + Hash + fmt::Debug, I: Input> OwnedHandle<'_, Pat, I> {
+    pub fn source_info(&self) -> I::SourceInfo {
+        self.forest_and_node.unpack_ref(|_, forest_and_node| {
+            let (ref forest, node) = *forest_and_node;
+            forest.source_info(node.range)
+        })
+    }
+
+    pub fn with<R>(&self, f: impl FnOnce(Handle<'_, '_, Pat, I>) -> R) -> R {
+        self.forest_and_node.unpack_ref(|_, forest_and_node| {
+            let (ref forest, node) = *forest_and_node;
+            f(Handle {
+                forest,
+                node,
+                fields: None,
+            })
+        })
+    }
+}
+
+impl<Pat: Eq + Hash + fmt::Debug, I: Input> fmt::Debug for OwnedHandle<'_, Pat, I> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.with(|handle| handle.fmt(f))
+    }
+}
+
+pub struct Handle<'a, 'i, Pat: Eq + Hash + fmt::Debug, I: Input> {
+    pub forest: &'a ParseForest<'i, IRule, SlowBruteforceInterpreter<'a, Pat>, I>,
+    pub node: Node<'i, IRule>,
+    pub fields: Option<IFields>,
+}
+
+impl<Pat: Eq + Hash + fmt::Debug, I: Input> Copy for Handle<'_, '_, Pat, I> {}
+
+impl<Pat: Eq + Hash + fmt::Debug, I: Input> Clone for Handle<'_, '_, Pat, I> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<Pat: Eq + Hash + fmt::Debug, I: Input> fmt::Debug for Handle<'_, '_, Pat, I> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.source_info().fmt(f)?;
+
+        let cx = self.forest.grammar.cx;
+        let mut first = true;
+        if self.fields.is_some() {
+            f.write_str(" => ")?;
+            for x in self.all_records() {
+                if !first {
+                    f.write_str(" | ")?;
+                }
+                first = false;
+                x.fmt(f)?;
+            }
+        } else {
+            match cx[self.node.kind] {
+                Rule::Call(name) => {
+                    f.write_str(" => ")?;
+                    for x in self.all_records() {
+                        if !first {
+                            f.write_str(" | ")?;
+                        }
+                        first = false;
+                        f.write_str(&cx[name])?;
+                        f.write_str("(")?;
+                        x.fmt(f)?;
+                        f.write_str(")")?;
+                    }
+                }
+                Rule::RepeatMany(..) | Rule::RepeatMore(..) => {
+                    f.write_str(" => ")?;
+                    for x in self.all_lists() {
+                        if !first {
+                            f.write_str(" | ")?;
+                        }
+                        first = false;
+                        x.fmt(f)?;
+                    }
+                }
+                _ => {}
+            }
+        }
+        Ok(())
+    }
+}
+
+impl<'a, 'i, Pat: Eq + Hash + fmt::Debug, I: Input> Handle<'a, 'i, Pat, I> {
+    pub fn source(self) -> &'a I::Slice {
+        self.forest.input(self.node.range)
+    }
+
+    pub fn source_info(self) -> I::SourceInfo {
+        self.forest.source_info(self.node.range)
+    }
+
+    // FIXME(eddyb) maybe these should be deep?! then `DynExpandedTree` shouldn't
+    // be controlled using `Alias` but something else (and maybe stop using `Alias`
+    // for `Repeat{Many,More}`?). This is all kinda tricky.
+    pub fn as_list(mut self) -> ListHandle<'a, 'i, Pat, I> {
+        assert_eq!(self.fields, None);
+        let tree = match self.forest.grammar.cx[self.node.kind] {
+            Rule::RepeatMany(..) => {
+                // Can't be ambiguous, due to being `Opt`.
+                self.node = self.forest.unpack_alias(self.node);
+                DynExpandedTree::one_from_node(self.forest, self.node).unwrap()
+            }
+            Rule::RepeatMore(..) => {
+                // Might be ambiguous, fake it being a `Many`.
+                // NOTE(eddyb) the unwrap is fine because we haven't done `unpack_alias`.
+                let many = DynExpandedTree::one_from_node(self.forest, self.node).unwrap();
+                Rc::new(DynExpandedTree {
+                    node: self.node,
+                    kind: DynExpandedTreeKind::Opt(Some(many)),
+                })
+            }
+            _ => unreachable!("not a list"),
+        };
+        ListHandle {
+            forest: self.forest,
+            tree,
+        }
+    }
+
+    // FIXME(eddyb) move to `ListHandle` *or* make deep.
+    fn all_lists(mut self) -> impl Iterator<Item = ListHandle<'a, 'i, Pat, I>> {
+        assert_eq!(self.fields, None);
+        match self.forest.grammar.cx[self.node.kind] {
+            Rule::RepeatMany(..) | Rule::RepeatMore(..) => {}
+            _ => unreachable!("not a list"),
+        }
+        self.node = self.forest.unpack_alias(self.node);
+        DynExpandedTree::all_from_node(self.forest, self.node)
+            .into_iter()
+            .map(move |tree| ListHandle {
+                forest: self.forest,
+                tree,
+            })
+    }
+
+    pub fn one_record(self) -> Result<Record<'a, 'i, Pat, I>, Ambiguity<Self>> {
+        let forest = self.forest;
+        let rec = |fields| Ok(Record { forest, fields });
+        let cx = forest.grammar.cx;
+
+        let mut node = self.node;
+        let fields = self.fields.unwrap_or_else(|| match cx[node.kind] {
+            Rule::Call(name) => {
+                if let NodeShape::Alias(inner) = forest.grammar.node_shape(node.kind) {
+                    node.kind = inner;
+                }
+                forest.grammar.grammar.rules[&name].fields
+            }
+            _ => unreachable!("not a record"),
+        });
+
+        let children = match &cx[fields] {
+            Fields::Leaf(field) => {
+                return rec(field
+                    .into_iter()
+                    .map(|field| {
+                        (
+                            field.name,
+                            (
+                                node,
+                                if cx[field.sub] == Fields::Leaf(None) {
+                                    // HACK(eddyb) figure out a nicer way to communicate leaves.
+                                    None
+                                } else {
+                                    Some(field.sub)
+                                },
+                            ),
+                        )
+                    })
+                    .collect());
+            }
+            Fields::Aggregate(children) => children,
+        };
+        let one_child_record = |child, i| {
+            Handle {
+                forest,
+                node: child,
+                fields: Some(children[i]),
+            }
+            .one_record()
+        };
+
+        match &cx[node.kind] {
+            Rule::Concat(_) => {
+                let (left, right) = forest.one_split(node).map_err(|_| Ambiguity(self))?;
+                rec(one_child_record(left, 0)?
+                    .fields
+                    .into_iter()
+                    .chain(one_child_record(right, 1)?.fields)
+                    .collect())
+            }
+            Rule::Or(rules) => {
+                let child = forest.one_choice(node).map_err(|_| Ambiguity(self))?;
+                // FIXME(eddyb) use `IndexSet` in `Rule::Or`.
+                let i = rules.iter().position(|&rule| child.kind == rule).unwrap();
+                one_child_record(child, i)
+            }
+            Rule::Opt(_) => match forest.unpack_opt(node) {
+                Some(child) => one_child_record(child, 0),
+                None => rec(IndexMap::new()),
+            },
+            _ => unreachable!("not an aggregate"),
+        }
+    }
+
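+    /// Like `one_record`, but collects a `Record` for every way this node's
+    /// choices and splits can be resolved, rather than erroring on ambiguity.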
+    pub fn all_records(self) -> Vec<Record<'a, 'i, Pat, I>> {
+        let forest = self.forest;
+        let rec = |fields| Record { forest, fields };
+        let cx = forest.grammar.cx;
+
+        let mut node = self.node;
+        let fields = self.fields.unwrap_or_else(|| match cx[node.kind] {
+            Rule::Call(name) => {
+                if let NodeShape::Alias(inner) = forest.grammar.node_shape(node.kind) {
+                    node.kind = inner;
+                }
+                forest.grammar.grammar.rules[&name].fields
+            }
+            _ => unreachable!("not a record"),
+        });
+
+        let children = match &cx[fields] {
+            Fields::Leaf(field) => {
+                return vec![rec(field
+                    .into_iter()
+                    .map(|field| {
+                        (
+                            field.name,
+                            (
+                                node,
+                                if cx[field.sub] == Fields::Leaf(None) {
+                                    // HACK(eddyb) figure out a nicer way to communicate leaves.
+                                    None
+                                } else {
+                                    Some(field.sub)
+                                },
+                            ),
+                        )
+                    })
+                    .collect())];
+            }
+            Fields::Aggregate(children) => children,
+        };
+        let all_child_records = |child, i| {
+            Handle {
+                forest,
+                node: child,
+                fields: Some(children[i]),
+            }
+            .all_records()
+        };
+
+        match &cx[node.kind] {
+            Rule::Concat(_) => forest
+                .all_splits(node)
+                .flat_map(|(left, right)| {
+                    all_child_records(left, 0)
+                        .into_iter()
+                        .flat_map(move |left| {
+                            all_child_records(right, 1).into_iter().map(move |right| {
+                                rec(left
+                                    .fields
+                                    .iter()
+                                    .map(|(&name, &field)| (name, field))
+                                    .chain(right.fields)
+                                    .collect())
+                            })
+                        })
+                })
+                .collect(),
+            Rule::Or(rules) => {
+                forest
+                    .all_choices(node)
+                    .flat_map(|child| {
+                        // FIXME(eddyb) use `IndexSet` in `Rule::Or`.
+                        let i = rules.iter().position(|&rule| child.kind == rule).unwrap();
+                        all_child_records(child, i)
+                    })
+                    .collect()
+            }
+            Rule::Opt(_) => match forest.unpack_opt(node) {
+                Some(child) => all_child_records(child, 0),
+                None => vec![rec(IndexMap::new())],
+            },
+            _ => unreachable!("not an aggregate"),
+        }
+    }
+}
+
+pub struct ListHandle<'a, 'i, Pat: Eq + Hash + fmt::Debug, I: Input> {
+    pub forest: &'a ParseForest<'i, IRule, SlowBruteforceInterpreter<'a, Pat>, I>,
+    tree: Rc<DynExpandedTree<'i, IRule>>,
+}
+
+impl<Pat: Eq + Hash + fmt::Debug, I: Input> Clone for ListHandle<'_, '_, Pat, I> {
+    fn clone(&self) -> Self {
+        ListHandle {
+            forest: self.forest,
+            tree: self.tree.clone(),
+        }
+    }
+}
+
+impl<'a, 'i, Pat: Eq + Hash + fmt::Debug, I: Input> Iterator for ListHandle<'a, 'i, Pat, I> {
+    type Item = Result<Handle<'a, 'i, Pat, I>, Ambiguity<Handle<'a, 'i, Pat, I>>>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match &self.tree.kind {
+            DynExpandedTreeKind::Opt(Some(more)) => {
+                let more = self.forest.unpack_alias(more.node);
+                match DynExpandedTree::one_from_node(self.forest, more) {
+                    Ok(more) => self.tree = more,
+                    Err(_) => {
+                        return Some(Err(Ambiguity(Handle {
+                            forest: self.forest,
+                            node: more,
+                            fields: None,
+                        })))
+                    }
+                }
+            }
+            DynExpandedTreeKind::Opt(None) => return None,
+            _ => {}
+        }
+        match &self.tree.kind {
+            DynExpandedTreeKind::Concat([elem, tail]) => {
+                let elem = Handle {
+                    forest: self.forest,
+                    node: elem.node,
+                    fields: None,
+                };
+
+                self.tree = tail.clone();
+                loop {
+                    match &self.tree.kind {
+                        // HACK(eddyb) this only works because it's handled first
+                        // in the next `<ListHandle as Iterator>::next` call, even
+                        // if it might otherwise not be the right rule.
+                        DynExpandedTreeKind::Opt(None) => return Some(Ok(elem)),
+                        DynExpandedTreeKind::Opt(Some(tail))
+                        | DynExpandedTreeKind::Concat([_, tail]) => {
+                            self.tree = tail.clone();
+                        }
+                        DynExpandedTreeKind::Leaf => {
+                            *self = Handle {
+                                forest: self.forest,
+                                node: self.tree.node,
+                                fields: None,
+                            }
+                            .as_list();
+                            return Some(Ok(elem));
+                        }
+                        _ => unreachable!(),
+                    }
+                }
+            }
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl<Pat: Eq + Hash + fmt::Debug, I: Input> fmt::Debug for ListHandle<'_, '_, Pat, I> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        struct Spread<T>(T);
+        impl<T: fmt::Debug> fmt::Debug for Spread<T> {
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                f.write_str("...(")?;
+                self.0.fmt(f)?;
+                f.write_str(")")
+            }
+        }
+
+        let mut f = f.debug_list();
+        for x in self.clone() {
+            match x {
+                Ok(elem) => {
+                    f.entry(&elem);
+                }
+                Err(Ambiguity(tail)) => {
+                    f.entry(&Spread(tail));
+                    break;
+                }
+            }
+        }
+        f.finish()
+    }
+}
+
+pub struct Record<'a, 'i, Pat: Eq + Hash + fmt::Debug, I: Input> {
+    pub forest: &'a ParseForest<'i, IRule, SlowBruteforceInterpreter<'a, Pat>, I>,
+    fields: IndexMap<IStr, (Node<'i, IRule>, Option<IFields>)>,
+}
+
+impl<'a, 'i, Pat: Eq + Hash + fmt::Debug, I: Input> Record<'a, 'i, Pat, I> {
+    pub fn get(&self, name: IStr) -> Option<Handle<'a, 'i, Pat, I>> {
+        self.fields.get(&name).map(|&(node, fields)| Handle {
+            forest: self.forest,
+            node,
+            fields,
+        })
+    }
+
+    pub fn get_by_str(&self, name: &str) -> Option<Handle<'a, 'i, Pat, I>> {
+        self.get(self.forest.grammar.cx.intern(name))
+    }
+}
+
+impl<Pat: Eq + Hash + fmt::Debug, I: Input> fmt::Debug for Record<'_, '_, Pat, I> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        struct FieldName<'a>(&'a str);
+        impl fmt::Debug for FieldName<'_> {
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                f.write_str(self.0)
+            }
+        }
+
+        let forest = self.forest;
+        let cx = forest.grammar.cx;
+
+        if self.fields.is_empty() {
+            return f.write_str("_");
+        }
+
+        let mut f = f.debug_map();
+        for (&name, &(node, subfields)) in &self.fields {
+            let field = Handle {
+                forest,
+                node,
+                fields: subfields,
+            };
+            f.entry(&FieldName(&cx[name]), &field);
+        }
+        f.finish()
+    }
+}
diff --git a/tests/basic.rs b/tests/basic.rs
new file mode 100644
index 0000000..c8c38cf
--- /dev/null
+++ b/tests/basic.rs
@@ -0,0 +1,219 @@
+#![deny(rust_2018_idioms)]
+
+use std::fs::File;
+
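+// Each `testcases!` entry below expands to one `#[test]`: the braced block is
+// `lyg` grammar source (round-tripped through `stringify!` and re-parsed), and
+// every `Rule("input") => expected` pair asserts that the `{:#?}` rendering of
+// the parse result (or the error message) matches `expected`. A minimal
+// hypothetical entry would look like:
+//
+//     ab { A = "a" "b"; }:
+//     A("ab") => "1:1-1:3 => A(_)";
+//
+// (the exact expected string depends on the interpreter's `Debug` output).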
+macro_rules! testcases {
+    ($($name:ident { $($grammar:tt)* }: $($rule:ident($input:expr) => $expected:expr),* ;)*) => {
+        $(#[test]
+        fn $name() {
+            let cx = &grammer::scannerless::Context::new();
+            let grammar = &grammer::lyg::parse(
+                cx,
+                stringify!($($grammar)*).parse::<grammer::proc_macro::TokenStream>().unwrap(),
+            ).unwrap();
+            grammar.check(cx);
+
+            $(
+                let rule = cx.intern(stringify!($rule));
+                let result = grammer::slow_bruteforce_interpreter::parse(cx, grammar, rule, $input);
+                if let Ok(result) = &result {
+                    result.with(|result| {
+                        result.forest
+                            .dump_graphviz(
+                                Some(result.node),
+                                &mut File::create(concat!(
+                                    env!("CARGO_MANIFEST_DIR"),
+                                    "/target/",
+                                    stringify!($name),
+                                    "-forest.dot"
+                                )).unwrap(),
+                            ).unwrap();
+                    });
+                }
+
+                let result = match &result {
+                    Ok(result) => format!("{:#?}", result),
+                    Err(grammer::parser::ParseError {
+                        at,
+                        expected,
+                    }) => {
+                        format!("{:?}: error: expected {:?}", at, expected)
+                    }
+                };
+
+                assert!(
+                    result == $expected,
+                    "mismatched output, expected:\n{}\n\nfound:\n{}",
+                    $expected,
+                    result
+                );
+            )*
+        })*
+    };
+}
+
+testcases![
+    gll10_g0 {
+        S = X:{ a:A s:S d:"d" } |
+            Y:{ b:B s:S } |
+            Z:{};
+
+        A = A:"a" |
+            C:"c";
+
+        B = A:"a" |
+            B:"b";
+    }:
+    S("aad") => "\
+1:1-1:4 => S({
+    X: 1:1-1:4 => {
+        a: 1:1-1:2 => A({
+            A: 1:1-1:2,
+        }),
+        s: 1:2-1:3 => S({
+            Y: 1:2-1:3 => {
+                b: 1:2-1:3 => B({
+                    A: 1:2-1:3,
+                }),
+                s: 1:3-1:3 => S({
+                    Z: 1:3-1:3,
+                }),
+            },
+        }),
+        d: 1:3-1:4,
+    },
+}) | S({
+    Y: 1:1-1:4 => {
+        b: 1:1-1:2 => B({
+            A: 1:1-1:2,
+        }),
+        s: 1:2-1:4 => S({
+            X: 1:2-1:4 => {
+                a: 1:2-1:3 => A({
+                    A: 1:2-1:3,
+                }),
+                s: 1:3-1:3 => S({
+                    Z: 1:3-1:3,
+                }),
+                d: 1:3-1:4,
+            },
+        }),
+    },
+})",
+// FIXME(eddyb) replace quotes with backticks and prettify the `expected` list.
+    S("aax") => r#"1:3: error: expected ["a", "b", "c", "d"]"#;
+
+    gll10_g0_opaque {
+        S = { a:A s:S "d" } |
+            { b:B s:S } |
+            {};
+        A = "a" | "c";
+        B = "a" | "b";
+    }:
+    S("aad") => "\
+1:1-1:4 => S({
+    a: 1:1-1:2 => A(_),
+    s: 1:2-1:3 => S({
+        b: 1:2-1:3 => B(_),
+        s: 1:3-1:3 => S(_),
+    }),
+}) | S({
+    b: 1:1-1:2 => B(_),
+    s: 1:2-1:4 => S({
+        a: 1:2-1:3 => A(_),
+        s: 1:3-1:3 => S(_),
+    }),
+})",
+// FIXME(eddyb) replace quotes with backticks and prettify the `expected` list.
+    S("aax") => r#"1:3: error: expected ["a", "b", "c", "d"]"#;
+
+    gll13_g1 {
+        S = X:{ a:"a" s:S b:"b" } |
+            Y:{ "d" } |
+            Z:{ a:"a" d:"d" b:"b" };
+    }:
+    S("adb") => "\
+1:1-1:4 => S({
+    X: 1:1-1:4 => {
+        a: 1:1-1:2,
+        s: 1:2-1:3 => S({
+            Y: 1:2-1:3,
+        }),
+        b: 1:3-1:4,
+    },
+}) | S({
+    Z: 1:1-1:4 => {
+        a: 1:1-1:2,
+        d: 1:2-1:3,
+        b: 1:3-1:4,
+    },
+})",
+// FIXME(eddyb) replace quotes with backticks and prettify the `expected` list.
+    S("aax") => r#"1:3: error: expected ["a", "d"]"#;
+
+    gll15_g0 {
+        A = X:{ a:"a" x:A b:"b" } |
+            Y:{ a:"a" x:A c:"c" } |
+            Z:{ "a" };
+    }:
+    A("aac") => "\
+1:1-1:4 => A({
+    Y: 1:1-1:4 => {
+        a: 1:1-1:2,
+        x: 1:2-1:3 => A({
+            Z: 1:2-1:3,
+        }),
+        c: 1:3-1:4,
+    },
+})",
+// FIXME(eddyb) replace quotes with backticks and prettify the `expected` list.
+    A("aax") => r#"1:3: error: expected ["a", "b", "c"]"#;
+
+    gll15_g0_nested {
+        A = X:{ a:"a" { x:A b:"b" } } |
+            Y:{ a:"a" x:A c:"c" } |
+            Z:{ "a" "" };
+    }:
+    A("aab") => "\
+1:1-1:4 => A({
+    X: 1:1-1:4 => {
+        a: 1:1-1:2,
+        x: 1:2-1:3 => A({
+            Z: 1:2-1:3,
+        }),
+        b: 1:3-1:4,
+    },
+})",
+// FIXME(eddyb) replace quotes with backticks and prettify the `expected` list.
+    A("aax") => r#"1:3: error: expected ["a", "b", "c"]"#;
+
+    repeat_many_trailing {
+        A = elems:"a"* %% "b";
+    }:
+    A("abab") => "\
+1:1-1:5 => A({
+    elems: 1:1-1:5 => [
+        1:1-1:2,
+        1:3-1:4,
+    ],
+})",
+    A("aba") => "\
+1:1-1:4 => A({
+    elems: 1:1-1:4 => [
+        1:1-1:2,
+        1:3-1:4,
+    ],
+})",
+// FIXME(eddyb) replace quotes with backticks and prettify the `expected` list.
+    A("b") => r#"1:1: error: expected ["a"]"#;
+
+    nested_or {
+        A = x:"x" { a:"a" | b:"b" };
+    }:
+    A("xa") => "\
+1:1-1:3 => A({
+    x: 1:1-1:2,
+    a: 1:2-1:3,
+})",
+// FIXME(eddyb) replace quotes with backticks and prettify the `expected` list.
+    A("xy") => r#"1:2: error: expected ["a", "b"]"#;
+];
diff --git a/tests/json.rs b/tests/json.rs
new file mode 100644
index 0000000..824179f
--- /dev/null
+++ b/tests/json.rs
@@ -0,0 +1,162 @@
+#![deny(rust_2018_idioms)]
+
+const GRAMMAR: &str = stringify!(
+    Value =
+        | Null:"null"
+        | False:"false"
+        | True:"true"
+        | Literal:LITERAL
+        | Array:{ "[" elems:Value* % "," "]" }
+        | Object:{ "{" fields:Field* % "," "}" }
+        | InterpolateRust:{ "(" TOKEN_TREE+ ")" }
+        ;
+    Field = name:IDENT ":" value:Value;
+);
+
+fn json_like_testcase(input: &str, expected: &str) {
+    let cx = &grammer::proc_macro::Context::new();
+    let mut grammar = grammer::proc_macro::builtin(cx);
+    grammar.extend(
+        grammer::lyg::parse(
+            cx,
+            GRAMMAR.parse::<grammer::proc_macro::TokenStream>().unwrap(),
+        )
+        .unwrap(),
+    );
+    grammar.check(cx);
+
+    let tokens = input.parse::<grammer::proc_macro::TokenStream>().unwrap();
+
+    let rule = cx.intern("Value");
+    let result = grammer::slow_bruteforce_interpreter::parse(cx, &grammar, rule, tokens);
+    let result = match &result {
+        Ok(result) => format!("{:#?}", result),
+        Err(grammer::parser::ParseError { at, expected }) => {
+            format!("{:?}: error: expected {:?}", at, expected)
+        }
+    };
+
+    // HACK(eddyb) clean up the result, as we have no span info.
+    let result = result
+        .replace("Span", "?")
+        .replace("?..? => ", "")
+        .replace("?..?", "?");
+
+    assert!(
+        result == expected,
+        "mismatched output, expected:\n{}\n\nfound:\n{}",
+        expected,
+        result
+    );
+}
+
+#[test]
+fn json_like_success() {
+    let input = stringify! {
+        // Example from `serde_json`.
+        {
+            name: "John Doe",
+            age: 43,
+            address: {
+                street: "10 Downing Street",
+                city: "London"
+            },
+            phones: [
+                "+44 1234567",
+                "+44 2345678"
+            ],
+
+            test: [null, false, true, (format!("{:?}", Some(1 + 2)))]
+        }
+    };
+
+    let expected = "\
+Value({
+    Object: {
+        fields: [
+            Field({
+                name: IDENT(_),
+                value: Value({
+                    Literal: LITERAL(_),
+                }),
+            }),
+            Field({
+                name: IDENT(_),
+                value: Value({
+                    Literal: LITERAL(_),
+                }),
+            }),
+            Field({
+                name: IDENT(_),
+                value: Value({
+                    Object: {
+                        fields: [
+                            Field({
+                                name: IDENT(_),
+                                value: Value({
+                                    Literal: LITERAL(_),
+                                }),
+                            }),
+                            Field({
+                                name: IDENT(_),
+                                value: Value({
+                                    Literal: LITERAL(_),
+                                }),
+                            }),
+                        ],
+                    },
+                }),
+            }),
+            Field({
+                name: IDENT(_),
+                value: Value({
+                    Array: {
+                        elems: [
+                            Value({
+                                Literal: LITERAL(_),
+                            }),
+                            Value({
+                                Literal: LITERAL(_),
+                            }),
+                        ],
+                    },
+                }),
+            }),
+            Field({
+                name: IDENT(_),
+                value: Value({
+                    Array: {
+                        elems: [
+                            Value({
+                                Null: ?,
+                            }),
+                            Value({
+                                False: ?,
+                            }),
+                            Value({
+                                True: ?,
+                            }),
+                            Value({
+                                InterpolateRust: ?,
+                            }),
+                        ],
+                    },
+                }),
+            }),
+        ],
+    },
+})";
+
+    json_like_testcase(input, expected);
+}
+
+#[test]
+fn json_like_error() {
+    let input = stringify! {
+        stray_identifier
+    };
+
+    let expected = r#"?: error: expected ["(", "[", "{", "false", "null", "true", LITERAL]"#;
+
+    json_like_testcase(input, expected);
+}