From c48f03ac99760606772550afd201648526ea3265 Mon Sep 17 00:00:00 2001
From: Milad Irannejad
Date: Wed, 25 Dec 2024 15:53:27 -0500
Subject: [PATCH] Add cfg package for context-free grammars

---
 cfg/cfg.go             |  729 +++++++++++++++++
 cfg/cfg_test.go        | 1690 ++++++++++++++++++++++++++++++++++++++++
 cfg/production.go      |  274 +++
 cfg/production_test.go |  716 +++++++++++++++++
 cfg/string.go          |  158 ++++
 cfg/string_test.go     |  165 ++++
 cfg/symbol.go          |   82 ++
 cfg/symbol_test.go     |   68 ++
 8 files changed, 3882 insertions(+)
 create mode 100644 cfg/cfg.go
 create mode 100644 cfg/cfg_test.go
 create mode 100644 cfg/production.go
 create mode 100644 cfg/production_test.go
 create mode 100644 cfg/string.go
 create mode 100644 cfg/string_test.go
 create mode 100644 cfg/symbol.go
 create mode 100644 cfg/symbol_test.go

diff --git a/cfg/cfg.go b/cfg/cfg.go
new file mode 100644
index 0000000..7d86e4f
--- /dev/null
+++ b/cfg/cfg.go
@@ -0,0 +1,729 @@
+// Package cfg implements data structures and algorithms for context-free grammars.
+//
+// Context-free grammars can express a wide range of programming language constructs
+// while remaining computationally efficient to parse.
+// They are used in computer science and linguistics to describe the syntax of languages.
+//
+// A context-free grammar G = (V, Σ, R, S) is defined by four sets:
+//
+// 1. V is a set of non-terminal symbols that denote sets of strings.
+//    Non-terminal symbols are sometimes called syntactic variables.
+//    Non-terminals impose a hierarchical structure on the language.
+//
+// 2. Σ is a set of terminal symbols from which strings are formed.
+//    Terminal symbols are also referred to as tokens.
+//
+// 3. R ⊆ V × (V ∪ Σ)* is a set of productions, where each production consists of
+//    a non-terminal (head), an arrow, and a sequence of terminals and/or non-terminals (body).
+//
+// 4. S ∈ V is one of the non-terminal symbols designated as the start symbol.
+//    The set of strings denoted by the start symbol is the language generated by the grammar.
+//
+// Context-free languages are a strict superset of regular languages and are more expressive.
+package cfg
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+
+	. "github.com/moorara/algo/generic"
+	"github.com/moorara/algo/set"
+	"github.com/moorara/algo/sort"
+	"github.com/moorara/algo/symboltable"
+)
+
+var (
+	newSuffixes = []string{
+		"′", // Prime (U+2032)
+		"″", // Double Prime (U+2033)
+		"‴", // Triple Prime (U+2034)
+		"⁗", // Quadruple Prime (U+2057)
+		"₁", // Subscript One (U+2081)
+		"₂", // Subscript Two (U+2082)
+		"₃", // Subscript Three (U+2083)
+		"₄", // Subscript Four (U+2084)
+		"₅", // Subscript Five (U+2085)
+		"₆", // Subscript Six (U+2086)
+		"₇", // Subscript Seven (U+2087)
+		"₈", // Subscript Eight (U+2088)
+		"₉", // Subscript Nine (U+2089)
+		"_new",
+	}
+)
+
+// CFG represents a context-free grammar in formal language theory.
+type CFG struct {
+	Terminals    set.Set[Terminal]
+	NonTerminals set.Set[NonTerminal]
+	Productions  Productions
+	Start        NonTerminal
+}
+
+// New creates a new context-free grammar.
+func New(terms []Terminal, nonTerms []NonTerminal, prods []Production, start NonTerminal) CFG {
+	g := CFG{
+		Terminals:    set.New(eqTerminal),
+		NonTerminals: set.New(eqNonTerminal),
+		Productions:  NewProductions(),
+		Start:        start,
+	}
+
+	g.Terminals.Add(terms...)
+	g.NonTerminals.Add(nonTerms...)
+	g.Productions.Add(prods...)
+
+	return g
+}
+
+// Verify takes a context-free grammar and determines whether or not it is valid.
+// If the given grammar is invalid, an error with a descriptive message will be returned. +func (g CFG) Verify() error { + var err error + + getPredicate := func(n NonTerminal) Predicate1[Production] { + return func(p Production) bool { + return p.Head.Equals(n) + } + } + + // Check if the start symbol is in the set of non-terminal symbols. + if !g.NonTerminals.Contains(g.Start) { + err = errors.Join(err, fmt.Errorf("start symbol %s not in the set of non-terminal symbols", g.Start)) + } + + // Check if there is at least one production rule for the start symbol. + if !g.Productions.AnyMatch(getPredicate(g.Start)) { + err = errors.Join(err, fmt.Errorf("no production rule for start symbol %s", g.Start)) + } + + // Check if there is at least one prodcution rule for every non-terminal symbol. + for n := range g.NonTerminals.All() { + if !g.Productions.AnyMatch(getPredicate(n)) { + err = errors.Join(err, fmt.Errorf("no production rule for non-terminal symbol %s", n)) + } + } + + for p := range g.Productions.All() { + // Check if the head of production rule is in the set of non-terminal symbols. + if !g.NonTerminals.Contains(p.Head) { + err = errors.Join(err, fmt.Errorf("production head %s not in the set of non-terminal symbols", p.Head)) + } + + // Check if every symbol in the body of production rule is either in the set of terminal or non-terminal symbols. + for _, s := range p.Body { + if v, ok := s.(Terminal); ok && !g.Terminals.Contains(v) { + err = errors.Join(err, fmt.Errorf("terminal symbol %s not in the set of terminal symbols", v)) + } + + if v, ok := s.(NonTerminal); ok && !g.NonTerminals.Contains(v) { + err = errors.Join(err, fmt.Errorf("non-terminal symbol %s not in the set of non-terminal symbols", v)) + } + } + } + + return err +} + +// Equals determines whether or not two context-free grammars are the same. +func (g CFG) Equals(rhs CFG) bool { + return g.Terminals.Equals(rhs.Terminals) && + g.NonTerminals.Equals(rhs.NonTerminals) && + g.Productions.Equals(rhs.Productions) && + g.Start.Equals(rhs.Start) +} + +// Clone returns a deep copy of a context-free grammar, ensuring the clone is independent of the original. +func (g CFG) Clone() CFG { + return CFG{ + Terminals: g.Terminals.Clone(), + NonTerminals: g.NonTerminals.Clone(), + Productions: g.Productions.Clone(), + Start: g.Start, + } +} + +// NullableNonTerminals finds all non-terminal symbols in a context-free grammar +// that can derive the empty string ε in one or more steps (A ⇒* ε for some non-terminal A). +func (g CFG) NullableNonTerminals() set.Set[NonTerminal] { + // Define a set for all non-terminals that can derive the empty string ε + nullable := set.New(eqNonTerminal) + + for updated := true; updated; { + updated = false + + // Iterate through each production rule of the form A → α, + // where A is a non-terminal symbol and α is a string of terminals and non-terminals. + for head, list := range g.Productions.AllByHead() { + // Skip the production rule if A is already in the nullable set. + if nullable.Contains(head) { + continue + } + + for p := range list.All() { + if p.IsEmpty() { + // α is the empty string ε, add A to the nullable set. + nullable.Add(p.Head) + updated = true + } else if n := p.Body.NonTerminals(); len(n) == len(p.Body) && nullable.Contains(n...) { + // α consists of only non-terminal symbols already in the nullable set, add A to the nullable set. 
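+// For illustration, a minimal usage sketch of the API above (the tests in cfg_test.go
+// exercise the same calls on larger grammars): the grammar with V = {S}, Σ = {a, b},
+// R = {S → aSb, S → ε}, and start symbol S generates the language {aⁿbⁿ | n ≥ 0},
+// and S is nullable because S ⇒ ε.
+//
+//	g := New(
+//		[]Terminal{"a", "b"},
+//		[]NonTerminal{"S"},
+//		[]Production{
+//			{"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b")}}, // S → aSb
+//			{"S", ε}, // S → ε
+//		},
+//		"S",
+//	)
+//
+//	if err := g.Verify(); err != nil {
+//		panic(err)
+//	}
+//
+//	nullable := g.NullableNonTerminals()
+//	fmt.Println(nullable.Contains("S")) // true, since S ⇒* ε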
+ nullable.Add(p.Head) + updated = true + } + } + } + } + + return nullable +} + +// EliminateEmptyProductions converts a context-free grammar into an equivalent ε-free grammar. +// +// An empty production (ε-production) is any production of the form A → ε. +func (g CFG) EliminateEmptyProductions() CFG { + nullable := g.NullableNonTerminals() + + newG := CFG{ + Terminals: g.Terminals.Clone(), + NonTerminals: g.NonTerminals.Clone(), + Productions: NewProductions(), + Start: g.Start, + } + + // Iterate through each production rule in the input grammar. + // For each production rule of the form A → α, + // generate all possible combinations of α by including and excluding nullable non-terminals. + for p := range g.Productions.All() { + // Ignore ε-production rules (A → ε) + // Only consider the production rules of the form A → α + if p.IsEmpty() { + continue + } + + // bodies holds all possible combinations of the right-hand side of a production rule. + bodies, aux := []String[Symbol]{ε}, []String[Symbol]{} + + // Every nullable non-terminal symbol creates two possibilities, once by including and once by excluding it. + for _, sym := range p.Body { + v, ok := sym.(NonTerminal) + nonTermNullable := ok && nullable.Contains(v) + + for _, β := range bodies { + if nonTermNullable { + aux = append(aux, β) + } + aux = append(aux, append(β, sym)) + } + + bodies, aux = aux, nil + } + + for _, β := range bodies { + // Skip ε-production rules (A → ε) + if len(β) > 0 { + newG.Productions.Add(Production{p.Head, β}) + } + } + } + + // The set data structure automatically prevents duplicate items from being added. + // Therefore, we don't need to worry about deduplicating the new production rules at this stage. + + // If the start symbol of the grammer is nullable (S ⇒* ε), + // a new start symbol with an ε-production rule must be introduced (S′ → S | ε). + // This guarantees that the resulting grammar generates the same language as the original grammar. + if start := newG.Start; nullable.Contains(start) { + newStart, ok := newG.addNewNonTerminal(start, newSuffixes...) + if !ok { + panic(fmt.Sprintf("Failed to generate a new non-terminal for %s", start)) + } + + newG.Start = newStart + newG.Productions.Add(Production{newStart, String[Symbol]{start}}) // S′ → S + newG.Productions.Add(Production{newStart, ε}) // S′ → ε + } + + return newG +} + +// EliminateSingleProductions converts a context-free grammar into an equivalent single-production-free grammar. +// +// A single production a.k.a. unit production is a production rule whose body is a single non-terminal symbol (A → B). +func (g CFG) EliminateSingleProductions() CFG { + // Identify all single productions. + singleProds := map[NonTerminal][]NonTerminal{} + for p := range g.Productions.All() { + if p.IsSingle() { + singleProds[p.Head] = append(singleProds[p.Head], p.Body[0].(NonTerminal)) + } + } + + // Compute the transitive closure for all non-terminal symbols. + // The transitive closure of a non-terminal A is the the set of all non-terminals B + // such that there exists a sequence of single productions starting from A and reaching B (i.e., A → B₁ → B₂ → ... → B). + + closure := make(map[NonTerminal]map[NonTerminal]bool, g.NonTerminals.Size()) + + // Initially, each non-terminal symbol is reachable from itself. + for A := range g.NonTerminals.All() { + closure[A] = map[NonTerminal]bool{A: true} + } + + // Next, add directly reachable non-terminal symbols from single productions. 
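+// As a worked sketch of EliminateEmptyProductions above: for the grammar S → aSb | ε,
+// the only nullable non-terminal is S, so the body aSb expands to the two combinations
+// aSb and ab. Because the start symbol S is itself nullable, a new start symbol S′ is
+// introduced, and the resulting ε-free grammar is
+//
+//	S′ → S | ε
+//	S  → aSb | ab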
+ for A, nonTerms := range singleProds { + for _, B := range nonTerms { + closure[A][B] = true + } + } + + // Repeat until no new non-terminal symbols can be added to the closure set. + for updated := true; updated; { + updated = false + + for A, closureA := range closure { + for B := range closureA { + for next := range closure[B] { + if !closureA[next] { + closure[A][next] = true + updated = true + } + } + } + } + } + + newG := CFG{ + Terminals: g.Terminals.Clone(), + NonTerminals: g.NonTerminals.Clone(), + Productions: NewProductions(), + Start: g.Start, + } + + // For each production rule p of the form B → α, add a new production rule A → α + // if p is not a single production and B is in the transitive closure set of A. + for A, closureA := range closure { + for B := range closureA { + for p := range g.Productions.Get(B).All() { + // Skip single productions + if !p.IsSingle() { + newG.Productions.Add(Production{A, p.Body}) + } + } + } + } + + return newG +} + +// EliminateUnreachableProductions converts a context-free grammar into an equivalent grammar +// with all unreachable productions and their associated non-terminal symbols removed. +// +// An unreachable production refers to a production rule in a grammar +// that cannot be used to derive any string starting from the start symbol. +func (g CFG) EliminateUnreachableProductions() CFG { + reachable := set.New(eqNonTerminal, g.Start) + + // Reppeat until no new non-terminal is added to reachable: + // For each production rule of the form A → α: + // If A is in reachable, add all non-terminal in α to reachable. + for updated := true; updated; { + updated = false + + for p := range g.Productions.All() { + if reachable.Contains(p.Head) { + for _, n := range p.Body.NonTerminals() { + if !reachable.Contains(n) { + reachable.Add(n) + updated = true + } + } + } + } + } + + newG := CFG{ + Terminals: g.Terminals.Clone(), + NonTerminals: reachable, + Productions: NewProductions(), + Start: g.Start, + } + + // Only consider the reachable production rules. + for p := range g.Productions.All() { + if reachable.Contains(p.Head) { + newG.Productions.Add(p) + } + } + + return newG +} + +// EliminateCycles converts a context-free grammar into an equivalent cycle-free grammar. +// +// A grammar is cyclic if it has derivations of one or more steps in which A ⇒* A for some non-terminal A. +func (g CFG) EliminateCycles() CFG { + // Single productions (unit productions) can create cycles in a grammar. + // Eliminating empty productions (ε-productions) may introduce additional single productions, + // so it is necessary to eliminate empty productions first, followed by single productions. + // After removing single productions, some productions may become unreachable. + // These unreachable productions should then be removed from the grammar. + return g.EliminateEmptyProductions().EliminateSingleProductions().EliminateUnreachableProductions() +} + +// EliminateLeftRecursion converts a context-free grammar into an equivalent grammar with no left recursion. +// +// A grammar is left-recursive if it has a non-terminal A such that there is a derivation A ⇒+ Aα for some string. +// For top-down parsers, left recursion causes the parser to loop forever. +// Many bottom-up parsers also will not accept left-recursive grammars. +// +// Note that the resulting non-left-recursive grammar may have ε-productions. 
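+//
+// As a worked sketch of EliminateSingleProductions above: for the grammar
+//
+//	S → A | s,  A → B,  B → C | b,  C → D,  D → d
+//
+// the transitive closure of S over single productions is {S, A, B, C, D}, so every
+// unit production is replaced by the non-unit bodies reachable through it, giving
+//
+//	S → b | d | s,  A → b | d,  B → b | d,  C → d,  D → d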
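+//
+// As a sketch of the transformation this method performs: the immediately
+// left-recursive productions E → E + T | T (with β = T and α = + T) become
+//
+//	E  → T E′
+//	E′ → + T E′ | ε
+//
+// where E′ is a freshly generated non-terminal, so a top-down parser can expand E
+// without looping.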
+func (g CFG) EliminateLeftRecursion() CFG { + // Define predicates for identifying left-recursive and non-left-recursive productions + isLeftRecursivePredicate := func(p Production) bool { return p.IsLeftRecursive() } + isNotLeftRecursivePredicate := func(p Production) bool { return !p.IsLeftRecursive() } + + // The algorithm implemented here is guaranteed to work if the grammar has no cycles or ε-productions. + newG := g.EliminateCycles() + + // Arrange the non-terminals in some order. + // The exact order does not affect the eliminition of left recursions (immediate or indirect), + // but the resulting grammar can depend on the order in which non-terminals are processed. + _, _, nonTerms := newG.orderNonTerminals() + + for i := 0; i < len(nonTerms); i++ { + for j := 0; j < i-1; j++ { + /* + * Replace each production of the form Aᵢ → Aⱼγ by the productions Aᵢ → δ₁γ | δ₂γ | ... | δₖγ, + * where Aⱼ → δ₁ | δ₂ | ... | δₖ are all current Aⱼ-productions. + */ + + Ai, Aj := nonTerms[i], nonTerms[j] + AiProds, AjProds := newG.Productions.Get(Ai), newG.Productions.Get(Aj) + + AiAjProds := AiProds.SelectMatch(func(p Production) bool { + return len(p.Body) > 0 && p.Body[0].Equals(Aj) + }) + + for AiAjProd := range AiAjProds.All() { + newG.Productions.Remove(AiAjProd) + for AjProd := range AjProds.All() { + p := Production{Ai, AjProd.Body.Concat(AiAjProd.Body[1:])} + newG.Productions.Add(p) + } + } + } + + /* + * Immediate left recursion can be eliminated by the following technique, + * which works for any number of A-productions. + * + * First, group the productions as + * + * A → Aα₁ | Aα₂ | ... | Aαₘ | β₁ | β₂ | ... | βₙ + * + * where no αᵢ is ε and no βᵢ begins with an A. Then replace A-productions by + * + * A → β₁A′ | β₂A′ | ... | βₙA′ + * A′ → α₁A′ | α₂A′ | ... | αₘA′ | ε + */ + + A := nonTerms[i] + AProds := newG.Productions.Get(A) + hasLR := AProds.AnyMatch(isLeftRecursivePredicate) + + if hasLR { + Anew, ok := newG.addNewNonTerminal(A, newSuffixes...) + if !ok { + panic(fmt.Sprintf("Failed to generate a new non-terminal for %s", A)) + } + + LRProds := AProds.SelectMatch(isLeftRecursivePredicate) // Immediately Left-Recursive A-productions + nonLRProds := AProds.SelectMatch(isNotLeftRecursivePredicate) // Not Immediately Left-Recursive A-productions + + // Remove A → Aα₁ | Aα₂ | ... | Aαₘ | β₁ | β₂ | ... | βₙ + newG.Productions.RemoveAll(A) + + // Add A → β₁A′ | β₂A′ | ... | βₙA′ + for nonLRProd := range nonLRProds.All() { + newG.Productions.Add(Production{A, nonLRProd.Body.Append(Anew)}) + } + + // Single productions of the form A → A, where α = ε, are already eliminated. + // Add A′ → α₁A′ | α₂A′ | ... | αₘA′ | ε + for LRProd := range LRProds.All() { + newG.Productions.Add(Production{Anew, LRProd.Body[1:].Append(Anew)}) + } + + // Add A′ → ε + newG.Productions.Add(Production{Anew, ε}) + } + } + + return newG +} + +// LeftFactor converts a context-free grammar into an equivalent left-factored grammar. +// +// Left factoring is a grammar transformation for producing a grammar suitable predictive for top-down parsing. +// When the choice between two alternative A-productions is not clear, +// we may be able to rewrite the productions to defer the decision +// until enough of the input has been seen that we can make the right choice. +// +// For example, if we have the two productions +// +// 𝑠𝑡𝑚𝑡 → 𝐢𝐟 𝑒𝑥𝑝𝑟 𝐭𝐡𝐞𝐧 𝑠𝑡𝑚𝑡 𝐞𝐥𝐬𝐞 𝑠𝑡𝑚𝑡 +// | 𝐢𝐟 𝑒𝑥𝑝𝑟 𝐭𝐡𝐞𝐧 𝑠𝑡𝑚𝑡 +// +// on seeing the input 𝐢𝐟, we cannot immediately tell which productions to choose to expand 𝑠𝑡𝑚𝑡. 
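+//
+// Left factoring the common prefix 𝐢𝐟 𝑒𝑥𝑝𝑟 𝐭𝐡𝐞𝐧 𝑠𝑡𝑚𝑡 defers that decision
+// (a sketch of the standard transformation, with 𝑠𝑡𝑚𝑡′ denoting the freshly
+// generated non-terminal):
+//
+//	𝑠𝑡𝑚𝑡  → 𝐢𝐟 𝑒𝑥𝑝𝑟 𝐭𝐡𝐞𝐧 𝑠𝑡𝑚𝑡 𝑠𝑡𝑚𝑡′
+//	𝑠𝑡𝑚𝑡′ → 𝐞𝐥𝐬𝐞 𝑠𝑡𝑚𝑡 | ε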
+// +// Note that the resulting left-factored grammar may have ε-productions and/or single productions. +func (g CFG) LeftFactor() CFG { + /* + * For each non-terminal A, find the longest prefix α common to two or more A-productions. + * If α ≠ ε, there is a non-trivial common prefix, replace all of the A-productions + * + * A → αβ₁ | αβ₂ | ... | αβₙ | γ + * + * where γ represents all the alternative productions that do not being with α, by + * + * A → αA′ | γ + * A′ → β₁ | β₂ | ... | βₙ + * + * We repeatedly apply this transformation until + * no two alternative productions for a non-terminal have a common prefix. + */ + + newG := g.Clone() + + for updated := true; updated; { + updated = false + + for A, AProds := range newG.Productions.AllByHead() { + // Group production bodies by their common prefixes. + groups := groupByCommonPrefix(AProds) + + // Select groups with two or more suffixes. + // These correspond to A-productions A → αβ₁ | αβ₂ | ... | αβₙ + prefixGroups := groups.SelectMatch(func(prefix String[Symbol], suffixes set.Set[String[Symbol]]) bool { + return suffixes.Size() >= 2 + }) + + // Select groups with exactly one suffix. + // These correspond to alternative A-productions A → γ + altGroups := groups.SelectMatch(func(prefix String[Symbol], suffixes set.Set[String[Symbol]]) bool { + return suffixes.Size() == 1 + }) + + if prefixGroups.Size() > 0 && altGroups.Size() > 0 { + // Remove all A-productions A → αβ₁ | αβ₂ | ... | αβₙ | γ + AProds.RemoveAll() + + for prefix, suffixes := range prefixGroups.All() { + Anew, ok := newG.addNewNonTerminal(A, newSuffixes...) + if !ok { + panic(fmt.Sprintf("Failed to generate a new non-terminal for %s", A)) + } + + // Add A-production A → αA′ + newG.Productions.Add(Production{A, prefix.Append(Anew)}) + + // Add A′-productions A′ → β₁ | β₂ | ... | βₙ + for suffix := range suffixes.All() { + newG.Productions.Add(Production{Anew, suffix}) + } + } + + // Add alternative A-productions A → γ + for prefix, suffixes := range altGroups.All() { + for suffix := range suffixes.All() { + newG.Productions.Add(Production{A, prefix.Concat(suffix)}) + } + } + } + } + } + + return newG +} + +// groupByCommonPrefix groups production bodies by their common prefixes. +// It prioritizes shorter prefixes that encompass more suffixes and production bodies +// over longer prefixes that encompass fewer suffixes or production bodies. +func groupByCommonPrefix(prods set.Set[Production]) symboltable.SymbolTable[String[Symbol], set.Set[String[Symbol]]] { + // Define a map of prefixes to their corresponding suffixes. + groups := symboltable.NewQuadraticHashTable[String[Symbol], set.Set[String[Symbol]]]( + HashFuncForSymbolString(nil), + eqString, + eqStringSet, + symboltable.HashOpts{}, + ) + + for prod := range prods.All() { + prefixFound := false + + // Attempt to find an existing prefix for the current production body. + for prefix := range groups.All() { + // Compute the longest common prefix between the current production body and an existing prefix in the groups. + commonPrefix := String[Symbol]{} + for i := 0; i < len(prefix) && i < len(prod.Body) && prefix[i].Equals(prod.Body[i]); i++ { + commonPrefix = commonPrefix.Append(prefix[i]) + } + + // If a common prefix is found, + // add the remaining part of the current production body as a suffix to the prefix group. 
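+// For example (a sketch of the grouping): the production bodies ab, ac, and d are
+// grouped as
+//
+//	a → {b, c}
+//	d → {ε}
+//
+// LeftFactor then rewrites the two-suffix group as A → aA′ with A′ → b | c and keeps
+// the single-suffix group as the alternative A → d.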
+ if len(commonPrefix) > 0 { + suffix := prod.Body[len(commonPrefix):] + suffixes, _ := groups.Get(commonPrefix) + suffixes.Add(suffix) + prefixFound = true + break + } + } + + // If no matching prefix is found, + // initialize a new prefix with the first symbol of the production body and store the remaining part as the suffix. + if !prefixFound { + var prefix, suffix String[Symbol] + if prod.IsEmpty() { + prefix, suffix = ε, ε + } else { + prefix, suffix = prod.Body[:1], prod.Body[1:] + } + + suffixes := set.New[String[Symbol]](eqString, suffix) + groups.Put(prefix, suffixes) + } + } + + return groups +} + +// ChomskyNormalForm converts a context-free grammar into an equivalent grammar in Chomsky Normal Form. +// +// A grammar is in Chomsky Normal Form (CNF) if every production is either of the form A → BC or A → a, +// where A, B, and C are non-terminal symbols, and a is a terminal symbol +// (with the possible exception of the empty string derived from the start symbol, S → ε). +func (g CFG) ChomskyNormalForm() CFG { + newG := g.Clone() + + return newG +} + +// String returns a string representation of a context-free grammar. +func (g CFG) String() string { + var b bytes.Buffer + + terms := g.orderTerminals() + visited, unvisited, nonTerms := g.orderNonTerminals() + + fmt.Fprintf(&b, "Terminal Symbols: %s\n", terms) + fmt.Fprintf(&b, "Non-Terminal Symbols: %s\n", nonTerms) + fmt.Fprintf(&b, "Start Symbol: %s\n", g.Start) + fmt.Fprintln(&b, "Production Rules:") + + for _, head := range visited { + fmt.Fprintf(&b, " %s → ", head) + for _, p := range g.Productions.Order(head) { + fmt.Fprintf(&b, "%s | ", p.Body.String()) + } + b.Truncate(b.Len() - 3) + fmt.Fprintln(&b) + } + + for _, head := range unvisited { + fmt.Fprintf(&b, " %s → ", head) + for _, p := range g.Productions.Order(head) { + fmt.Fprintf(&b, "%s | ", p.Body.String()) + } + b.Truncate(b.Len() - 3) + fmt.Fprintln(&b) + } + + return b.String() +} + +// addNewNonTerminal generates and adds a new non-terminal symbol to the grammar. +// It does so by appending each of the provided suffixes to the given prefix, in order, +// until it finds a non-terminal that does not already exist in the set of non-terminals. +// +// The function returns the first new non-terminal added, along with a boolean indicating success. +// If all generated non-terminals already exist, it returns an empty non-terminal and false. +func (g CFG) addNewNonTerminal(prefix NonTerminal, suffixes ...string) (NonTerminal, bool) { + for _, suffix := range suffixes { + nonTerm := NonTerminal(string(prefix) + suffix) + if !g.NonTerminals.Contains(nonTerm) { + g.NonTerminals.Add(nonTerm) + return nonTerm, true + } + } + + return NonTerminal(""), false +} + +// orderTerminals orders the unordered set of grammar terminals in a deterministic way. +// +// The goal of this function is to ensure a consistent and deterministic order for any given set of terminals. +func (g CFG) orderTerminals() String[Terminal] { + terms := make(String[Terminal], 0) + for t := range g.Terminals.All() { + terms = append(terms, t) + } + + // Sort terminals alphabetically based on the string representation of them. + sort.Quick[Terminal](terms, cmpTerminal) + + return terms +} + +// orderTerminals orders the unordered set of grammar non-terminals in a deterministic way. +// +// The goal of this function is to ensure a consistent and deterministic order for any given set of non-terminals. 
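+// For example (a sketch): in a grammar with start symbol S and productions
+// S → AB, A → aA | a, B → bB | b, C → cC | c, D → d, the non-terminals reachable
+// from S are listed in discovery order as S, A, B, and the unreachable C and D are
+// appended in alphabetical order, giving S, A, B, C, D overall.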
+func (g CFG) orderNonTerminals() (String[NonTerminal], String[NonTerminal], String[NonTerminal]) { + visited := make(String[NonTerminal], 0) + isVisited := func(n NonTerminal) bool { + for _, v := range visited { + if v == n { + return true + } + } + return false + } + + visited = append(visited, g.Start) + + // Reppeat until no new non-terminal is added to visited: + // For each production rule of the form A → α: + // If A is in visited, add all non-terminal in α to visited. + for updated := true; updated; { + updated = false + for head := range g.Productions.AllByHead() { + for _, p := range g.Productions.Order(head) { + if isVisited(p.Head) { + for _, n := range p.Body.NonTerminals() { + if !isVisited(n) { + visited = append(visited, n) + updated = true + } + } + } + } + } + } + + // Identify any unvisited non-terminals in the grammar. + unvisited := make(String[NonTerminal], 0) + for n := range g.NonTerminals.All() { + if !isVisited(n) { + unvisited = append(unvisited, n) + } + } + + // Sort unvisited non-terminals alphabetically based on the string representation of them. + sort.Quick[NonTerminal](unvisited, cmpNonTerminal) + + allNonTerms := make(String[NonTerminal], 0) + allNonTerms = append(allNonTerms, visited...) + allNonTerms = append(allNonTerms, unvisited...) + + return visited, unvisited, allNonTerms +} diff --git a/cfg/cfg_test.go b/cfg/cfg_test.go new file mode 100644 index 0000000..175266c --- /dev/null +++ b/cfg/cfg_test.go @@ -0,0 +1,1690 @@ +package cfg + +import ( + "testing" + + "github.com/moorara/algo/set" + "github.com/stretchr/testify/assert" +) + +var grammars = []CFG{ + New( + []Terminal{"0", "1"}, + []NonTerminal{"S", "X", "Y"}, + []Production{ + {"S", String[Symbol]{NonTerminal("X"), NonTerminal("Y"), NonTerminal("X")}}, // S → XYX + {"X", String[Symbol]{Terminal("0"), NonTerminal("X")}}, // X → 0X + {"X", ε}, // X → ε + {"Y", String[Symbol]{Terminal("1"), NonTerminal("Y")}}, // Y → 1Y + {"Y", ε}, // Y → ε + }, + "S", + ), + New( + []Terminal{"a", "b"}, + []NonTerminal{"S"}, + []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S → aSbS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S → bSaS + {"S", ε}, // S → ε + }, + "S", + ), + New( + []Terminal{"a", "b"}, + []NonTerminal{"S", "A", "B"}, + []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("B"), Terminal("a")}}, // S → aBa + {"S", String[Symbol]{NonTerminal("A"), Terminal("b")}}, // S → Ab + {"S", String[Symbol]{Terminal("a")}}, // S → a + {"A", String[Symbol]{Terminal("b")}}, // A → b + {"A", ε}, // A → ε + {"B", String[Symbol]{NonTerminal("A")}}, // B → A + {"B", String[Symbol]{Terminal("b")}}, // B → b + }, + "S", + ), + New( + []Terminal{"b", "c", "d", "s"}, + []NonTerminal{"S", "A", "B", "C", "D"}, + []Production{ + {"S", String[Symbol]{NonTerminal("A")}}, // S → A + {"S", String[Symbol]{Terminal("s")}}, // S → s + {"A", String[Symbol]{NonTerminal("B")}}, // A → B + {"B", String[Symbol]{NonTerminal("C")}}, // B → C + {"B", String[Symbol]{Terminal("b")}}, // B → b + {"C", String[Symbol]{NonTerminal("D")}}, // C → D + {"D", String[Symbol]{Terminal("d")}}, // D → d + }, + "S", + ), + New( + []Terminal{"a", "b", "c", "d"}, + []NonTerminal{"S", "A", "B", "C", "D"}, + []Production{ + {"S", String[Symbol]{NonTerminal("A"), NonTerminal("B")}}, // S → AB + {"A", String[Symbol]{Terminal("a"), NonTerminal("A")}}, // A → aA + {"A", String[Symbol]{Terminal("a")}}, // A → a + {"B", 
String[Symbol]{Terminal("b"), NonTerminal("B")}}, // B → bB + {"B", String[Symbol]{Terminal("b")}}, // B → b + {"C", String[Symbol]{Terminal("c"), NonTerminal("C")}}, // C → cC + {"C", String[Symbol]{Terminal("c")}}, // C → c + {"D", String[Symbol]{Terminal("d")}}, // D → d + }, + "S", + ), + New( + []Terminal{"+", "-", "*", "/", "(", ")", "id"}, + []NonTerminal{"S", "E"}, + []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // E → E + E + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // E → E - E + {"E", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // E → E * E + {"E", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // E → E / E + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + {"E", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // E → - E + {"E", String[Symbol]{Terminal("id")}}, // E → id + }, + "S", + ), + New( + []Terminal{"+", "-", "*", "/", "(", ")", "id"}, + []NonTerminal{"S", "E", "T", "F"}, + []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // E → E + T + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // E → E - T + {"E", String[Symbol]{NonTerminal("T")}}, // E → T + {"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // T → T * F + {"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // T → T / F + {"T", String[Symbol]{NonTerminal("F")}}, // T → F + {"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // F → ( E ) + {"F", String[Symbol]{Terminal("id")}}, // F → id + }, + "S", + ), + New( + []Terminal{"=", "|", "(", ")", "[", "]", "{", "}", "{{", "}}", "GRAMMAR", "IDENT", "TOKEN", "STRING", "REGEX"}, + []NonTerminal{"grammar", "name", "decls", "decl", "token", "rule", "lhs", "rhs", "nonterm", "term"}, + []Production{ + {"grammar", String[Symbol]{NonTerminal("name"), NonTerminal("decls")}}, // grammar → name decls + {"name", String[Symbol]{Terminal("GRAMMAR"), Terminal("IDENT")}}, // name → GRAMMAR IDENT + {"decls", String[Symbol]{NonTerminal("decls"), NonTerminal("decl")}}, // decls → decls decl + {"decls", ε}, // decls → ε + {"decl", String[Symbol]{NonTerminal("token")}}, // decl → token + {"decl", String[Symbol]{NonTerminal("rule")}}, // decl → rule + {"token", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("STRING")}}, // token → TOKEN "=" STRING + {"token", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("REGEX")}}, // token → TOKEN "=" REGEX + {"rule", String[Symbol]{NonTerminal("lhs"), Terminal("="), NonTerminal("rhs")}}, // rule → lhs "=" rhs + {"lhs", String[Symbol]{NonTerminal("nonterm")}}, // lhs → nonterm + {"rhs", String[Symbol]{NonTerminal("rhs"), NonTerminal("rhs")}}, // rhs → rhs rhs + {"rhs", String[Symbol]{NonTerminal("rhs"), Terminal("|"), NonTerminal("rhs")}}, // rhs → rhs "|" rhs + {"rhs", String[Symbol]{Terminal("("), NonTerminal("rhs"), Terminal(")")}}, // rhs → "(" rhs ")" + {"rhs", String[Symbol]{Terminal("["), NonTerminal("rhs"), Terminal("]")}}, // rhs → "[" rhs "]" + {"rhs", String[Symbol]{Terminal("{"), NonTerminal("rhs"), Terminal("}")}}, // rhs → "{" rhs "}" + {"rhs", String[Symbol]{Terminal("{{"), NonTerminal("rhs"), Terminal("}}")}}, // rhs → "{{" rhs "}}" + {"rhs", String[Symbol]{NonTerminal("nonterm")}}, // rhs → nonterm + {"rhs", 
String[Symbol]{NonTerminal("term")}}, // rhs → term + {"nonterm", String[Symbol]{Terminal("IDENT")}}, // nonterm → IDENT + {"term", String[Symbol]{Terminal("TOKEN")}}, // term → TOKEN + {"term", String[Symbol]{Terminal("STRING")}}, // term → STRING + }, + "grammar", + ), +} + +func TestNew(t *testing.T) { + tests := []struct { + name string + terms []Terminal + nonTerms []NonTerminal + prods []Production + start NonTerminal + }{ + { + name: "MatchingPairs", + terms: []Terminal{"a", "b"}, + nonTerms: []NonTerminal{"S"}, + prods: []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b")}}, // S → aSb + {"S", ε}, // S → ε + }, + start: "S", + }, + { + name: "WellformedParantheses", + terms: []Terminal{"(", ")"}, + nonTerms: []NonTerminal{"S"}, + prods: []Production{ + {"S", String[Symbol]{NonTerminal("S"), NonTerminal("S")}}, // S → SS + {"S", String[Symbol]{Terminal("("), NonTerminal("S"), Terminal(")")}}, // S → (S) + {"S", String[Symbol]{Terminal("("), Terminal(")")}}, // S → () + }, + start: "S", + }, + { + name: "WellformedParanthesesAndBrackets", + terms: []Terminal{"(", ")", "[", "]"}, + nonTerms: []NonTerminal{"S"}, + prods: []Production{ + {"S", String[Symbol]{NonTerminal("S"), NonTerminal("S")}}, // S → SS + {"S", String[Symbol]{Terminal("("), NonTerminal("S"), Terminal(")")}}, // S → (S) + {"S", String[Symbol]{Terminal("["), NonTerminal("S"), Terminal("]")}}, // S → [S] + {"S", String[Symbol]{Terminal("("), Terminal(")")}}, // S → () + {"S", String[Symbol]{Terminal("["), Terminal("]")}}, // S → [] + }, + start: "S", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := New(tc.terms, tc.nonTerms, tc.prods, tc.start) + assert.NotEmpty(t, g) + }) + } +} + +func TestCFG_Verify(t *testing.T) { + tests := []struct { + name string + g CFG + expectedError string + }{ + { + name: "StartSymbolNotDeclared", + g: New( + []Terminal{}, + []NonTerminal{}, + []Production{}, + "S", + ), + expectedError: "start symbol S not in the set of non-terminal symbols\nno production rule for start symbol S", + }, + { + name: "StartSymbolHasNoProduction", + g: New( + []Terminal{}, + []NonTerminal{"S"}, + []Production{}, + "S", + ), + expectedError: "no production rule for start symbol S\nno production rule for non-terminal symbol S", + }, + { + name: "NonTerminalHasNoProduction", + g: New( + []Terminal{}, + []NonTerminal{"A", "S"}, + []Production{ + {"S", ε}, // S → ε + }, + "S", + ), + expectedError: "no production rule for non-terminal symbol A", + }, + { + name: "ProductionHeadNotDeclared", + g: New( + []Terminal{}, + []NonTerminal{"A", "S"}, + []Production{ + {"S", String[Symbol]{NonTerminal("A")}}, // S → A + {"A", ε}, // A → ε + {"B", ε}, // B → ε + }, + "S", + ), + expectedError: "production head B not in the set of non-terminal symbols", + }, + { + name: "TerminalNotDeclared", + g: New( + []Terminal{}, + []NonTerminal{"A", "B", "S"}, + []Production{ + {"S", String[Symbol]{NonTerminal("A")}}, // S → A + {"A", String[Symbol]{Terminal("a")}}, // A → a + {"B", ε}, // B → ε + }, + "S", + ), + expectedError: "terminal symbol \"a\" not in the set of terminal symbols", + }, + { + name: "NonTerminalNotDeclared", + g: New( + []Terminal{"a"}, + []NonTerminal{"A", "B", "S"}, + []Production{ + {"S", String[Symbol]{NonTerminal("A")}}, // S → A + {"A", String[Symbol]{Terminal("a")}}, // A → a + {"B", String[Symbol]{NonTerminal("C")}}, // B → C + }, + "S", + ), + expectedError: "non-terminal symbol C not in the set of non-terminal symbols", + }, + { + name: 
"Valid", + g: New( + []Terminal{"a", "b"}, + []NonTerminal{"A", "B", "S"}, + []Production{ + {"S", String[Symbol]{NonTerminal("A")}}, // S → A + {"S", String[Symbol]{NonTerminal("B")}}, // S → B + {"A", String[Symbol]{Terminal("a")}}, // A → a + {"B", String[Symbol]{Terminal("b")}}, // B → b + }, + "S", + ), + expectedError: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := tc.g.Verify() + + if tc.expectedError == "" { + assert.NoError(t, err) + } else { + assert.EqualError(t, err, tc.expectedError) + } + }) + } +} + +func TestCFG_Equals(t *testing.T) { + tests := []struct { + name string + lhs CFG + rhs CFG + expectedEquals bool + }{ + { + name: "TerminalsNotEqual", + lhs: New( + []Terminal{"a", "b"}, + []NonTerminal{"A", "B", "S"}, + []Production{}, + "S", + ), + rhs: New( + []Terminal{"a", "b", "c"}, + []NonTerminal{"A", "B", "S"}, + []Production{}, + "S", + ), + expectedEquals: false, + }, + { + name: "NonTerminalsNotEqual", + lhs: New( + []Terminal{"a", "b"}, + []NonTerminal{"A", "B", "C", "S"}, + []Production{}, + "S", + ), + rhs: New( + []Terminal{"a", "b"}, + []NonTerminal{"A", "B", "S"}, + []Production{}, + "S", + ), + expectedEquals: false, + }, + { + name: "ProductionsNotEqual", + lhs: New( + []Terminal{"a", "b"}, + []NonTerminal{"A", "B", "S"}, + []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("A")}}, // S → aA + {"S", String[Symbol]{Terminal("b"), NonTerminal("B")}}, // S → bB + {"A", String[Symbol]{Terminal("a"), NonTerminal("S")}}, // A → aS + {"A", String[Symbol]{Terminal("b"), NonTerminal("A")}}, // A → bA + {"A", ε}, // A → ε + {"B", String[Symbol]{Terminal("b"), NonTerminal("S")}}, // B → bS + {"B", String[Symbol]{Terminal("a"), NonTerminal("B")}}, // B → aB + {"B", ε}, // B → ε + }, + "S", + ), + rhs: New( + []Terminal{"a", "b"}, + []NonTerminal{"A", "B", "S"}, + []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("A")}}, // S → aA + {"S", String[Symbol]{Terminal("b"), NonTerminal("B")}}, // S → bB + {"A", String[Symbol]{Terminal("a"), NonTerminal("S")}}, // A → aS + {"A", String[Symbol]{Terminal("b"), NonTerminal("A")}}, // A → bA + {"B", String[Symbol]{Terminal("b"), NonTerminal("S")}}, // B → bS + {"B", String[Symbol]{Terminal("a"), NonTerminal("B")}}, // B → aB + {"B", ε}, // B → ε + }, + "S", + ), + expectedEquals: false, + }, + { + name: "StartSymbolsNotEqual", + lhs: New( + []Terminal{"a", "b"}, + []NonTerminal{"A", "B", "S"}, + []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("A")}}, // S → aA + {"S", String[Symbol]{Terminal("b"), NonTerminal("B")}}, // S → bB + {"A", String[Symbol]{Terminal("a"), NonTerminal("S")}}, // A → aS + {"A", String[Symbol]{Terminal("b"), NonTerminal("A")}}, // A → bA + {"A", ε}, // A → ε + {"B", String[Symbol]{Terminal("b"), NonTerminal("S")}}, // B → bS + {"B", String[Symbol]{Terminal("a"), NonTerminal("B")}}, // B → aB + {"B", ε}, // B → ε + }, + "S", + ), + rhs: New( + []Terminal{"a", "b"}, + []NonTerminal{"A", "B", "S"}, + []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("A")}}, // S → aA + {"S", String[Symbol]{Terminal("b"), NonTerminal("B")}}, // S → bB + {"A", String[Symbol]{Terminal("a"), NonTerminal("S")}}, // A → aS + {"A", String[Symbol]{Terminal("b"), NonTerminal("A")}}, // A → bA + {"A", ε}, // A → ε + {"B", String[Symbol]{Terminal("b"), NonTerminal("S")}}, // B → bS + {"B", String[Symbol]{Terminal("a"), NonTerminal("B")}}, // B → aB + {"B", ε}, // B → ε + }, + "A", + ), + expectedEquals: false, + }, + { + name: "Equal", + 
lhs: New( + []Terminal{"+", "-", "*", "/", "(", ")", "id"}, + []NonTerminal{"S", "E", "T", "F"}, + []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // E → E + T + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // E → E - T + {"E", String[Symbol]{NonTerminal("T")}}, // E → T + {"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // T → T * F + {"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // T → T / F + {"T", String[Symbol]{NonTerminal("F")}}, // T → F + {"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // F → ( E ) + {"F", String[Symbol]{Terminal("id")}}, // F → id + }, + "S", + ), + rhs: New( + []Terminal{"id", "(", ")", "+", "-", "*", "/"}, + []NonTerminal{"F", "T", "E", "S"}, + []Production{ + {"F", String[Symbol]{Terminal("id")}}, // F → id + {"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // F → ( E ) + {"T", String[Symbol]{NonTerminal("F")}}, // T → F + {"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // T → T * F + {"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // T → T / F + {"E", String[Symbol]{NonTerminal("T")}}, // E → T + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // E → E + T + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // E → E - T + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + }, + "S", + ), + expectedEquals: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expectedEquals, tc.lhs.Equals(tc.rhs)) + }) + } +} + +func TestCFG_Clone(t *testing.T) { + tests := []struct { + name string + g CFG + }{ + { + name: "OK", + g: grammars[1], + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + newG := tc.g.Clone() + assert.False(t, newG == tc.g) + assert.True(t, newG.Equals(tc.g)) + }) + } +} + +func TestCFG_NullableNonTerminals(t *testing.T) { + tests := []struct { + name string + g CFG + expectedNullables []NonTerminal + }{ + { + name: "1st", + g: grammars[0], + expectedNullables: []NonTerminal{"S", "X", "Y"}, + }, + { + name: "2nd", + g: grammars[1], + expectedNullables: []NonTerminal{"S"}, + }, + { + name: "3rd", + g: grammars[2], + expectedNullables: []NonTerminal{"A", "B"}, + }, + { + name: "4th", + g: grammars[3], + expectedNullables: []NonTerminal{}, + }, + { + name: "5th", + g: grammars[4], + expectedNullables: []NonTerminal{}, + }, + { + name: "6th", + g: grammars[5], + expectedNullables: []NonTerminal{}, + }, + { + name: "7th", + g: grammars[6], + expectedNullables: []NonTerminal{}, + }, + { + name: "8th", + g: grammars[7], + expectedNullables: []NonTerminal{"decls"}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + nullables := tc.g.NullableNonTerminals() + + for nullable := range nullables.All() { + assert.Contains(t, tc.expectedNullables, nullable) + } + + for _, expectedNullable := range tc.expectedNullables { + assert.True(t, nullables.Contains(expectedNullable)) + } + }) + } +} + +func TestCFG_EliminateEmptyProductions(t *testing.T) { + tests := []struct { + name string + g CFG + expectedGrammar CFG + }{ + { + name: "1st", + g: grammars[0], + expectedGrammar: New( + []Terminal{"0", "1"}, + []NonTerminal{"S′", "S", "X", "Y"}, + []Production{ + {"S′", String[Symbol]{NonTerminal("S")}}, // S′ → S + {"S′", ε}, // S′ → ε + {"S", 
String[Symbol]{NonTerminal("X"), NonTerminal("Y"), NonTerminal("X")}}, // S → XYX + {"S", String[Symbol]{NonTerminal("X"), NonTerminal("X")}}, // S → XX + {"S", String[Symbol]{NonTerminal("X"), NonTerminal("Y")}}, // S → XY + {"S", String[Symbol]{NonTerminal("Y"), NonTerminal("X")}}, // S → YX + {"S", String[Symbol]{NonTerminal("X")}}, // S → X + {"S", String[Symbol]{NonTerminal("Y")}}, // S → Y + {"X", String[Symbol]{Terminal("0"), NonTerminal("X")}}, // X → 0X + {"X", String[Symbol]{Terminal("0")}}, // X → 0 + {"Y", String[Symbol]{Terminal("1"), NonTerminal("Y")}}, // Y → 1Y + {"Y", String[Symbol]{Terminal("1")}}, // Y → 1 + }, + "S′", + ), + }, + { + name: "2nd", + g: grammars[1], + expectedGrammar: New( + []Terminal{"a", "b"}, + []NonTerminal{"S′", "S"}, + []Production{ + {"S′", String[Symbol]{NonTerminal("S")}}, // S′ → S + {"S′", ε}, // S′ → ε + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S → aSbS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S → bSaS + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b")}}, // S → aSb + {"S", String[Symbol]{Terminal("a"), Terminal("b"), NonTerminal("S")}}, // S → abS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a")}}, // S → bSa + {"S", String[Symbol]{Terminal("b"), Terminal("a"), NonTerminal("S")}}, // S → baS + {"S", String[Symbol]{Terminal("a"), Terminal("b")}}, // S → ab + {"S", String[Symbol]{Terminal("b"), Terminal("a")}}, // S → ba + }, + "S′", + ), + }, + { + name: "3rd", + g: grammars[2], + expectedGrammar: New( + []Terminal{"a", "b"}, + []NonTerminal{"S", "A", "B"}, + []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("B"), Terminal("a")}}, // S → aBa + {"S", String[Symbol]{NonTerminal("A"), Terminal("b")}}, // S → Ab + {"S", String[Symbol]{Terminal("a"), Terminal("a")}}, // S → aa + {"S", String[Symbol]{Terminal("a")}}, // S → a + {"S", String[Symbol]{Terminal("b")}}, // S → b + {"A", String[Symbol]{Terminal("b")}}, // A → b + {"B", String[Symbol]{NonTerminal("A")}}, // B → A + {"B", String[Symbol]{Terminal("b")}}, // B → b + }, + "S", + ), + }, + { + name: "4th", + g: grammars[3], + expectedGrammar: grammars[3], + }, + { + name: "5th", + g: grammars[4], + expectedGrammar: grammars[4], + }, + { + name: "6th", + g: grammars[5], + expectedGrammar: grammars[5], + }, + { + name: "7th", + g: grammars[6], + expectedGrammar: grammars[6], + }, + { + name: "8th", + g: grammars[7], + expectedGrammar: New( + []Terminal{"=", "|", "(", ")", "[", "]", "{", "}", "{{", "}}", "GRAMMAR", "IDENT", "TOKEN", "STRING", "REGEX"}, + []NonTerminal{"grammar", "name", "decls", "decl", "token", "rule", "lhs", "rhs", "nonterm", "term"}, + []Production{ + {"grammar", String[Symbol]{NonTerminal("name")}}, // grammar → name + {"grammar", String[Symbol]{NonTerminal("name"), NonTerminal("decls")}}, // grammar → name decls + {"name", String[Symbol]{Terminal("GRAMMAR"), Terminal("IDENT")}}, // name → GRAMMAR IDENT + {"decls", String[Symbol]{NonTerminal("decls"), NonTerminal("decl")}}, // decls → decls decl + {"decls", String[Symbol]{NonTerminal("decl")}}, // decls → decl + {"decl", String[Symbol]{NonTerminal("token")}}, // decl → token + {"decl", String[Symbol]{NonTerminal("rule")}}, // decl → rule + {"token", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("STRING")}}, // token → TOKEN "=" STRING + {"token", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("REGEX")}}, // token → TOKEN "=" REGEX + {"rule", 
String[Symbol]{NonTerminal("lhs"), Terminal("="), NonTerminal("rhs")}}, // rule → lhs "=" rhs + {"lhs", String[Symbol]{NonTerminal("nonterm")}}, // lhs → nonterm + {"rhs", String[Symbol]{NonTerminal("rhs"), NonTerminal("rhs")}}, // rhs → rhs rhs + {"rhs", String[Symbol]{NonTerminal("rhs"), Terminal("|"), NonTerminal("rhs")}}, // rhs → rhs "|" rhs + {"rhs", String[Symbol]{Terminal("("), NonTerminal("rhs"), Terminal(")")}}, // rhs → "(" rhs ")" + {"rhs", String[Symbol]{Terminal("["), NonTerminal("rhs"), Terminal("]")}}, // rhs → "[" rhs "]" + {"rhs", String[Symbol]{Terminal("{"), NonTerminal("rhs"), Terminal("}")}}, // rhs → "{" rhs "}" + {"rhs", String[Symbol]{Terminal("{{"), NonTerminal("rhs"), Terminal("}}")}}, // rhs → "{{" rhs "}}" + {"rhs", String[Symbol]{NonTerminal("nonterm")}}, // rhs → nonterm + {"rhs", String[Symbol]{NonTerminal("term")}}, // rhs → term + {"nonterm", String[Symbol]{Terminal("IDENT")}}, // nonterm → IDENT + {"term", String[Symbol]{Terminal("TOKEN")}}, // term → TOKEN + {"term", String[Symbol]{Terminal("STRING")}}, // term → STRING + }, + "grammar", + ), + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := tc.g.EliminateEmptyProductions() + assert.True(t, g.Equals(tc.expectedGrammar)) + }) + } +} + +func TestCFG_EliminateSingleProductions(t *testing.T) { + tests := []struct { + name string + g CFG + expectedGrammar CFG + }{ + { + name: "1st", + g: grammars[0], + expectedGrammar: grammars[0], + }, + { + name: "2nd", + g: grammars[1], + expectedGrammar: grammars[1], + }, + { + name: "3rd", + g: grammars[2], + expectedGrammar: New( + []Terminal{"a", "b"}, + []NonTerminal{"S", "A", "B"}, + []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("B"), Terminal("a")}}, // S → aBa + {"S", String[Symbol]{NonTerminal("A"), Terminal("b")}}, // S → Ab + {"S", String[Symbol]{Terminal("a")}}, // S → a + {"A", String[Symbol]{Terminal("b")}}, // A → b + {"A", ε}, // A → ε + {"B", String[Symbol]{Terminal("b")}}, // B → b + {"B", ε}, // B → ε + }, + "S", + ), + }, + { + name: "4th", + g: grammars[3], + expectedGrammar: New( + []Terminal{"b", "c", "d", "s"}, + []NonTerminal{"S", "A", "B", "C", "D"}, + []Production{ + {"S", String[Symbol]{Terminal("b")}}, // S → b + {"S", String[Symbol]{Terminal("d")}}, // S → d + {"S", String[Symbol]{Terminal("s")}}, // S → s + {"A", String[Symbol]{Terminal("b")}}, // A → b + {"A", String[Symbol]{Terminal("d")}}, // A → d + {"B", String[Symbol]{Terminal("b")}}, // B → b + {"B", String[Symbol]{Terminal("d")}}, // B → d + {"C", String[Symbol]{Terminal("d")}}, // C → d + {"D", String[Symbol]{Terminal("d")}}, // D → d + }, + "S", + ), + }, + { + name: "5th", + g: grammars[4], + expectedGrammar: grammars[4], + }, + { + name: "6th", + g: grammars[5], + expectedGrammar: New( + []Terminal{"+", "-", "*", "/", "(", ")", "id"}, + []NonTerminal{"S", "E"}, + []Production{ + {"S", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // S → E + E + {"S", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // S → E - E + {"S", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // S → E * E + {"S", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // S → E / E + {"S", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // S → ( E ) + {"S", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // S → - E + {"S", String[Symbol]{Terminal("id")}}, // S → id + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // E → E + E + {"E", 
String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // E → E - E + {"E", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // E → E * E + {"E", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // E → E / E + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + {"E", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // E → - E + {"E", String[Symbol]{Terminal("id")}}, // E → id + }, + "S", + ), + }, + { + name: "7th", + g: grammars[6], + expectedGrammar: New( + []Terminal{"+", "-", "*", "/", "(", ")", "id"}, + []NonTerminal{"S", "E", "T", "F"}, + []Production{ + {"S", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // S → E + T + {"S", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // S → E - T + {"S", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // S → T * F + {"S", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // S → T / F + {"S", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // S → ( E ) + {"S", String[Symbol]{Terminal("id")}}, // S → id + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // E → E + T + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // E → E - T + {"E", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // E → T * F + {"E", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // E → T / F + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + {"E", String[Symbol]{Terminal("id")}}, // E → id + {"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // T → T * F + {"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // T → T / F + {"T", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // T → ( E ) + {"T", String[Symbol]{Terminal("id")}}, // T → id + {"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // F → ( E ) + {"F", String[Symbol]{Terminal("id")}}, // F → id + }, + "S", + ), + }, + { + name: "8th", + g: grammars[7], + expectedGrammar: New( + []Terminal{"=", "|", "(", ")", "[", "]", "{", "}", "{{", "}}", "GRAMMAR", "IDENT", "TOKEN", "STRING", "REGEX"}, + []NonTerminal{"grammar", "name", "decls", "decl", "token", "rule", "lhs", "rhs", "nonterm", "term"}, + []Production{ + {"grammar", String[Symbol]{NonTerminal("name"), NonTerminal("decls")}}, // grammar → name decls + {"name", String[Symbol]{Terminal("GRAMMAR"), Terminal("IDENT")}}, // name → GRAMMAR IDENT + {"decls", String[Symbol]{NonTerminal("decls"), NonTerminal("decl")}}, // decls → decls decl + {"decls", ε}, // decls → ε + {"decl", String[Symbol]{NonTerminal("lhs"), Terminal("="), NonTerminal("rhs")}}, // decl → lhs "=" rhs + {"decl", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("STRING")}}, // decl → TOKEN "=" STRING + {"decl", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("REGEX")}}, // decl → TOKEN "=" REGEX + {"token", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("STRING")}}, // token → TOKEN "=" STRING + {"token", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("REGEX")}}, // token → TOKEN "=" REGEX + {"rule", String[Symbol]{NonTerminal("lhs"), Terminal("="), NonTerminal("rhs")}}, // rule → lhs "=" rhs + {"lhs", String[Symbol]{Terminal("IDENT")}}, // lhs → IDENT + {"rhs", String[Symbol]{NonTerminal("rhs"), NonTerminal("rhs")}}, // rhs → rhs rhs + {"rhs", String[Symbol]{NonTerminal("rhs"), 
Terminal("|"), NonTerminal("rhs")}}, // rhs → rhs "|" rhs + {"rhs", String[Symbol]{Terminal("("), NonTerminal("rhs"), Terminal(")")}}, // rhs → "(" rhs ")" + {"rhs", String[Symbol]{Terminal("["), NonTerminal("rhs"), Terminal("]")}}, // rhs → "[" rhs "]" + {"rhs", String[Symbol]{Terminal("{"), NonTerminal("rhs"), Terminal("}")}}, // rhs → "{" rhs "}" + {"rhs", String[Symbol]{Terminal("{{"), NonTerminal("rhs"), Terminal("}}")}}, // rhs → "{{" rhs "}}" + {"rhs", String[Symbol]{Terminal("IDENT")}}, // rhs → IDENT + {"rhs", String[Symbol]{Terminal("TOKEN")}}, // rhs → TOKEN + {"rhs", String[Symbol]{Terminal("STRING")}}, // rhs → STRING + {"nonterm", String[Symbol]{Terminal("IDENT")}}, // nonterm → IDENT + {"term", String[Symbol]{Terminal("TOKEN")}}, // term → TOKEN + {"term", String[Symbol]{Terminal("STRING")}}, // term → STRING + }, + "grammar", + ), + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := tc.g.EliminateSingleProductions() + assert.True(t, g.Equals(tc.expectedGrammar)) + }) + } +} + +func TestCFG_EliminateUnreachableProductions(t *testing.T) { + tests := []struct { + name string + g CFG + expectedGrammar CFG + }{ + { + name: "1st", + g: grammars[0], + expectedGrammar: grammars[0], + }, + { + name: "2nd", + g: grammars[1], + expectedGrammar: grammars[1], + }, + { + name: "3rd", + g: grammars[2], + expectedGrammar: grammars[2], + }, + { + name: "4th", + g: grammars[3], + expectedGrammar: grammars[3], + }, + { + name: "5th", + g: grammars[4], + expectedGrammar: New( + []Terminal{"a", "b", "c", "d"}, + []NonTerminal{"S", "A", "B"}, + []Production{ + {"S", String[Symbol]{NonTerminal("A"), NonTerminal("B")}}, // S → AB + {"A", String[Symbol]{Terminal("a"), NonTerminal("A")}}, // A → aA + {"A", String[Symbol]{Terminal("a")}}, // A → a + {"B", String[Symbol]{Terminal("b"), NonTerminal("B")}}, // B → bB + {"B", String[Symbol]{Terminal("b")}}, // B → b + }, + "S", + ), + }, + { + name: "6th", + g: grammars[5], + expectedGrammar: grammars[5], + }, + { + name: "7th", + g: grammars[6], + expectedGrammar: grammars[6], + }, + { + name: "8th", + g: grammars[7], + expectedGrammar: grammars[7], + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := tc.g.EliminateUnreachableProductions() + assert.True(t, g.Equals(tc.expectedGrammar)) + }) + } +} + +func TestCFG_EliminateCycles(t *testing.T) { + tests := []struct { + name string + g CFG + expectedGrammar CFG + }{ + { + name: "1st", + g: grammars[0], + expectedGrammar: New( + []Terminal{"0", "1"}, + []NonTerminal{"S′", "X", "Y"}, + []Production{ + {"S′", String[Symbol]{NonTerminal("X"), NonTerminal("Y"), NonTerminal("X")}}, // S′ → XYX + {"S′", String[Symbol]{NonTerminal("X"), NonTerminal("X")}}, // S′ → XX + {"S′", String[Symbol]{NonTerminal("X"), NonTerminal("Y")}}, // S′ → XY + {"S′", String[Symbol]{NonTerminal("Y"), NonTerminal("X")}}, // S′ → YX + {"S′", String[Symbol]{Terminal("0"), NonTerminal("X")}}, // S′ → 0X + {"S′", String[Symbol]{Terminal("1"), NonTerminal("Y")}}, // S′ → 1Y + {"S′", String[Symbol]{Terminal("0")}}, // S′ → 0 + {"S′", String[Symbol]{Terminal("1")}}, // S′ → 1 + {"S′", ε}, // S′ → ε + {"X", String[Symbol]{Terminal("0"), NonTerminal("X")}}, // X → 0X + {"X", String[Symbol]{Terminal("0")}}, // X → 0 + {"Y", String[Symbol]{Terminal("1"), NonTerminal("Y")}}, // Y → 1Y + {"Y", String[Symbol]{Terminal("1")}}, // Y → 1 + }, + "S′", + ), + }, + { + name: "2nd", + g: grammars[1], + expectedGrammar: New( + []Terminal{"a", "b"}, + []NonTerminal{"S′", "S"}, + []Production{ + 
{"S′", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S′ → aSbS + {"S′", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S′ → bSaS + {"S′", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b")}}, // S′ → aSb + {"S′", String[Symbol]{Terminal("a"), Terminal("b"), NonTerminal("S")}}, // S′ → abS + {"S′", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a")}}, // S′ → bSa + {"S′", String[Symbol]{Terminal("b"), Terminal("a"), NonTerminal("S")}}, // S′ → baS + {"S′", String[Symbol]{Terminal("a"), Terminal("b")}}, // S′ → ab + {"S′", String[Symbol]{Terminal("b"), Terminal("a")}}, // S′ → ba + {"S′", ε}, // S′ → ε + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S → aSbS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S → bSaS + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b")}}, // S → aSb + {"S", String[Symbol]{Terminal("a"), Terminal("b"), NonTerminal("S")}}, // S → abS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a")}}, // S → bSa + {"S", String[Symbol]{Terminal("b"), Terminal("a"), NonTerminal("S")}}, // S → baS + {"S", String[Symbol]{Terminal("a"), Terminal("b")}}, // S → ab + {"S", String[Symbol]{Terminal("b"), Terminal("a")}}, // S → ba + }, + "S′", + ), + }, + { + name: "3rd", + g: grammars[2], + expectedGrammar: New( + []Terminal{"a", "b"}, + []NonTerminal{"S", "A", "B"}, + []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("B"), Terminal("a")}}, // S → aBa + {"S", String[Symbol]{NonTerminal("A"), Terminal("b")}}, // S → Ab + {"S", String[Symbol]{Terminal("a"), Terminal("a")}}, // S → aa + {"S", String[Symbol]{Terminal("a")}}, // S → a + {"S", String[Symbol]{Terminal("b")}}, // S → b + {"A", String[Symbol]{Terminal("b")}}, // A → b + {"B", String[Symbol]{Terminal("b")}}, // B → b + }, + "S", + ), + }, + { + name: "4th", + g: grammars[3], + expectedGrammar: New( + []Terminal{"b", "c", "d", "s"}, + []NonTerminal{"S"}, + []Production{ + {"S", String[Symbol]{Terminal("b")}}, // S → b + {"S", String[Symbol]{Terminal("d")}}, // S → d + {"S", String[Symbol]{Terminal("s")}}, // S → s + }, + "S", + ), + }, + { + name: "5th", + g: grammars[4], + expectedGrammar: New( + []Terminal{"a", "b", "c", "d"}, + []NonTerminal{"S", "A", "B"}, + []Production{ + {"S", String[Symbol]{NonTerminal("A"), NonTerminal("B")}}, // S → AB + {"A", String[Symbol]{Terminal("a"), NonTerminal("A")}}, // A → aA + {"A", String[Symbol]{Terminal("a")}}, // A → a + {"B", String[Symbol]{Terminal("b"), NonTerminal("B")}}, // B → bB + {"B", String[Symbol]{Terminal("b")}}, // B → b + }, + "S", + ), + }, + { + name: "6th", + g: grammars[5], + expectedGrammar: New( + []Terminal{"+", "-", "*", "/", "(", ")", "id"}, + []NonTerminal{"S", "E"}, + []Production{ + {"S", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // S → E + E + {"S", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // S → E - E + {"S", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // S → E * E + {"S", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // S → E / E + {"S", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // S → ( E ) + {"S", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // S → - E + {"S", String[Symbol]{Terminal("id")}}, // S → id + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // E → E + E + 
{"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // E → E - E + {"E", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // E → E * E + {"E", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // E → E / E + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + {"E", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // E → - E + {"E", String[Symbol]{Terminal("id")}}, // E → id + }, + "S", + ), + }, + { + name: "7th", + g: grammars[6], + expectedGrammar: New( + []Terminal{"+", "-", "*", "/", "(", ")", "id"}, + []NonTerminal{"S", "E", "T", "F"}, + []Production{ + {"S", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // S → E + T + {"S", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // S → E - T + {"S", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // S → T * F + {"S", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // S → T / F + {"S", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // S → ( E ) + {"S", String[Symbol]{Terminal("id")}}, // S → id + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // E → E + T + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // E → E - T + {"E", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // E → T * F + {"E", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // E → T / F + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + {"E", String[Symbol]{Terminal("id")}}, // E → id + {"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // T → T * F + {"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // T → T / F + {"T", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // T → ( E ) + {"T", String[Symbol]{Terminal("id")}}, // T → id + {"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // F → ( E ) + {"F", String[Symbol]{Terminal("id")}}, // F → id + }, + "S", + ), + }, + { + name: "8th", + g: grammars[7], + expectedGrammar: New( + []Terminal{"=", "|", "(", ")", "[", "]", "{", "}", "{{", "}}", "GRAMMAR", "IDENT", "TOKEN", "STRING", "REGEX"}, + []NonTerminal{"grammar", "name", "decls", "decl", "lhs", "rhs"}, + []Production{ + {"grammar", String[Symbol]{NonTerminal("name"), NonTerminal("decls")}}, // grammar → name decls + {"grammar", String[Symbol]{Terminal("GRAMMAR"), Terminal("IDENT")}}, // grammar → GRAMMAR IDENT + {"name", String[Symbol]{Terminal("GRAMMAR"), Terminal("IDENT")}}, // name → GRAMMAR IDENT + {"decls", String[Symbol]{NonTerminal("decls"), NonTerminal("decl")}}, // decls → decls decl + {"decls", String[Symbol]{NonTerminal("lhs"), Terminal("="), NonTerminal("rhs")}}, // decls → lhs "=" rhs + {"decls", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("STRING")}}, // decls → TOKEN "=" STRING + {"decls", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("REGEX")}}, // decls → TOKEN "=" REGEX + {"decl", String[Symbol]{NonTerminal("lhs"), Terminal("="), NonTerminal("rhs")}}, // decl → lhs "=" rhs + {"decl", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("STRING")}}, // decl → TOKEN "=" STRING + {"decl", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("REGEX")}}, // decl → TOKEN "=" REGEX + {"lhs", String[Symbol]{Terminal("IDENT")}}, // lhs → IDENT + {"rhs", String[Symbol]{NonTerminal("rhs"), NonTerminal("rhs")}}, // rhs → rhs rhs + 
{"rhs", String[Symbol]{NonTerminal("rhs"), Terminal("|"), NonTerminal("rhs")}}, // rhs → rhs "|" rhs + {"rhs", String[Symbol]{Terminal("("), NonTerminal("rhs"), Terminal(")")}}, // rhs → "(" rhs ")" + {"rhs", String[Symbol]{Terminal("["), NonTerminal("rhs"), Terminal("]")}}, // rhs → "[" rhs "]" + {"rhs", String[Symbol]{Terminal("{"), NonTerminal("rhs"), Terminal("}")}}, // rhs → "{" rhs "}" + {"rhs", String[Symbol]{Terminal("{{"), NonTerminal("rhs"), Terminal("}}")}}, // rhs → "{{" rhs "}}" + {"rhs", String[Symbol]{Terminal("IDENT")}}, // rhs → IDENT + {"rhs", String[Symbol]{Terminal("TOKEN")}}, // rhs → TOKEN + {"rhs", String[Symbol]{Terminal("STRING")}}, // rhs → STRING + }, + "grammar", + ), + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := tc.g.EliminateCycles() + assert.True(t, g.Equals(tc.expectedGrammar)) + }) + } +} + +func TestCFG_EliminateLeftRecursion(t *testing.T) { + tests := []struct { + name string + g CFG + expectedGrammar CFG + }{ + { + name: "1st", + g: grammars[0], + expectedGrammar: New( + []Terminal{"0", "1"}, + []NonTerminal{"S′", "X", "Y"}, + []Production{ + {"S′", String[Symbol]{NonTerminal("X"), NonTerminal("Y"), NonTerminal("X")}}, // S′ → XYX + {"S′", String[Symbol]{NonTerminal("X"), NonTerminal("X")}}, // S′ → XX + {"S′", String[Symbol]{NonTerminal("X"), NonTerminal("Y")}}, // S′ → XY + {"S′", String[Symbol]{NonTerminal("Y"), NonTerminal("X")}}, // S′ → YX + {"S′", String[Symbol]{Terminal("0"), NonTerminal("X")}}, // S′ → 0X + {"S′", String[Symbol]{Terminal("1"), NonTerminal("Y")}}, // S′ → 1Y + {"S′", String[Symbol]{Terminal("0")}}, // S′ → 0 + {"S′", String[Symbol]{Terminal("1")}}, // S′ → 1 + {"S′", ε}, // S′ → ε + {"X", String[Symbol]{Terminal("0"), NonTerminal("X")}}, // X → 0X + {"X", String[Symbol]{Terminal("0")}}, // X → 0 + {"Y", String[Symbol]{Terminal("1"), NonTerminal("Y")}}, // Y → 1Y + {"Y", String[Symbol]{Terminal("1")}}, // Y → 1 + }, + "S′", + ), + }, + { + name: "2nd", + g: grammars[1], + expectedGrammar: New( + []Terminal{"a", "b"}, + []NonTerminal{"S′", "S"}, + []Production{ + {"S′", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S′ → aSbS + {"S′", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S′ → bSaS + {"S′", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b")}}, // S′ → aSb + {"S′", String[Symbol]{Terminal("a"), Terminal("b"), NonTerminal("S")}}, // S′ → abS + {"S′", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a")}}, // S′ → bSa + {"S′", String[Symbol]{Terminal("b"), Terminal("a"), NonTerminal("S")}}, // S′ → baS + {"S′", String[Symbol]{Terminal("a"), Terminal("b")}}, // S′ → ab + {"S′", String[Symbol]{Terminal("b"), Terminal("a")}}, // S′ → ba + {"S′", ε}, // S′ → ε + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S → aSbS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S → bSaS + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b")}}, // S → aSb + {"S", String[Symbol]{Terminal("a"), Terminal("b"), NonTerminal("S")}}, // S → abS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a")}}, // S → bSa + {"S", String[Symbol]{Terminal("b"), Terminal("a"), NonTerminal("S")}}, // S → baS + {"S", String[Symbol]{Terminal("a"), Terminal("b")}}, // S → ab + {"S", String[Symbol]{Terminal("b"), Terminal("a")}}, // S → ba + }, + "S′", + ), + }, + { + name: "3rd", + g: grammars[2], + 
expectedGrammar: New( + []Terminal{"a", "b"}, + []NonTerminal{"S", "A", "B"}, + []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("B"), Terminal("a")}}, // S → aBa + {"S", String[Symbol]{NonTerminal("A"), Terminal("b")}}, // S → Ab + {"S", String[Symbol]{Terminal("a"), Terminal("a")}}, // S → aa + {"S", String[Symbol]{Terminal("a")}}, // S → a + {"S", String[Symbol]{Terminal("b")}}, // S → b + {"A", String[Symbol]{Terminal("b")}}, // A → b + {"B", String[Symbol]{Terminal("b")}}, // B → b + }, + "S", + ), + }, + { + name: "4th", + g: grammars[3], + expectedGrammar: New( + []Terminal{"b", "c", "d", "s"}, + []NonTerminal{"S"}, + []Production{ + {"S", String[Symbol]{Terminal("b")}}, // S → b + {"S", String[Symbol]{Terminal("d")}}, // S → d + {"S", String[Symbol]{Terminal("s")}}, // S → s + }, + "S", + ), + }, + { + name: "5th", + g: grammars[4], + expectedGrammar: New( + []Terminal{"a", "b", "c", "d"}, + []NonTerminal{"S", "A", "B"}, + []Production{ + {"S", String[Symbol]{NonTerminal("A"), NonTerminal("B")}}, // S → AB + {"A", String[Symbol]{Terminal("a"), NonTerminal("A")}}, // A → aA + {"A", String[Symbol]{Terminal("a")}}, // A → a + {"B", String[Symbol]{Terminal("b"), NonTerminal("B")}}, // B → bB + {"B", String[Symbol]{Terminal("b")}}, // B → b + }, + "S", + ), + }, + { + name: "6th", + g: grammars[5], + expectedGrammar: New( + []Terminal{"+", "-", "*", "/", "(", ")", "id"}, + []NonTerminal{"S", "E", "E′"}, + []Production{ + {"S", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // S → E + E + {"S", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // S → E - E + {"S", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // S → E * E + {"S", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // S → E / E + {"S", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // S → ( E ) + {"S", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // S → - E + {"S", String[Symbol]{Terminal("id")}}, // S → id + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")"), NonTerminal("E′")}}, // E → ( E ) E′ + {"E", String[Symbol]{Terminal("-"), NonTerminal("E"), NonTerminal("E′")}}, // E → - E E′ + {"E", String[Symbol]{Terminal("id"), NonTerminal("E′")}}, // E → id E′ + {"E′", String[Symbol]{Terminal("+"), NonTerminal("E"), NonTerminal("E′")}}, // E′ → + E E′ + {"E′", String[Symbol]{Terminal("-"), NonTerminal("E"), NonTerminal("E′")}}, // E′ → - E E′ + {"E′", String[Symbol]{Terminal("*"), NonTerminal("E"), NonTerminal("E′")}}, // E′ → * E E′ + {"E′", String[Symbol]{Terminal("/"), NonTerminal("E"), NonTerminal("E′")}}, // E′ → / E E′ + {"E′", ε}, // E′ → ε + }, + "S", + ), + }, + { + name: "7th", + g: grammars[6], + expectedGrammar: New( + []Terminal{"+", "-", "*", "/", "(", ")", "id"}, + []NonTerminal{"S", "E", "E′", "T", "T′", "F"}, + []Production{ + {"S", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // S → E + T + {"S", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // S → E - T + {"S", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // S → T * F + {"S", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // S → T / F + {"S", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // S → ( E ) + {"S", String[Symbol]{Terminal("id")}}, // S → id + {"E", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F"), NonTerminal("E′")}}, // E → T * F E′ + {"E", String[Symbol]{NonTerminal("T"), Terminal("/"), 
NonTerminal("F"), NonTerminal("E′")}}, // E → T / F E′ + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")"), NonTerminal("E′")}}, // E → ( E ) E′ + {"E", String[Symbol]{Terminal("id"), NonTerminal("E′")}}, // E → id E′ + {"E′", String[Symbol]{Terminal("+"), NonTerminal("T"), NonTerminal("E′")}}, // E′ → + T E′ + {"E′", String[Symbol]{Terminal("-"), NonTerminal("T"), NonTerminal("E′")}}, // E′ → - T E′ + {"E′", ε}, // E′ → ε + {"T", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")"), NonTerminal("T′")}}, // T → ( E ) T′ + {"T", String[Symbol]{Terminal("id"), NonTerminal("T′")}}, // T → id T′ + {"T′", String[Symbol]{Terminal("*"), NonTerminal("F"), NonTerminal("T′")}}, // T′ → * F T′ + {"T′", String[Symbol]{Terminal("/"), NonTerminal("F"), NonTerminal("T′")}}, // T′ → / F T′ + {"T′", ε}, // T′ → ε + {"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // F → ( E ) + {"F", String[Symbol]{Terminal("id")}}, // F → id + }, + "S", + ), + }, + { + name: "8th", + g: grammars[7], + expectedGrammar: New( + []Terminal{"=", "|", "(", ")", "[", "]", "{", "}", "{{", "}}", "GRAMMAR", "IDENT", "TOKEN", "STRING", "REGEX"}, + []NonTerminal{"grammar", "name", "decls", "decls′", "decl", "lhs", "rhs", "rhs′"}, + []Production{ + {"grammar", String[Symbol]{NonTerminal("name"), NonTerminal("decls")}}, // grammar → name decls + {"grammar", String[Symbol]{Terminal("GRAMMAR"), Terminal("IDENT")}}, // grammar → GRAMMAR IDENT + {"name", String[Symbol]{Terminal("GRAMMAR"), Terminal("IDENT")}}, // name → GRAMMAR IDENT + {"decls", String[Symbol]{NonTerminal("lhs"), Terminal("="), NonTerminal("rhs"), NonTerminal("decls′")}}, // decls → lhs "=" rhs decls′ + {"decls", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("REGEX"), NonTerminal("decls′")}}, // decls → TOKEN "=" REGEX decls′ + {"decls", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("STRING"), NonTerminal("decls′")}}, // decls → TOKEN "=" STRING decls′ + {"decls′", String[Symbol]{NonTerminal("decl"), NonTerminal("decls′")}}, // decls′ → decl decls′ + {"decls′", ε}, // decls′ → ε + {"decl", String[Symbol]{Terminal("IDENT"), Terminal("="), NonTerminal("rhs")}}, // decl → IDENT "=" rhs + {"decl", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("REGEX")}}, // decl → TOKEN "=" REGEX + {"decl", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("STRING")}}, // decl → TOKEN "=" STRING + {"lhs", String[Symbol]{Terminal("IDENT")}}, // lhs → IDENT + {"rhs", String[Symbol]{Terminal("("), NonTerminal("rhs"), Terminal(")"), NonTerminal("rhs′")}}, // rhs → "(" rhs ")" rhs′ + {"rhs", String[Symbol]{Terminal("["), NonTerminal("rhs"), Terminal("]"), NonTerminal("rhs′")}}, // rhs → "[" rhs "]" rhs′ + {"rhs", String[Symbol]{Terminal("{"), NonTerminal("rhs"), Terminal("}"), NonTerminal("rhs′")}}, // rhs → "{" rhs "}" rhs′ + {"rhs", String[Symbol]{Terminal("{{"), NonTerminal("rhs"), Terminal("}}"), NonTerminal("rhs′")}}, // rhs → "{{" rhs "}}" rhs′ + {"rhs", String[Symbol]{Terminal("IDENT"), NonTerminal("rhs′")}}, // rhs → IDENT rhs′ + {"rhs", String[Symbol]{Terminal("TOKEN"), NonTerminal("rhs′")}}, // rhs → TOKEN rhs′ + {"rhs", String[Symbol]{Terminal("STRING"), NonTerminal("rhs′")}}, // rhs → STRING rhs′ + {"rhs′", String[Symbol]{NonTerminal("rhs"), NonTerminal("rhs′")}}, // rhs′ → rhs rhs′ + {"rhs′", String[Symbol]{Terminal("|"), NonTerminal("rhs"), NonTerminal("rhs′")}}, // rhs′ → "|" rhs rhs′ + {"rhs′", ε}, // rhs′ → ε + }, + "grammar", + ), + }, + } + + for _, tc := range tests { + t.Run(tc.name, 
func(t *testing.T) { + g := tc.g.EliminateLeftRecursion() + assert.True(t, g.Equals(tc.expectedGrammar)) + }) + } +} + +func TestCFG_LeftFactor(t *testing.T) { + tests := []struct { + name string + g CFG + expectedGrammar CFG + }{ + { + name: "1st", + g: grammars[0], + expectedGrammar: grammars[0], + }, + { + name: "2nd", + g: grammars[1], + expectedGrammar: grammars[1], + }, + { + name: "3rd", + g: grammars[2], + expectedGrammar: New( + []Terminal{"a", "b"}, + []NonTerminal{"S", "S′", "A", "B"}, + []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("S′")}}, // S → aS′ + {"S", String[Symbol]{NonTerminal("A"), Terminal("b")}}, // S → Ab + {"S′", String[Symbol]{NonTerminal("B"), Terminal("a")}}, // S′ → Ba + {"S′", ε}, // S′ → ε + {"A", String[Symbol]{Terminal("b")}}, // A → b + {"A", ε}, // A → ε + {"B", String[Symbol]{NonTerminal("A")}}, // B → A + {"B", String[Symbol]{Terminal("b")}}, // B → b + }, + "S", + ), + }, + { + name: "4th", + g: grammars[3], + expectedGrammar: grammars[3], + }, + { + name: "5th", + g: grammars[4], + expectedGrammar: grammars[4], + }, + { + name: "6th", + g: grammars[5], + expectedGrammar: New( + []Terminal{"+", "-", "*", "/", "(", ")", "id"}, + []NonTerminal{"S", "E", "E′"}, + []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), NonTerminal("E′")}}, // E → EE′ + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + {"E", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // E → - E + {"E", String[Symbol]{Terminal("id")}}, // E → id + {"E′", String[Symbol]{Terminal("+"), NonTerminal("E")}}, // E′ → + E + {"E′", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // E′ → - E + {"E′", String[Symbol]{Terminal("*"), NonTerminal("E")}}, // E′ → * E + {"E′", String[Symbol]{Terminal("/"), NonTerminal("E")}}, // E′ → / E + }, + "S", + ), + }, + { + name: "7th", + g: grammars[6], + expectedGrammar: New( + []Terminal{"+", "-", "*", "/", "(", ")", "id"}, + []NonTerminal{"S", "E", "E′", "T", "T′", "F"}, + []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), NonTerminal("E′")}}, // E → EE′ + {"E", String[Symbol]{NonTerminal("T")}}, // E → T + {"E′", String[Symbol]{Terminal("+"), NonTerminal("T")}}, // E′ → + T + {"E′", String[Symbol]{Terminal("-"), NonTerminal("T")}}, // E′ → - T + {"T", String[Symbol]{NonTerminal("T"), NonTerminal("T′")}}, // T → TT′ + {"T", String[Symbol]{NonTerminal("F")}}, // T → F + {"T′", String[Symbol]{Terminal("*"), NonTerminal("F")}}, // T′ → * F + {"T′", String[Symbol]{Terminal("/"), NonTerminal("F")}}, // T′ → / F + {"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // F → ( E ) + {"F", String[Symbol]{Terminal("id")}}, // F → id + }, + "S", + ), + }, + { + name: "8th", + g: grammars[7], + expectedGrammar: New( + []Terminal{"=", "|", "(", ")", "[", "]", "{", "}", "{{", "}}", "GRAMMAR", "IDENT", "TOKEN", "STRING", "REGEX"}, + []NonTerminal{"grammar", "name", "decls", "decl", "token", "rule", "lhs", "rhs", "rhs′", "nonterm", "term"}, + []Production{ + {"grammar", String[Symbol]{NonTerminal("name"), NonTerminal("decls")}}, // grammar → name decls + {"name", String[Symbol]{Terminal("GRAMMAR"), Terminal("IDENT")}}, // name → GRAMMAR IDENT + {"decls", String[Symbol]{NonTerminal("decls"), NonTerminal("decl")}}, // decls → decls decl + {"decls", ε}, // decls → ε + {"decl", String[Symbol]{NonTerminal("token")}}, // decl → token + {"decl", String[Symbol]{NonTerminal("rule")}}, // decl → rule + 
{"token", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("STRING")}}, // token → TOKEN "=" STRING + {"token", String[Symbol]{Terminal("TOKEN"), Terminal("="), Terminal("REGEX")}}, // token → TOKEN "=" REGEX + {"rule", String[Symbol]{NonTerminal("lhs"), Terminal("="), NonTerminal("rhs")}}, // rule → lhs "=" rhs + {"lhs", String[Symbol]{NonTerminal("nonterm")}}, // lhs → nonterm + {"rhs", String[Symbol]{NonTerminal("rhs"), NonTerminal("rhs′")}}, // rhs → rhs rhs′ + {"rhs", String[Symbol]{Terminal("("), NonTerminal("rhs"), Terminal(")")}}, // rhs → "(" rhs ")" + {"rhs", String[Symbol]{Terminal("["), NonTerminal("rhs"), Terminal("]")}}, // rhs → "[" rhs "]" + {"rhs", String[Symbol]{Terminal("{"), NonTerminal("rhs"), Terminal("}")}}, // rhs → "{" rhs "}" + {"rhs", String[Symbol]{Terminal("{{"), NonTerminal("rhs"), Terminal("}}")}}, // rhs → "{{" rhs "}}" + {"rhs", String[Symbol]{NonTerminal("nonterm")}}, // rhs → nonterm + {"rhs", String[Symbol]{NonTerminal("term")}}, // rhs → term + {"rhs′", String[Symbol]{Terminal("|"), NonTerminal("rhs")}}, // rhs′ → "|" rhs + {"rhs′", String[Symbol]{NonTerminal("rhs")}}, // rhs′ → rhs + {"nonterm", String[Symbol]{Terminal("IDENT")}}, // nonterm → IDENT + {"term", String[Symbol]{Terminal("TOKEN")}}, // term → TOKEN + {"term", String[Symbol]{Terminal("STRING")}}, // term → STRING + }, + "grammar", + ), + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := tc.g.LeftFactor() + assert.True(t, g.Equals(tc.expectedGrammar)) + }) + } +} + +func TestGroupByCommonPrefix(t *testing.T) { + tests := []struct { + name string + prods set.Set[Production] + expectedGroups map[string][]string + }{ + { + name: "1st", + prods: set.New[Production](eqProduction, + Production{"A", String[Symbol]{Terminal("a")}}, + Production{"A", ε}, + ), + expectedGroups: map[string][]string{ + `"a"`: {`ε`}, + `ε`: {`ε`}, + }, + }, + { + name: "2nd", + prods: set.New[Production](eqProduction, + Production{"stmt", String[Symbol]{NonTerminal("expr")}}, + Production{"stmt", String[Symbol]{Terminal("if"), NonTerminal("expr"), Terminal("then"), NonTerminal("stmt")}}, + Production{"stmt", String[Symbol]{Terminal("if"), NonTerminal("expr"), Terminal("then"), NonTerminal("stmt"), Terminal("else"), NonTerminal("stmt")}}, + ), + expectedGroups: map[string][]string{ + `"if"`: {`expr "then" stmt`, `expr "then" stmt "else" stmt`}, + `expr`: {`ε`}, + }, + }, + { + name: "3rd", + prods: set.New[Production](eqProduction, + Production{"S", String[Symbol]{Terminal("a"), Terminal("b"), Terminal("c"), Terminal("d"), NonTerminal("A"), NonTerminal("B")}}, + Production{"S", String[Symbol]{Terminal("a"), Terminal("b"), Terminal("c"), Terminal("d"), NonTerminal("C"), NonTerminal("D")}}, + Production{"S", String[Symbol]{Terminal("a"), Terminal("b"), Terminal("c"), NonTerminal("E"), NonTerminal("F")}}, + Production{"S", String[Symbol]{Terminal("a"), Terminal("b"), Terminal("c"), NonTerminal("G"), NonTerminal("H")}}, + Production{"S", String[Symbol]{Terminal("a"), Terminal("b"), NonTerminal("I"), NonTerminal("J")}}, + Production{"S", String[Symbol]{Terminal("a"), Terminal("b"), NonTerminal("K"), NonTerminal("L")}}, + Production{"S", String[Symbol]{Terminal("a"), Terminal("b"), NonTerminal("M"), NonTerminal("N")}}, + Production{"S", String[Symbol]{Terminal("a"), NonTerminal("O"), NonTerminal("P")}}, + Production{"S", String[Symbol]{Terminal("a")}}, + Production{"S", String[Symbol]{Terminal("u"), Terminal("v"), NonTerminal("Q"), NonTerminal("R")}}, + Production{"S", 
String[Symbol]{Terminal("u"), Terminal("v"), Terminal("w"), NonTerminal("S"), NonTerminal("T")}}, + Production{"S", String[Symbol]{Terminal("x"), Terminal("y"), NonTerminal("U"), NonTerminal("V")}}, + Production{"S", String[Symbol]{Terminal("z"), NonTerminal("W")}}, + ), + expectedGroups: map[string][]string{ + `"a"`: {`"b" "c" "d" A B`, `"b" "c" "d" C D`, `"b" "c" E F`, `"b" "c" G H`, `"b" I J`, `"b" K L`, `"b" M N`, `O P`, `ε`}, + `"u"`: {`"v" Q R`, `"v" "w" S T`}, + `"x"`: {`"y" U V`}, + `"z"`: {`W`}, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + groups := groupByCommonPrefix(tc.prods) + + for prefix, suffixes := range groups.All() { + expectedSuffixes, found := tc.expectedGroups[prefix.String()] + assert.True(t, found, "Prefix %s is not expected", prefix) + + for suffix := range suffixes.All() { + assert.Contains(t, expectedSuffixes, suffix.String(), "Suffix %s not expected for prefix %s", suffix, prefix) + } + } + }) + } +} + +func TestCFG_ChomskyNormalForm(t *testing.T) { + tests := []struct { + name string + g CFG + expectedGrammar CFG + }{ + { + name: "1st", + g: grammars[0], + expectedGrammar: grammars[0], + }, + { + name: "2nd", + g: grammars[1], + expectedGrammar: grammars[1], + }, + { + name: "3rd", + g: grammars[2], + expectedGrammar: grammars[2], + }, + { + name: "4th", + g: grammars[3], + expectedGrammar: grammars[3], + }, + { + name: "5th", + g: grammars[4], + expectedGrammar: grammars[4], + }, + { + name: "6th", + g: grammars[5], + expectedGrammar: grammars[5], + }, + { + name: "7th", + g: grammars[6], + expectedGrammar: grammars[6], + }, + { + name: "8th", + g: grammars[7], + expectedGrammar: grammars[7], + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + g := tc.g.ChomskyNormalForm() + assert.True(t, g.Equals(tc.expectedGrammar)) + }) + } +} + +func TestCFG_String(t *testing.T) { + tests := []struct { + name string + g CFG + expectedString string + }{ + { + name: "1st", + g: grammars[0], + expectedString: "Terminal Symbols: \"0\" \"1\"\nNon-Terminal Symbols: S X Y\nStart Symbol: S\nProduction Rules:\n S → X Y X\n X → \"0\" X | ε\n Y → \"1\" Y | ε\n", + }, + { + name: "2nd", + g: grammars[1], + expectedString: "Terminal Symbols: \"a\" \"b\"\nNon-Terminal Symbols: S\nStart Symbol: S\nProduction Rules:\n S → \"a\" S \"b\" S | \"b\" S \"a\" S | ε\n", + }, + { + name: "3rd", + g: grammars[2], + expectedString: "Terminal Symbols: \"a\" \"b\"\nNon-Terminal Symbols: S B A\nStart Symbol: S\nProduction Rules:\n S → \"a\" B \"a\" | A \"b\" | \"a\"\n B → A | \"b\"\n A → \"b\" | ε\n", + }, + { + name: "4th", + g: grammars[3], + expectedString: "Terminal Symbols: \"b\" \"c\" \"d\" \"s\"\nNon-Terminal Symbols: S A B C D\nStart Symbol: S\nProduction Rules:\n S → A | \"s\"\n A → B\n B → C | \"b\"\n C → D\n D → \"d\"\n", + }, + { + name: "5th", + g: grammars[4], + expectedString: "Terminal Symbols: \"a\" \"b\" \"c\" \"d\"\nNon-Terminal Symbols: S A B C D\nStart Symbol: S\nProduction Rules:\n S → A B\n A → \"a\" A | \"a\"\n B → \"b\" B | \"b\"\n C → \"c\" C | \"c\"\n D → \"d\"\n", + }, + { + name: "6th", + g: grammars[5], + expectedString: "Terminal Symbols: \"(\" \")\" \"*\" \"+\" \"-\" \"/\" \"id\"\nNon-Terminal Symbols: S E\nStart Symbol: S\nProduction Rules:\n S → E\n E → E \"*\" E | E \"+\" E | E \"-\" E | E \"/\" E | \"(\" E \")\" | \"-\" E | \"id\"\n", + }, + { + name: "7th", + g: grammars[6], + expectedString: "Terminal Symbols: \"(\" \")\" \"*\" \"+\" \"-\" \"/\" \"id\"\nNon-Terminal Symbols: S E T F\nStart 
Symbol: S\nProduction Rules:\n S → E\n E → E \"+\" T | E \"-\" T | T\n T → T \"*\" F | T \"/\" F | F\n F → \"(\" E \")\" | \"id\"\n", + }, + { + name: "8th", + g: grammars[7], + expectedString: "Terminal Symbols: \"(\" \")\" \"=\" \"GRAMMAR\" \"IDENT\" \"REGEX\" \"STRING\" \"TOKEN\" \"[\" \"]\" \"{\" \"{{\" \"|\" \"}\" \"}}\"\nNon-Terminal Symbols: grammar name decls decl rule token lhs rhs nonterm term\nStart Symbol: grammar\nProduction Rules:\n grammar → name decls\n name → \"GRAMMAR\" \"IDENT\"\n decls → decls decl | ε\n decl → rule | token\n rule → lhs \"=\" rhs\n token → \"TOKEN\" \"=\" \"REGEX\" | \"TOKEN\" \"=\" \"STRING\"\n lhs → nonterm\n rhs → rhs \"|\" rhs | rhs rhs | \"(\" rhs \")\" | \"[\" rhs \"]\" | \"{\" rhs \"}\" | \"{{\" rhs \"}}\" | nonterm | term\n nonterm → \"IDENT\"\n term → \"STRING\" | \"TOKEN\"\n", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expectedString, tc.g.String()) + }) + } +} + +func TestCFG_addNewNonTerminal(t *testing.T) { + tests := []struct { + name string + g CFG + prefix NonTerminal + suffixes []string + expectedOK bool + expectedNonTerminal NonTerminal + }{ + { + name: "OK", + g: grammars[0], + prefix: NonTerminal("S"), + suffixes: []string{"_new"}, + expectedOK: true, + expectedNonTerminal: NonTerminal("S_new"), + }, + { + name: "NotOK", + g: grammars[0], + prefix: NonTerminal("S"), + suffixes: []string{""}, + expectedOK: false, + expectedNonTerminal: NonTerminal(""), + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + nonTerm, ok := tc.g.addNewNonTerminal(tc.prefix, tc.suffixes...) + assert.Equal(t, tc.expectedOK, ok) + assert.Equal(t, tc.expectedNonTerminal, nonTerm) + }) + } +} + +func TestCFG_orderTerminals(t *testing.T) { + tests := []struct { + name string + g CFG + expectedTerminals String[Terminal] + }{ + { + name: "OK", + g: grammars[4], + expectedTerminals: String[Terminal]{"a", "b", "c", "d"}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + terms := tc.g.orderTerminals() + assert.Equal(t, tc.expectedTerminals, terms) + }) + } +} + +func TestCFG_orderNonTerminals(t *testing.T) { + tests := []struct { + name string + g CFG + expectedVisited String[NonTerminal] + expectedUnvisited String[NonTerminal] + expectedNonTerminals String[NonTerminal] + }{ + { + name: "OK", + g: grammars[4], + expectedVisited: String[NonTerminal]{"S", "A", "B"}, + expectedUnvisited: String[NonTerminal]{"C", "D"}, + expectedNonTerminals: String[NonTerminal]{"S", "A", "B", "C", "D"}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + visited, unvisited, nonTerms := tc.g.orderNonTerminals() + assert.Equal(t, tc.expectedVisited, visited) + assert.Equal(t, tc.expectedUnvisited, unvisited) + assert.Equal(t, tc.expectedNonTerminals, nonTerms) + }) + } +} diff --git a/cfg/production.go b/cfg/production.go new file mode 100644 index 0000000..5e94de3 --- /dev/null +++ b/cfg/production.go @@ -0,0 +1,274 @@ +package cfg + +import ( + "bytes" + "fmt" + "iter" + "slices" + + . "github.com/moorara/algo/generic" + "github.com/moorara/algo/set" + "github.com/moorara/algo/sort" + "github.com/moorara/algo/symboltable" +) + +var ( + eqProduction = func(lhs, rhs Production) bool { + return lhs.Equals(rhs) + } + + eqProductionSet = func(lhs, rhs set.Set[Production]) bool { + return lhs.Equals(rhs) + } +) + +// Production represents a production rule. 
+// The productions of a grammar determine how the terminals and non-terminals can be combined to form strings. +type Production struct { + // Head or left side defines some of the strings denoted by the non-terminal symbol. + Head NonTerminal + // Body or right side describes one way in which strings of the non-terminal at the head can be constructed. + Body String[Symbol] +} + +// String returns a string representation of a production rule. +func (p Production) String() string { + return fmt.Sprintf("%s → %s", p.Head, p.Body) +} + +// Equals determines whether or not two production rules are the same. +func (p Production) Equals(rhs Production) bool { + return p.Head.Equals(rhs.Head) && p.Body.Equals(rhs.Body) +} + +// IsEmpty determines whether or not a production rule is an empty production (ε-production). +// +// An empty production (ε-production) is any production of the form A → ε. +func (p Production) IsEmpty() bool { + return len(p.Body) == 0 +} + +// IsSingle determines whether or not a production rule is a single production (unit production). +// +// A single production (unit production) is a production whose body is a single non-terminal (A → B). +func (p Production) IsSingle() bool { + return len(p.Body) == 1 && !p.Body[0].IsTerminal() +} + +// IsLeftRecursive determines whether or not a production rule is left recursive (immediate left recursive). +// +// A left recursive production is a production rule of the form of A → Aα +func (p Production) IsLeftRecursive() bool { + return len(p.Body) > 0 && p.Body[0].Equals(p.Head) +} + +// Productions is the interface for a set of production rules in a grammar. +type Productions interface { + fmt.Stringer + Cloner[Productions] + Equaler[Productions] + + Add(...Production) + Remove(...Production) + RemoveAll(...NonTerminal) + Get(NonTerminal) set.Set[Production] + Order(NonTerminal) []Production + All() iter.Seq[Production] + AllByHead() iter.Seq2[NonTerminal, set.Set[Production]] + AnyMatch(Predicate1[Production]) bool + AllMatch(Predicate1[Production]) bool +} + +// productions implements the Productions interface. +type productions struct { + table symboltable.SymbolTable[NonTerminal, set.Set[Production]] +} + +// NewProductions creates a new instance of the Productions. +func NewProductions() Productions { + return &productions{ + table: symboltable.NewQuadraticHashTable[NonTerminal, set.Set[Production]]( + hashNonTerminal, + eqNonTerminal, + eqProductionSet, + symboltable.HashOpts{}, + ), + } +} + +// String returns a string representation of production rules. +func (p *productions) String() string { + var b bytes.Buffer + + for head := range p.table.All() { + fmt.Fprintf(&b, "%s → ", head) + for _, q := range p.Order(head) { + fmt.Fprintf(&b, "%s | ", q.Body.String()) + } + b.Truncate(b.Len() - 3) + fmt.Fprintln(&b) + } + + return b.String() +} + +// Clone returns a deep copy of the production rules, ensuring the clone is independent of the original. +func (p *productions) Clone() Productions { + newP := NewProductions() + for q := range p.All() { + newP.Add(q) + } + + return newP +} + +// Equals determines whether or not two sets of production rules are the same. +func (p *productions) Equals(rhs Productions) bool { + q, ok := rhs.(*productions) + return ok && p.table.Equals(q.table) +} + +// Add adds a new production rule. 
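+// Multiple productions may be added at once; they are grouped by their head
+// non-terminal, and a new group is created for a head seen for the first time.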
+func (p *productions) Add(ps ...Production) { + for _, q := range ps { + if _, ok := p.table.Get(q.Head); !ok { + p.table.Put(q.Head, set.New[Production](eqProduction)) + } + + list, _ := p.table.Get(q.Head) + list.Add(q) + } +} + +// Remove removes a production rule. +func (p *productions) Remove(ps ...Production) { + for _, q := range ps { + if list, ok := p.table.Get(q.Head); ok { + list.Remove(q) + if list.IsEmpty() { + p.table.Delete(q.Head) + } + } + } +} + +// RemoveAll removes all production rules with the specified head non-terminal. +func (p *productions) RemoveAll(heads ...NonTerminal) { + for _, head := range heads { + p.table.Delete(head) + } +} + +// Get finds and returns a production rule by its head non-terminal symbol. +// It returns nil if no production rules are found for the specified head. +func (p *productions) Get(head NonTerminal) set.Set[Production] { + list, ok := p.table.Get(head) + if !ok { + return nil + } + + return list +} + +// Order orders an unordered set of production rules with the same head non-terminal in a deterministic way. +// +// The ordering criteria are as follows: +// +// 1. Productions whose bodies contain more non-terminal symbols are prioritized first. +// 2. If two productions have the same number of non-terminals, those with more terminal symbols in the body come first. +// 3. If two productions have the same number of non-terminals and terminals, they are ordered alphabetically based on the symbols in their bodies. +// +// The goal of this function is to ensure a consistent and deterministic order for any given set of production rules. +func (p *productions) Order(head NonTerminal) []Production { + // Collect all production rules into a slice from the set iterator. + prods := slices.Collect(p.Get(head).All()) + + // Sort the productions using a custom comparison function. + sort.Quick[Production](prods, func(lhs, rhs Production) int { + // First, compare based on the number of non-terminal symbols in the body. + lhsNonTermsLen, rhsNonTermsLen := len(lhs.Body.NonTerminals()), len(rhs.Body.NonTerminals()) + if lhsNonTermsLen > rhsNonTermsLen { + return -1 + } else if rhsNonTermsLen > lhsNonTermsLen { + return 1 + } + + // Next, if the number of non-terminals is the same, + // compare based on the number of terminal symbols. + lhsTermsLen, rhsTermsLen := len(lhs.Body.Terminals()), len(rhs.Body.Terminals()) + if lhsTermsLen > rhsTermsLen { + return -1 + } else if rhsTermsLen > lhsTermsLen { + return 1 + } + + // Then, if the number of terminals is also the same, + // compare alphabetically based on the string representation of the bodies. + lhsString, rhsString := lhs.String(), rhs.String() + if lhsString < rhsString { + return -1 + } else if rhsString < lhsString { + return 1 + } + + return 0 + }) + + return prods +} + +// All returns an iterator sequence containing all production rules. +func (p *productions) All() iter.Seq[Production] { + return func(yield func(Production) bool) { + for _, list := range p.table.All() { + for q := range list.All() { + if !yield(q) { + return + } + } + } + } +} + +// AllByHead returns an iterator sequence sequence of pairs, +// where each pair consists of a head non-terminal and its associated set of production rules. +func (p *productions) AllByHead() iter.Seq2[NonTerminal, set.Set[Production]] { + return p.table.All() +} + +// AnyMatch returns true if at least one production rule satisfies the provided predicate. 
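+//
+// For example (illustrative), checking whether any ε-production exists:
+//
+//	hasEmpty := p.AnyMatch(func(q Production) bool { return q.IsEmpty() })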
+func (p *productions) AnyMatch(pred Predicate1[Production]) bool { + for q := range p.All() { + if pred(q) { + return true + } + } + + return false +} + +// AllMatch returns true if all production rules satisfy the provided predicate. +// If the set of production rules is empty, it returns true. +func (p *productions) AllMatch(pred Predicate1[Production]) bool { + for q := range p.All() { + if !pred(q) { + return false + } + } + + return true +} + +// SelectMatch selects a subset of production rules that satisfy the given predicate. +// It returns a new set of production rules containing the matching productions, of the same type as the original set of production rules. +func (p *productions) SelectMatch(pred Predicate1[Production]) Productions { + newP := NewProductions() + + for q := range p.All() { + if pred(q) { + newP.Add(q) + } + } + + return newP +} diff --git a/cfg/production_test.go b/cfg/production_test.go new file mode 100644 index 0000000..492e258 --- /dev/null +++ b/cfg/production_test.go @@ -0,0 +1,716 @@ +package cfg + +import ( + "testing" + + . "github.com/moorara/algo/generic" + "github.com/moorara/algo/set" + "github.com/stretchr/testify/assert" +) + +func getTestProductions() []*productions { + p0 := NewProductions().(*productions) + + p1 := NewProductions().(*productions) + p1.Add(Production{"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}) // S → aSbS + p1.Add(Production{"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}) // S → bSaS + p1.Add(Production{"S", ε}) // S → ε + + p2 := NewProductions().(*productions) + p2.Add(Production{"S", String[Symbol]{NonTerminal("E")}}) // S → E + p2.Add(Production{"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}) // E → E + T + p2.Add(Production{"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}) // E → E - T + p2.Add(Production{"E", String[Symbol]{NonTerminal("T")}}) // E → T + p2.Add(Production{"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}) // T → T * F + p2.Add(Production{"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}) // T → T / F + p2.Add(Production{"T", String[Symbol]{NonTerminal("F")}}) // T → F + p2.Add(Production{"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}) // F → ( E ) + p2.Add(Production{"F", String[Symbol]{Terminal("id")}}) // F → id + + p3 := NewProductions().(*productions) + p3.Add(Production{"S", String[Symbol]{NonTerminal("E")}}) // S → E + p3.Add(Production{"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}) // E → E + E + p3.Add(Production{"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}) // E → E - E + p3.Add(Production{"E", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}) // E → E * E + p3.Add(Production{"E", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}) // E → E / E + p3.Add(Production{"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}) // E → ( E ) + p3.Add(Production{"E", String[Symbol]{Terminal("-"), NonTerminal("E")}}) // E → - E + p3.Add(Production{"E", String[Symbol]{Terminal("id")}}) // E → id + + return []*productions{p0, p1, p2, p3} +} + +func TestProduction(t *testing.T) { + tests := []struct { + name string + p Production + expectedString string + expectedIsEmpty bool + expectedIsSingle bool + expectedIsLeftRecursive bool + }{ + { + name: "1st", + p: Production{"S", ε}, + expectedString: `S → ε`, + 
expectedIsEmpty: true, + expectedIsSingle: false, + expectedIsLeftRecursive: false, + }, + { + name: "2nd", + p: Production{"A", String[Symbol]{NonTerminal("A")}}, + expectedString: `A → A`, + expectedIsEmpty: false, + expectedIsSingle: true, + expectedIsLeftRecursive: true, + }, + { + name: "3rd", + p: Production{"A", String[Symbol]{NonTerminal("B")}}, + expectedString: `A → B`, + expectedIsEmpty: false, + expectedIsSingle: true, + expectedIsLeftRecursive: false, + }, + { + name: "4th", + p: Production{"A", String[Symbol]{NonTerminal("A"), Terminal("a")}}, + expectedString: `A → A "a"`, + expectedIsEmpty: false, + expectedIsSingle: false, + expectedIsLeftRecursive: true, + }, + { + name: "5th", + p: Production{"A", String[Symbol]{NonTerminal("A"), NonTerminal("B")}}, + expectedString: `A → A B`, + expectedIsEmpty: false, + expectedIsSingle: false, + expectedIsLeftRecursive: true, + }, + { + name: "6th", + p: Production{"stmt", String[Symbol]{Terminal("if"), NonTerminal("expr"), Terminal("then"), NonTerminal("stmt")}}, + expectedString: `stmt → "if" expr "then" stmt`, + expectedIsEmpty: false, + expectedIsSingle: false, + expectedIsLeftRecursive: false, + }, + } + + notEqual := Production{"😐", String[Symbol]{Terminal("🙂"), NonTerminal("🙃")}} + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expectedString, tc.p.String()) + assert.True(t, tc.p.Equals(tc.p)) + assert.False(t, tc.p.Equals(notEqual)) + assert.Equal(t, tc.expectedIsEmpty, tc.p.IsEmpty()) + assert.Equal(t, tc.expectedIsSingle, tc.p.IsSingle()) + assert.Equal(t, tc.expectedIsLeftRecursive, tc.p.IsLeftRecursive()) + }) + } +} + +func TestNewProductions(t *testing.T) { + t.Run("OK", func(t *testing.T) { + p := NewProductions() + assert.NotNil(t, p) + }) +} + +func TestProductions_String(t *testing.T) { + p := getTestProductions() + + tests := []struct { + name string + p *productions + expectedSubstrings []string + }{ + { + name: "1st", + p: p[1], + expectedSubstrings: []string{ + `S → "a" S "b" S | "b" S "a" S | ε`, + }, + }, + { + name: "2nd", + p: p[2], + expectedSubstrings: []string{ + `S → E`, + `E → E "+" T | E "-" T | T`, + `T → T "*" F | T "/" F | F`, + `F → "(" E ")" | "id"`, + }, + }, + { + name: "3rd", + p: p[3], + expectedSubstrings: []string{ + `S → E`, + `E → E "*" E | E "+" E | E "-" E | E "/" E | "(" E ")" | "-" E | "id"`, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + s := tc.p.String() + + for _, expectedSubstring := range tc.expectedSubstrings { + assert.Contains(t, s, expectedSubstring) + } + }) + } +} + +func TestProductions_Clone(t *testing.T) { + p := getTestProductions() + + tests := []struct { + name string + p *productions + }{ + { + name: "1st", + p: p[1], + }, + { + name: "2nd", + p: p[2], + }, + { + name: "3rd", + p: p[3], + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + newP := tc.p.Clone() + assert.False(t, newP == tc.p) + assert.True(t, newP.Equals(tc.p)) + }) + } +} + +func TestProductions_Equals(t *testing.T) { + p := getTestProductions() + + tests := []struct { + name string + p *productions + rhs Productions + expectedEquals bool + }{ + { + name: "Equal", + p: p[2], + rhs: p[2], + expectedEquals: true, + }, + { + name: "NotEqual", + p: p[2], + rhs: p[3], + expectedEquals: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expectedEquals, tc.p.Equals(tc.rhs)) + }) + } +} + +func TestProductions_Add(t *testing.T) { + p := 
getTestProductions() + + tests := []struct { + name string + p *productions + ps []Production + expectedProductions *productions + }{ + { + name: "1st", + p: p[1], + ps: []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S → aSbS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S → bSaS + {"S", ε}, // S → ε + }, + expectedProductions: p[1], + }, + { + name: "2nd", + p: p[2], + ps: []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // E → E + T + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // E → E - T + {"E", String[Symbol]{NonTerminal("T")}}, // E → T + {"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // T → T * F + {"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // T → T / F + {"T", String[Symbol]{NonTerminal("F")}}, // T → F + {"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // F → ( E ) + {"F", String[Symbol]{Terminal("id")}}, // F → id + }, + expectedProductions: p[2], + }, + { + name: "3rd", + p: p[3], + ps: []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // E → E + E + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // E → E - E + {"E", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // E → E * E + {"E", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // E → E / E + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + {"E", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // E → - E + {"E", String[Symbol]{Terminal("id")}}, // E → id + }, + expectedProductions: p[3], + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + tc.p.Add(tc.ps...) 
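+ // Re-adding productions that already exist should leave the set unchanged (set semantics).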
+ assert.True(t, tc.p.Equals(tc.expectedProductions)) + }) + } +} + +func TestProductions_Remove(t *testing.T) { + p := getTestProductions() + + tests := []struct { + name string + p *productions + ps []Production + expectedProductions *productions + }{ + { + name: "1st", + p: p[1], + ps: []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S → aSbS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S → bSaS + {"S", ε}, // S → ε + }, + expectedProductions: p[0], + }, + { + name: "2nd", + p: p[2], + ps: []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // E → E + T + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // E → E - T + {"E", String[Symbol]{NonTerminal("T")}}, // E → T + {"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // T → T * F + {"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // T → T / F + {"T", String[Symbol]{NonTerminal("F")}}, // T → F + {"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // F → ( E ) + {"F", String[Symbol]{Terminal("id")}}, // F → id + }, + expectedProductions: p[0], + }, + { + name: "3rd", + p: p[3], + ps: []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // E → E + E + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // E → E - E + {"E", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // E → E * E + {"E", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // E → E / E + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + {"E", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // E → - E + {"E", String[Symbol]{Terminal("id")}}, // E → id + }, + expectedProductions: p[0], + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + tc.p.Remove(tc.ps...) + assert.True(t, tc.p.Equals(tc.expectedProductions)) + }) + } +} + +func TestProductions_RemoveAll(t *testing.T) { + p := getTestProductions() + + tests := []struct { + name string + p *productions + heads []NonTerminal + expectedProductions *productions + }{ + { + name: "1st", + p: p[1], + heads: []NonTerminal{"S"}, + expectedProductions: p[0], + }, + { + name: "2nd", + p: p[2], + heads: []NonTerminal{"S", "E", "T", "F"}, + expectedProductions: p[0], + }, + { + name: "3rd", + p: p[3], + heads: []NonTerminal{"S", "E"}, + expectedProductions: p[0], + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + tc.p.RemoveAll(tc.heads...) 
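+ // Removing every head non-terminal should leave an empty set of productions.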
+ assert.True(t, tc.p.Equals(tc.expectedProductions)) + }) + } +} + +func TestProductions_Get(t *testing.T) { + p := getTestProductions() + + s1 := set.New[Production](eqProduction, + Production{"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S → aSbS + Production{"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S → bSaS + Production{"S", ε}, // S → ε + ) + + s2 := set.New[Production](eqProduction, + Production{"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // T → T * F + Production{"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // T → T / F + Production{"T", String[Symbol]{NonTerminal("F")}}, // T → F + ) + + s3 := set.New[Production](eqProduction, + Production{"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // E → E + E + Production{"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // E → E - E + Production{"E", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // E → E * E + Production{"E", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // E → E / E + Production{"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + Production{"E", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // E → - E + Production{"E", String[Symbol]{Terminal("id")}}, // E → id + ) + + tests := []struct { + name string + p *productions + head NonTerminal + expectedProductions set.Set[Production] + }{ + { + name: "Nil", + p: p[0], + head: NonTerminal("E"), + expectedProductions: nil, + }, + { + name: "1st", + p: p[1], + head: NonTerminal("S"), + expectedProductions: s1, + }, + { + name: "2nd", + p: p[2], + head: NonTerminal("T"), + expectedProductions: s2, + }, + { + name: "3rd", + p: p[3], + head: NonTerminal("E"), + expectedProductions: s3, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + prods := tc.p.Get(tc.head) + + if tc.expectedProductions == nil { + assert.Nil(t, prods) + } else { + assert.True(t, prods.Equals(tc.expectedProductions)) + } + }) + } +} + +func TestProductions_Order(t *testing.T) { + p := getTestProductions() + + tests := []struct { + name string + p *productions + head NonTerminal + expectedProductions []Production + }{ + { + name: "1st", + p: p[1], + head: NonTerminal("S"), + expectedProductions: []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S → aSbS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S → bSaS + {"S", ε}, // S → ε + }, + }, + { + name: "2nd", + p: p[2], + head: NonTerminal("T"), + expectedProductions: []Production{ + {"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // T → T * F + {"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // T → T / F + {"T", String[Symbol]{NonTerminal("F")}}, // T → F + }, + }, + { + name: "3rd", + p: p[3], + head: NonTerminal("E"), + expectedProductions: []Production{ + {"E", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // E → E * E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // E → E + E + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // E → E - E + {"E", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // E → E / E + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + {"E", 
String[Symbol]{Terminal("-"), NonTerminal("E")}}, // E → - E + {"E", String[Symbol]{Terminal("id")}}, // E → id + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + prods := tc.p.Order(tc.head) + assert.Equal(t, tc.expectedProductions, prods) + }) + } +} + +func TestProductions_All(t *testing.T) { + p := getTestProductions() + + tests := []struct { + name string + p *productions + expectedProductions []Production + }{ + { + name: "1st", + p: p[1], + expectedProductions: []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S → aSbS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S → bSaS + {"S", ε}, // S → ε + }, + }, + { + name: "2nd", + p: p[2], + expectedProductions: []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // E → E + T + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // E → E - T + {"E", String[Symbol]{NonTerminal("T")}}, // E → T + {"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // T → T * F + {"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // T → T / F + {"T", String[Symbol]{NonTerminal("F")}}, // T → F + {"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // F → ( E ) + {"F", String[Symbol]{Terminal("id")}}, // F → id + }, + }, + { + name: "3rd", + p: p[3], + expectedProductions: []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // E → E + E + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // E → E - E + {"E", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // E → E * E + {"E", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // E → E / E + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + {"E", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // E → - E + {"E", String[Symbol]{Terminal("id")}}, // E → id + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + for p := range tc.p.All() { + assert.Contains(t, tc.expectedProductions, p) + } + }) + } +} + +func TestProductions_AllByHead(t *testing.T) { + p := getTestProductions() + + tests := []struct { + name string + p *productions + expectedProductions []Production + }{ + { + name: "1st", + p: p[1], + expectedProductions: []Production{ + {"S", String[Symbol]{Terminal("a"), NonTerminal("S"), Terminal("b"), NonTerminal("S")}}, // S → aSbS + {"S", String[Symbol]{Terminal("b"), NonTerminal("S"), Terminal("a"), NonTerminal("S")}}, // S → bSaS + {"S", ε}, // S → ε + }, + }, + { + name: "2nd", + p: p[2], + expectedProductions: []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("T")}}, // E → E + T + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("T")}}, // E → E - T + {"E", String[Symbol]{NonTerminal("T")}}, // E → T + {"T", String[Symbol]{NonTerminal("T"), Terminal("*"), NonTerminal("F")}}, // T → T * F + {"T", String[Symbol]{NonTerminal("T"), Terminal("/"), NonTerminal("F")}}, // T → T / F + {"T", String[Symbol]{NonTerminal("F")}}, // T → F + {"F", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // F → ( E ) + {"F", String[Symbol]{Terminal("id")}}, // F → id + }, + }, + { + 
name: "3rd", + p: p[3], + expectedProductions: []Production{ + {"S", String[Symbol]{NonTerminal("E")}}, // S → E + {"E", String[Symbol]{NonTerminal("E"), Terminal("+"), NonTerminal("E")}}, // E → E + E + {"E", String[Symbol]{NonTerminal("E"), Terminal("-"), NonTerminal("E")}}, // E → E - E + {"E", String[Symbol]{NonTerminal("E"), Terminal("*"), NonTerminal("E")}}, // E → E * E + {"E", String[Symbol]{NonTerminal("E"), Terminal("/"), NonTerminal("E")}}, // E → E / E + {"E", String[Symbol]{Terminal("("), NonTerminal("E"), Terminal(")")}}, // E → ( E ) + {"E", String[Symbol]{Terminal("-"), NonTerminal("E")}}, // E → - E + {"E", String[Symbol]{Terminal("id")}}, // E → id + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + for head, list := range tc.p.AllByHead() { + for p := range list.All() { + assert.True(t, p.Head.Equals(head)) + assert.Contains(t, tc.expectedProductions, p) + } + } + }) + } +} + +func TestProductions_AnyMatch(t *testing.T) { + p := getTestProductions() + + tests := []struct { + name string + p *productions + pred Predicate1[Production] + expectedAnyMatch bool + }{ + { + name: "OK", + p: p[2], + pred: func(p Production) bool { return p.IsSingle() }, + expectedAnyMatch: true, + }, + { + name: "NotOK", + p: p[2], + pred: func(p Production) bool { return p.IsEmpty() }, + expectedAnyMatch: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + anyMatch := tc.p.AnyMatch(tc.pred) + assert.Equal(t, tc.expectedAnyMatch, anyMatch) + }) + } +} + +func TestProductions_AllMatch(t *testing.T) { + p := getTestProductions() + + tests := []struct { + name string + p *productions + pred Predicate1[Production] + expectedAllMatch bool + }{ + { + name: "OK", + p: p[2], + pred: func(p Production) bool { return !p.IsEmpty() }, + expectedAllMatch: true, + }, + { + name: "NotOK", + p: p[2], + pred: func(p Production) bool { return !p.IsSingle() }, + expectedAllMatch: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + allMatch := tc.p.AllMatch(tc.pred) + assert.Equal(t, tc.expectedAllMatch, allMatch) + }) + } +} + +func TestProductions_SelectMatch(t *testing.T) { + p := getTestProductions() + + q1 := NewProductions().(*productions) + q1.Add(Production{"S", String[Symbol]{NonTerminal("E")}}) // S → E + q1.Add(Production{"E", String[Symbol]{NonTerminal("T")}}) // E → T + q1.Add(Production{"T", String[Symbol]{NonTerminal("F")}}) // T → F + + tests := []struct { + name string + p *productions + pred Predicate1[Production] + expectedSelectMatch *productions + }{ + { + name: "OK", + p: p[2], + pred: func(p Production) bool { return p.IsSingle() }, + expectedSelectMatch: q1, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + selectMatch := tc.p.SelectMatch(tc.pred) + assert.True(t, selectMatch.Equals(tc.expectedSelectMatch)) + }) + } +} diff --git a/cfg/string.go b/cfg/string.go new file mode 100644 index 0000000..b82b81f --- /dev/null +++ b/cfg/string.go @@ -0,0 +1,158 @@ +package cfg + +import ( + "hash" + "hash/fnv" + "strings" + + . "github.com/moorara/algo/hash" + "github.com/moorara/algo/set" +) + +// The empty string ε +var ε = String[Symbol]{} + +var ( + eqString = func(lhs, rhs String[Symbol]) bool { + return lhs.Equals(rhs) + } + + eqStringSet = func(lhs, rhs set.Set[String[Symbol]]) bool { + return lhs.Equals(rhs) + } +) + +// String represent a string of grammar symbols. +type String[T Symbol] []T + +// String returns a string representation of a string of symbols. 
+func (s String[T]) String() string {
+	if len(s) == 0 {
+		return "ε"
+	}
+
+	names := make([]string, len(s))
+	for i, symbol := range s {
+		names[i] = symbol.String()
+	}
+
+	return strings.Join(names, " ")
+}
+
+// Equals determines whether or not two strings are the same.
+func (s String[T]) Equals(rhs String[T]) bool {
+	if len(s) != len(rhs) {
+		return false
+	}
+
+	for i := range s {
+		if !s[i].Equals(rhs[i]) {
+			return false
+		}
+	}
+
+	return true
+}
+
+// HasPrefix checks whether a string starts with the given prefix.
+func (s String[T]) HasPrefix(prefix String[T]) bool {
+	ls, lp := len(s), len(prefix)
+	return ls >= lp && s[:lp].Equals(prefix)
+}
+
+// HasSuffix checks whether a string ends with the given suffix.
+func (s String[T]) HasSuffix(suffix String[T]) bool {
+	ls, lp := len(s), len(suffix)
+	return ls >= lp && s[ls-lp:].Equals(suffix)
+}
+
+// Append appends new symbols to the current string and returns a new string.
+func (s String[T]) Append(symbols ...T) String[T] {
+	newS := make(String[T], len(s)+len(symbols))
+
+	copy(newS, s)
+	copy(newS[len(s):], symbols)
+
+	return newS
+}
+
+// Concat concatenates the current string with one or more strings and returns a new string.
+func (s String[T]) Concat(ss ...String[T]) String[T] {
+	l := len(s)
+	for _, t := range ss {
+		l += len(t)
+	}
+
+	newS := make(String[T], l)
+
+	copy(newS, s)
+	i := len(s)
+	for _, t := range ss {
+		copy(newS[i:], t)
+		i += len(t)
+	}
+
+	return newS
+}
+
+// Terminals returns all terminal symbols of a string of symbols.
+func (s String[Symbol]) Terminals() String[Terminal] {
+	terms := String[Terminal]{}
+	for _, sym := range s {
+		if v, ok := any(sym).(Terminal); ok {
+			terms = append(terms, v)
+		}
+	}
+	return terms
+}
+
+// NonTerminals returns all non-terminal symbols of a string of symbols.
+func (s String[Symbol]) NonTerminals() String[NonTerminal] {
+	nonTerms := String[NonTerminal]{}
+	for _, sym := range s {
+		if v, ok := any(sym).(NonTerminal); ok {
+			nonTerms = append(nonTerms, v)
+		}
+	}
+	return nonTerms
+}
+
+// HashFuncForSymbolString creates a HashFunc for a string of symbols.
+// If h is nil, a default hash.Hash64 implementation will be used.
+func HashFuncForSymbolString(h hash.Hash64) HashFunc[String[Symbol]] {
+	if h == nil {
+		h = fnv.New64()
+	}
+
+	return func(s String[Symbol]) uint64 {
+		h.Reset()
+
+		for _, x := range s {
+			// Hash.Write never returns an error
+			_, _ = h.Write([]byte(x.String()))
+		}
+
+		return h.Sum64()
+	}
+}
+
+// LongestCommonPrefixOf computes the longest common prefix of a list of strings.
+// If the input is empty or there is no common prefix, it returns the empty string ε.
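+// For example, the longest common prefix of
+// "if" expr "then" stmt "else" stmt and "if" expr "then" stmt
+// is "if" expr "then" stmt.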
+func LongestCommonPrefixOf(ss ...String[Symbol]) String[Symbol] { + if len(ss) == 0 { + return ε + } + + lcp := ss[0] + + for i := 1; i < len(ss); i++ { + for !ss[i].HasPrefix(lcp) { + lcp = lcp[:len(lcp)-1] + if len(lcp) == 0 { + return ε + } + } + } + + return lcp +} diff --git a/cfg/string_test.go b/cfg/string_test.go new file mode 100644 index 0000000..2b00aa3 --- /dev/null +++ b/cfg/string_test.go @@ -0,0 +1,165 @@ +package cfg + +import ( + "hash" + "hash/fnv" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestString(t *testing.T) { + tests := []struct { + name string + s String[Symbol] + expectedString string + prefix String[Symbol] + expectedHasPrefix bool + suffix String[Symbol] + expectedHasSuffix bool + append []Symbol + expectedAppend String[Symbol] + concat []String[Symbol] + expectedConcat String[Symbol] + expectedTerminals String[Terminal] + expectedNonTerminals String[NonTerminal] + }{ + { + name: "Empty", + s: ε, + expectedString: `ε`, + prefix: String[Symbol]{}, + expectedHasPrefix: true, + suffix: String[Symbol]{}, + expectedHasSuffix: true, + append: []Symbol{}, + expectedAppend: ε, + concat: []String[Symbol]{ε}, + expectedConcat: ε, + expectedTerminals: String[Terminal]{}, + expectedNonTerminals: String[NonTerminal]{}, + }, + { + name: "AllTerminals", + s: String[Symbol]{Terminal("a"), Terminal("b"), Terminal("c")}, + expectedString: `"a" "b" "c"`, + prefix: String[Symbol]{Terminal("a"), Terminal("b")}, + expectedHasPrefix: true, + suffix: String[Symbol]{Terminal("a"), Terminal("c")}, + expectedHasSuffix: false, + append: []Symbol{Terminal("d")}, + expectedAppend: String[Symbol]{Terminal("a"), Terminal("b"), Terminal("c"), Terminal("d")}, + concat: []String[Symbol]{{Terminal("d"), Terminal("e"), Terminal("f")}}, + expectedConcat: String[Symbol]{Terminal("a"), Terminal("b"), Terminal("c"), Terminal("d"), Terminal("e"), Terminal("f")}, + expectedTerminals: String[Terminal]{"a", "b", "c"}, + expectedNonTerminals: String[NonTerminal]{}, + }, + { + name: "AllNonTerminals", + s: String[Symbol]{NonTerminal("A"), NonTerminal("B"), NonTerminal("C")}, + expectedString: `A B C`, + prefix: String[Symbol]{NonTerminal("A"), NonTerminal("C")}, + expectedHasPrefix: false, + suffix: String[Symbol]{NonTerminal("B"), NonTerminal("C")}, + expectedHasSuffix: true, + append: []Symbol{NonTerminal("D")}, + expectedAppend: String[Symbol]{NonTerminal("A"), NonTerminal("B"), NonTerminal("C"), NonTerminal("D")}, + concat: []String[Symbol]{{NonTerminal("D"), NonTerminal("E"), NonTerminal("F")}}, + expectedConcat: String[Symbol]{NonTerminal("A"), NonTerminal("B"), NonTerminal("C"), NonTerminal("D"), NonTerminal("E"), NonTerminal("F")}, + expectedTerminals: String[Terminal]{}, + expectedNonTerminals: String[NonTerminal]{"A", "B", "C"}, + }, + { + name: "TerminalsAndNonTerminals", + s: String[Symbol]{Terminal("a"), NonTerminal("A"), Terminal("b"), NonTerminal("B"), Terminal("c")}, + expectedString: `"a" A "b" B "c"`, + prefix: String[Symbol]{Terminal("a"), NonTerminal("A"), Terminal("b"), NonTerminal("B"), Terminal("c")}, + expectedHasPrefix: true, + suffix: String[Symbol]{Terminal("a"), NonTerminal("A"), Terminal("b"), NonTerminal("B"), Terminal("c")}, + expectedHasSuffix: true, + append: []Symbol{NonTerminal("C"), Terminal("d"), NonTerminal("D")}, + expectedAppend: String[Symbol]{Terminal("a"), NonTerminal("A"), Terminal("b"), NonTerminal("B"), Terminal("c"), NonTerminal("C"), Terminal("d"), NonTerminal("D")}, + concat: []String[Symbol]{{NonTerminal("C")}, {Terminal("d"), 
NonTerminal("D")}}, + expectedConcat: String[Symbol]{Terminal("a"), NonTerminal("A"), Terminal("b"), NonTerminal("B"), Terminal("c"), NonTerminal("C"), Terminal("d"), NonTerminal("D")}, + expectedTerminals: String[Terminal]{"a", "b", "c"}, + expectedNonTerminals: String[NonTerminal]{"A", "B"}, + }, + } + + notEqual := String[Symbol]{Terminal("🙂"), NonTerminal("🙃")} + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expectedString, tc.s.String()) + assert.True(t, tc.s.Equals(tc.s)) + assert.False(t, tc.s.Equals(notEqual)) + assert.Equal(t, tc.expectedHasPrefix, tc.s.HasPrefix(tc.prefix)) + assert.Equal(t, tc.expectedHasSuffix, tc.s.HasSuffix(tc.suffix)) + assert.Equal(t, tc.expectedAppend, tc.s.Append(tc.append...)) + assert.Equal(t, tc.expectedConcat, tc.s.Concat(tc.concat...)) + assert.Equal(t, tc.expectedTerminals, tc.s.Terminals()) + assert.Equal(t, tc.expectedNonTerminals, tc.s.NonTerminals()) + }) + } +} + +func TestHashFuncForSymbolString(t *testing.T) { + tests := []struct { + h hash.Hash64 + s String[Symbol] + expectedHash uint64 + }{ + { + h: nil, + s: String[Symbol]{Terminal("if"), NonTerminal("expr"), Terminal("then"), NonTerminal("stmt")}, + expectedHash: 0xb0616925421a7df6, + }, + { + h: fnv.New64(), + s: String[Symbol]{Terminal("if"), NonTerminal("expr"), Terminal("then"), NonTerminal("stmt"), Terminal("else"), NonTerminal("stmt")}, + expectedHash: 0xdf211ff9239df1ed, + }, + } + + for _, tc := range tests { + hash := HashFuncForSymbolString(tc.h)(tc.s) + assert.Equal(t, tc.expectedHash, hash) + } +} + +func TestLongestCommonPrefixOf(t *testing.T) { + tests := []struct { + name string + ss []String[Symbol] + expectedLongestCommonPrefix String[Symbol] + }{ + { + name: "Empty", + ss: []String[Symbol]{}, + expectedLongestCommonPrefix: ε, + }, + { + name: "NoCommonPrefix", + ss: []String[Symbol]{ + {NonTerminal("expr"), Terminal("?"), NonTerminal("stmt"), Terminal(":"), NonTerminal("stmt")}, + {Terminal("if"), NonTerminal("expr"), Terminal("then"), NonTerminal("stmt"), Terminal("else"), NonTerminal("stmt")}, + }, + expectedLongestCommonPrefix: ε, + }, + { + name: "CommonPrefix", + ss: []String[Symbol]{ + {Terminal("if"), NonTerminal("expr"), Terminal("then"), NonTerminal("stmt"), Terminal("else"), NonTerminal("stmt")}, + {Terminal("if"), NonTerminal("expr"), Terminal("then"), NonTerminal("stmt")}, + }, + expectedLongestCommonPrefix: String[Symbol]{Terminal("if"), NonTerminal("expr"), Terminal("then"), NonTerminal("stmt")}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + lcp := LongestCommonPrefixOf(tc.ss...) + assert.Equal(t, tc.expectedLongestCommonPrefix, lcp) + }) + } +} diff --git a/cfg/symbol.go b/cfg/symbol.go new file mode 100644 index 0000000..72f8cc0 --- /dev/null +++ b/cfg/symbol.go @@ -0,0 +1,82 @@ +package cfg + +import ( + "fmt" + + . "github.com/moorara/algo/generic" + . "github.com/moorara/algo/hash" +) + +var ( + eqTerminal = NewEqualFunc[Terminal]() + cmpTerminal = NewCompareFunc[Terminal]() + + eqNonTerminal = NewEqualFunc[NonTerminal]() + cmpNonTerminal = NewCompareFunc[NonTerminal]() + hashNonTerminal = HashFuncForString[NonTerminal](nil) +) + +// Symbol represents a grammar symbol (terminal or non-terminal). +type Symbol interface { + fmt.Stringer + + Name() string + Equals(Symbol) bool + IsTerminal() bool +} + +// Terminal represents a terminal symbol. +// Terminals are the basic symbols from which strings of a language are formed. 
+// In compiler terminology, a terminal corresponds to a token name (or token, for short).
+type Terminal string
+
+// String returns a string representation of a terminal symbol.
+func (t Terminal) String() string {
+	return fmt.Sprintf("%q", t.Name())
+}
+
+// Name returns the name of a terminal symbol.
+func (t Terminal) Name() string {
+	return string(t)
+}
+
+// Equals determines whether or not two terminal symbols are the same.
+func (t Terminal) Equals(rhs Symbol) bool {
+	if v, ok := rhs.(Terminal); ok {
+		return t == v
+	}
+	return false
+}
+
+// IsTerminal always returns true for terminal symbols.
+func (t Terminal) IsTerminal() bool {
+	return true
+}
+
+// NonTerminal represents a non-terminal symbol.
+// Non-terminals are syntactic variables that denote sets of strings.
+// Non-terminals impose a hierarchical structure on a language.
+type NonTerminal string
+
+// String returns a string representation of a non-terminal symbol.
+func (n NonTerminal) String() string {
+	return n.Name()
+}
+
+// Name returns the name of a non-terminal symbol.
+func (n NonTerminal) Name() string {
+	return string(n)
+}
+
+// Equals determines whether or not two non-terminal symbols are the same.
+func (n NonTerminal) Equals(rhs Symbol) bool {
+	if v, ok := rhs.(NonTerminal); ok {
+		return n == v
+	}
+	return false
+}
+
+// IsTerminal always returns false for non-terminal symbols.
+func (n NonTerminal) IsTerminal() bool {
+	return false
+}
diff --git a/cfg/symbol_test.go b/cfg/symbol_test.go
new file mode 100644
index 0000000..0b9e904
--- /dev/null
+++ b/cfg/symbol_test.go
@@ -0,0 +1,68 @@
+package cfg
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestTerminal(t *testing.T) {
+	tests := []struct {
+		value          string
+		expectedString string
+	}{
+		{value: "a", expectedString: `"a"`},
+		{value: "b", expectedString: `"b"`},
+		{value: "c", expectedString: `"c"`},
+		{value: "0", expectedString: `"0"`},
+		{value: "1", expectedString: `"1"`},
+		{value: "2", expectedString: `"2"`},
+		{value: "+", expectedString: `"+"`},
+		{value: "*", expectedString: `"*"`},
+		{value: "(", expectedString: `"("`},
+		{value: ")", expectedString: `")"`},
+		{value: "id", expectedString: `"id"`},
+		{value: "if", expectedString: `"if"`},
+	}
+
+	notEqual := Terminal("🙂")
+
+	for _, tc := range tests {
+		t.Run(tc.value, func(t *testing.T) {
+			tr := Terminal(tc.value)
+			assert.Equal(t, tc.expectedString, tr.String())
+			assert.Equal(t, tc.value, tr.Name())
+			assert.True(t, tr.Equals(Terminal(tc.value)))
+			assert.False(t, tr.Equals(NonTerminal(tc.value)))
+			assert.False(t, tr.Equals(notEqual))
+			assert.True(t, tr.IsTerminal())
+		})
+	}
+}
+
+func TestNonTerminal(t *testing.T) {
+	tests := []struct {
+		value string
+	}{
+		{value: "A"},
+		{value: "B"},
+		{value: "C"},
+		{value: "S"},
+		{value: "expr"},
+		{value: "stmt"},
+	}
+
+	notEqual := NonTerminal("🙂")
+
+	for _, tc := range tests {
+		t.Run(tc.value, func(t *testing.T) {
+			n := NonTerminal(tc.value)
+			assert.Equal(t, tc.value, n.String())
+			assert.Equal(t, tc.value, n.Name())
+			assert.True(t, n.Equals(NonTerminal(tc.value)))
+			assert.False(t, n.Equals(Terminal(tc.value)))
+			assert.False(t, n.Equals(notEqual))
+			assert.False(t, n.IsTerminal())
+		})
+	}
+}
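
Below is a minimal usage sketch of the string and symbol types added by this patch. It assumes the package is imported as github.com/moorara/algo/cfg (the module path used by the other packages in this repository); the program itself is illustrative and not part of the change.

package main

import (
	"fmt"

	"github.com/moorara/algo/cfg" // assumed import path for the new package
)

func main() {
	// Two production bodies that share a prefix: if expr then stmt [else stmt]
	ifThenElse := cfg.String[cfg.Symbol]{
		cfg.Terminal("if"), cfg.NonTerminal("expr"), cfg.Terminal("then"), cfg.NonTerminal("stmt"),
		cfg.Terminal("else"), cfg.NonTerminal("stmt"),
	}
	ifThen := cfg.String[cfg.Symbol]{
		cfg.Terminal("if"), cfg.NonTerminal("expr"), cfg.Terminal("then"), cfg.NonTerminal("stmt"),
	}

	fmt.Println(ifThenElse)                   // "if" expr "then" stmt "else" stmt
	fmt.Println(ifThenElse.HasPrefix(ifThen)) // true

	// The longest common prefix of the two bodies.
	lcp := cfg.LongestCommonPrefixOf(ifThenElse, ifThen)
	fmt.Println(lcp) // "if" expr "then" stmt

	// A hash function over symbol strings, using the default FNV-64 hash.
	hashString := cfg.HashFuncForSymbolString(nil)
	fmt.Printf("%#x\n", hashString(ifThenElse))
}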