Skip to content

Commit

Permalink
implement a tool for verifying language grammars (#12)
Browse files Browse the repository at this point in the history
The `passtool` currently only performs some basic checks (i.e., that
all references name existing rules and that no duplicate productions
are introduced).

In the future, it'll also provide code-generation facilities for
automating the creation of validation layers for languages. The idea is
to reduce the overhead of removing, changing, or adding new
intermediate languages as much as possible.

The tool is run as part of CI to make sure all language grammars are
valid.
  • Loading branch information
zerbina authored Aug 3, 2024
1 parent 678c57f commit 162e7e6
Show file tree
Hide file tree
Showing 7 changed files with 701 additions and 5 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,28 @@ jobs:

- name: "Run tests"
run: bin/tester

languages:
name: "Check and generate languages"
runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
filter: tree:0

# always use the most recent NimSkull version
- uses: nim-works/[email protected]
with:
nimskull-version: "*"

- name: Build koch
run: nim c -d:nimStrictMode --outdir:bin koch.nim

- name: Build passtool
run: bin/koch single passtool -d:nimStrictMode -d:release

# run the passtool for the highest-level language:
- name: "Check the grammar"
run: bin/passtool passes lang1
3 changes: 2 additions & 1 deletion koch.nim
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ Commands:
single <name> [args] builds the single program with the given name
"""
Programs: seq[(string, string)] = @[
("tester", "tools/tester.nim")
("tester", "tools/tester.nim"),
("passtool", "tools/passtool/passtool.nim")
]

var
Expand Down
8 changes: 4 additions & 4 deletions passes/design.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ RuleCore ::= SexpMatcher | Reference | (Identifier ':' Rule)
Rule ::= RuleCore ('*' | '+' | '?')?
SexprRule ::= '(' Identifier Rule* ')'
TopRule = Reference | SexprRule
Production = Reference | SexprRule
Def ::= Identifier '::=' TopMatcher ('|' TopMatcher)*
Append ::= Identifier '+=' TopMatcher ('|' TopMatcher)*
Remove ::= Identifier '-=' TopMatcher ('|' TopMatcher)*
Def ::= Identifier '::=' Production ('|' Production)*
Append ::= Identifier '+=' Production ('|' Production)*
Remove ::= Identifier '-=' Production ('|' Production)*
Top ::= ('.extends' Identifier)? (Def | Append | Remove)*
```
Expand Down
84 changes: 84 additions & 0 deletions tools/passtool/grammar.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
## Type definitions and associated helper procedures for the grammar IR.

import
std/[tables, strutils]

type
  Repeat* = enum ## repetition modifier attached to a `Rule`
    rOnce
    rZeroOrOne
    rZeroOrMore
    rOneOrMore

  SourcePos* = tuple[file, line, col: uint16]
    ## a position within some source, stored as three 16-bit fields

  Rule* = object
    pos*: SourcePos
      ## where the rule is located in the source
    name*: string
      ## optional name of the rule; empty when the rule is unnamed
    repeat*: Repeat
      ## how often the expression may repeat
    expr*: Expr
      ## the expression the rule wraps

  Expr* = object
    pos*: SourcePos
      ## where the expression is located in the source
    name*: string
      ## for a reference: the referenced name; for an S-expression: its
      ## symbol
    case isRef*: bool
    of true:
      # a reference carries nothing besides its name
      discard
    of false:
      rules*: seq[Rule]
        ## the rules making up the body of the S-expression

  Production* = object
    source*: string
      ## name of the language the production originates from
    expr*: Expr
      ## the production's expression

  Grammar* = OrderedTable[string, seq[Production]]
    ## maps a name to its list of productions, keeping insertion order

# Forward declaration: the `$` for `Rule` stringifies the rule's `expr`, while
# the `$` for `Expr` stringifies its nested rules, so one of the two has to be
# declared ahead of its definition.
proc `$`*(e: Expr): string

proc `$`*(r: Rule): string =
  ## Renders `r` as text: an optional `name:` prefix (only when the rule has
  ## a non-empty name), followed by the stringified expression and the suffix
  ## for the repetition modifier.
  let suffix =
    case r.repeat
    of rOnce: ""        # a plain rule carries no suffix
    of rZeroOrOne: "?"
    of rZeroOrMore: "*"
    of rOneOrMore: "+"

  if r.name != "":
    result = r.name & ":"

  result.add $r.expr
  result.add suffix

proc `$`*(e: Expr): string =
  ## Renders `e` as text. A reference becomes `<name>`; an S-expression
  ## becomes `(name rule1 rule2 ...)`, with every nested rule preceded by a
  ## single space.
  if e.isRef:
    result = "<" & e.name & ">"
  else:
    result = "(" & e.name
    for nested in e.rules:
      result.add ' '
      result.add $nested
    result.add ')'

proc `$`*(g: Grammar): string =
  ## Renders every entry of `g` as `name ::= first`, with each further
  ## production placed on its own line behind a `| ` marker that is indented
  ## by the length of the name plus two spaces. Every entry is terminated by
  ## a newline.
  for name, productions in g.pairs:
    # the continuation prefix only depends on the name, so build it once
    let separator = "\n" & repeat(' ', name.len + 2) & "| "
    result.add name & " ::= "
    for idx, production in productions:
      if idx != 0:
        result.add separator
      result.add $production.expr
    result.add "\n"
Loading

0 comments on commit 162e7e6

Please sign in to comment.