From 32c3054e983ceb236221bfa774d90b49c4d2c438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois-Ren=C3=A9=20Rideau?= Date: Sun, 3 Dec 2023 21:55:13 -0500 Subject: [PATCH] Rename, test, fix, and document the LL(1) parser combinator library (#1063) std/text/basic-parsers => std/parser/ll1 --- doc/.vuepress/config.js | 8 + doc/reference/std/misc/decimal.md | 24 +- doc/reference/std/parser/README.md | 7 + doc/reference/std/parser/ll1.md | 516 +++++++++++++++++++++++++++++ doc/reference/std/sugar.md | 4 +- src/std/build-spec.ss | 2 +- src/std/misc/decimal.ss | 67 ++-- src/std/parser/ll1-test.ss | 98 ++++++ src/std/parser/ll1.ss | 253 ++++++++++++++ src/std/text/basic-parsers-test.ss | 28 -- src/std/text/basic-parsers.ss | 217 ------------ src/std/text/json/util.ss | 8 +- 12 files changed, 949 insertions(+), 283 deletions(-) create mode 100644 doc/reference/std/parser/README.md create mode 100644 doc/reference/std/parser/ll1.md create mode 100644 src/std/parser/ll1-test.ss create mode 100644 src/std/parser/ll1.ss delete mode 100644 src/std/text/basic-parsers-test.ss delete mode 100644 src/std/text/basic-parsers.ss diff --git a/doc/.vuepress/config.js b/doc/.vuepress/config.js index ea9ebc1a3..b88d20779 100644 --- a/doc/.vuepress/config.js +++ b/doc/.vuepress/config.js @@ -143,6 +143,14 @@ module.exports = { ] }, + { title: "Text Parser Libraries", + path: "/reference/std/parser/", + children: [ + "parser/", + "parser/ll1" + ] + }, + { title: "Text Encoding and Decoding Libraries", path: "/reference/std/text/", children: [ diff --git a/doc/reference/std/misc/decimal.md b/doc/reference/std/misc/decimal.md index efe0c648d..c3bab3094 100644 --- a/doc/reference/std/misc/decimal.md +++ b/doc/reference/std/misc/decimal.md @@ -54,14 +54,30 @@ i.e. a rational number that is not a floating-point number. exponent-allowed: (exponent-allowed_ #f)) -> decimal ``` -`parse-decimal` expects and parses a decimal number on an `input`, +`parse-decimal` parses a decimal number on a `input` with the options specifed via keyword arguments. -The `input` will be cast to a `BufferedStringReader` using +The `input` can be a string, input port, BufferedStringReader, StringReader, or Reader, +and will be cast to a `BufferedStringReader` using [`open-buffered-string-reader`](../stdio.md#open-buffered-string-reader). `parse-decimal` will then side-effect this reader as it parses, and finally return the decimal number, -or raises a `parse-error` (from `:std/parser/base`). +or raise a `parse-error` (from `:std/parser/base`). + +The options are as for `ll1-decimal` below. + +## ll1-decimal +```scheme +(ll1-decimal + reader + sign-allowed?: (sign-allowed? #t) + decimal-mark: (decimal-mark #\.) + group-separator: (group-separator_ #f) + exponent-allowed: (exponent-allowed_ #f)) -> decimal +``` +`parse-decimal` parses a decimal number from a `reader` object +that satisfies the `PeekableStringReader` interface, +with the options specifed via keyword arguments. The keyword arguments `decimal-mark` and `group-separator` are each a character or false, and specify optional allowed decimal mark and group separator characters, @@ -87,7 +103,7 @@ before and/or after calling `parse-decimal`. `: PeekableStringReader sign-allowed?:Bool decimal-mark:Char group-separator:(Or Char Bool) exponent-allowed:(or Bool String) -> Decimal` -You may use utilities from [:std/text/basic-parsers](../text/basic-parsers.md) +You may use utilities from [:std/parser/ll1](../parser/ll1.md) to parse decimals as part of something bigger, or just use `string->decimal` below. ## string->decimal diff --git a/doc/reference/std/parser/README.md b/doc/reference/std/parser/README.md new file mode 100644 index 000000000..4f4823064 --- /dev/null +++ b/doc/reference/std/parser/README.md @@ -0,0 +1,7 @@ +# The Parser Library + +Gerbil comes with a variety of tools for parsing. +For the main parser interface see [std/parser](../parser). + +But for the most simple parsing needs, you may want to use our +LL(1) parser library [std/parser/ll1](ll1). diff --git a/doc/reference/std/parser/ll1.md b/doc/reference/std/parser/ll1.md new file mode 100644 index 000000000..c598a3c50 --- /dev/null +++ b/doc/reference/std/parser/ll1.md @@ -0,0 +1,516 @@ +# LL(1) Parser + +With this module, you can use parser combinators to create LL(1) parsers +working over a `PeekableStringReader` input. +Build your parser with combinators then use one of `ll1/reader`, +`ll1/string`, `ll1/port`, `ll1/file` or `ll1/file-lines` to consume input. + +::: tip To use the bindings from this module: +``` scheme +(import :std/parser/ll1) +``` +::: + +Beware that the 1 in LL(1) means that these parsers only use at most +one character of look-ahead from the reader, allowing them to work on raw ports. +However, this limitation means that if an alternative in a `ll1-or` fails +further than at the start, the next alternative will start +from where the previous one failed, and not from the point at which the `ll1-or` started. +This may be a reason to prefer `ll1-case` over `ll1-or` — +or to prefer a more advanced kind of parser if this is not enough. + +## Using LL(1) Parsers + +### ll1/reader +```scheme +(ll1/reader parser reader [description] [where]) => parse-result +``` +Given a LL(1) `parser` and a `reader` satisfying the `PeekableStringReader` interface, +return the result of parsing the reader with the parser. +In case of error, use the `description` and `where` specification in the error message. + +::: tip Examples: +``` scheme +> (ll1/reader ll1-sint (open-buffered-string-reader "-3")) +-3 +``` +::: + +### ll1/string +```scheme +(ll1/string parser string [description] [where]) => parse-result +``` +Given a LL(1) `parser` and a `string`, +return the result of parsing the string with the parser. +In case of error, use the `description` and `where` specification in the error message. + +::: tip Examples: +``` scheme +> (ll1/string (ll1-separated ll1-uint ll1-skip-space* ll1-eof) "12 3 4 56") +(12 3 4 56) +``` +::: + +### ll1/port +```scheme +(ll1/port parser port [description] [where]) => parse-result +``` +Given a LL(1) `parser` and a `port`, +return the result of parsing the port with the parser. +In case of error, use the `description` and `where` specification in the error message. + +::: tip Examples: +``` scheme +> (call-with-input-string "foo\nbar\nbaz\n" (lambda (port) (ll1/port ll1-lines port))) +("foo" "bar" "baz") +``` +::: + +### ll1/file +```scheme +(ll1/file parser path [description] [where]) => parse-result +``` +Given a LL(1) `parser` and a `path`, +return the result of parsing the file at said path with the parser. +In case of error, use the `description` and `where` specification in the error message. + +::: tip Examples: +``` scheme +> (ll1/file (ll1-result 42) "/dev/null") +42 +``` +::: + +### ll1/file-lines +```scheme +(ll1/file-lines parser path [description] [where]) => parse-result +``` +Given a LL(1) `parser` and a `path`, +return a list containing the result of parsing each line of the file with the parser. +In case of error, use the `description` and `where` specification in the error message. + +::: tip Examples: +``` scheme +> (ll1/file-lines ll1-line "/dev/null") +() +``` +::: + +## Helpers for LL(1) Parsers + +### peeker +``` +(peeker spec) => procedure +``` +Find a suitable procedure to recognize a character input (from `peek-char`), +based on specification `spec`: + + - If `spec` is a procedure, it designates itself. + - If `spec` is a character or the EOF object, it designates the function + that recognizes exactly this character. + - If `spec` is a string, it designates the function that recognizes + the elements of the string. + - If `spec` is a hash-table, it designates the function that recognizes + the keys of the table, returning the respective value. + - If `spec` is a list, each element is a specification as above, and + the function designated is one that recognizes any of the characters + recognized by one of these elements, tried in order. + +### eolf? +``` +(eolf? char-or-eof) => bool +``` +Return true if the argument is one of the character `#\newline`, +the character `#\return` or the eof-object `#!eof`. + +::: tip Examples: +``` scheme +> (andmap eolf? '(#\newline #\return #!eof)) +#t +> (ormap eolf? '(#\a #\b 1 "foo")) +#t +``` +::: + +### peekable-eof? +``` +(peekable-eof? reader) => bool +``` +Return true if the reader has reached the end of its data stream. + + +## Basic LL(1) Parsers + +### ll1-case +``` +(ll1-case (spec body ...) ... (else e ...)) => parser +(ll1-case (spec body ...) ...) => parser +``` +The `ll1-case` macro creates a LL(1) parser that +peeks at one character (of eof-object), then tries +each function specified by `spec` as per `peeker` on that character in order. +If one `spec` succeeds, it executes the `body ...` associated with that `spec`; +if that `body` starts with `=>` it is a function that takes the result of the peeker. +The body returns a function that in turn is called with the reader. +If no `spec` succeeds, the body of the `else` clause is used; +if no `else` clause is specified, a `parse-error` is raised. + +::: tip Examples: +``` scheme +> (import :std/text/char-set) +> (ll1/string + (ll1-repeated + (ll1-case (#\r (ll1-string "red")) + ("\r\n" ll1-eol) + (char-strict-whitespace? ll1-skip-space*) + (char-ascii-digit ll1-uint)) ll1-eof) + "\nred3 45\r\nred") +("\n" "red" 3 #!void 45 "\r\n" "red") +``` +::: + +Note how in the example above, the `#\newline` and `#\return` characters +are accepted by both the `"\r\n"` spec and the `char-strict-whitespace?` spec, +but the first one wins and causes `ll1-eol` to be called, returning the eol found. + +### ll1-peek +``` +(ll1-peek spec) => parser +``` +Return a parser that succeeds if the next character (or eof-object) is recognized +by the `peeker` for the `spec`, and otherwise fails. +The character is *not* read, but its value is returned. + +::: tip Examples: +``` scheme +> (ll1/string (ll1-begin0 (ll1-peek "abc") (ll1-char* char?)) "banana") +#\b +``` +::: + +### ll1-empty +```scheme +ll1-empty +``` +This function parses the empty string and returns `(void)`. +Trivial function to use while combining parsers. + +### ll1-char +``` +(ll1-char spec) => parser +``` +Return a parser that succeeds if the next character (or eof-object) is recognized +by the `peeker` for the `spec`, and otherwise fails. +The character is read, and its value returned. + +::: tip Examples: +``` scheme +> (ll1/string (ll1-char #\x) "x") +#\x +``` +::: + +### ll1-string +``` +(ll1-string string) => parser +``` +Return a parser that succeeds if the input contains string, +consuming it, and returning it. If some character in the stream +is not the next character in the string, raise a `parse-error`. + +::: tip Examples: +``` scheme +> (ll1/string (ll1-string "banana") "banana") +"banana" +``` +::: + +### ll1-char? +``` +(ll1-char? spec) => parser +``` +Return a parser that always succeeds. +If the next character (or eof-object) is recognized by the `peeker` for the `spec`, +then consume and return it. Otherwise, return `#f`. + +::: tip Examples: +``` scheme +> (ll1/string (ll1-list (ll1-char? #\b) (ll1-char? #\o) (ll1-char? #\a)) "ba") +(#\b #f #\a) +``` +::: + +### ll1-char* +``` +(ll1-char* spec) => parser +``` +Return a parser that always succeeds. +It consumes as many characters recognized by the `peeker` for the `spec` +as there are in the reader, and returns a string of them. + +::: tip Examples: +``` scheme +> (ll1/string (ll1-char* "ban") "banana") +"banana" +``` +::: + +### ll1-char+ +``` +(ll1-char+ spec) => parser +``` +Return a parser that consumes as many characters recognized +by the `peeker` for the `spec` as there are in the reader, +and returns a string of them. If the next character is not recognized, +raise a `parse-error`. + +::: tip Examples: +``` scheme +> (ll1/string (ll1-char+ "ban") "banana") +"banana" +``` +::: + +### ll1-skip-char* +``` +(ll1-skip-char* spec) => parser +``` +Return a parser that always succeeds. +It consumes as many characters recognized by the `peeker` for the `spec` +as there are in the reader, and returns `(void)`. + +::: tip Examples: +``` scheme +> (list (ll1/string (ll1-skip-char* "ban") "banana")) +(#!void) +``` +::: + +### ll1-skip-space* +``` +ll1-skip-space* => parser +``` +Return a parser that always succeeds. +It consumes as many space characters (as per `char-strict-whitespace?`) +as there are in the reader, and returns `(void)`. + +::: tip Examples: +``` scheme +> (list (ll1/string ll1-skip-space* " ")) +(#!void) +``` +::: + +### ll1-skip-space* +``` +(ll1-skip-space* spec) => parser +``` +Return a parser that always succeeds. +It consumes as many space characters (as per `char-strict-whitespace?`) +as there are in the reader, and returns `(void)`. + +::: tip Examples: +``` scheme +> (list (ll1/string ll1-skip-space* " ")) +(#!void) +``` +::: + + +### ll1-eof +``` +ll1-eof => parser +``` +A parser that succeeds when the reader is at eof. + +### ll1-eolf? +``` +ll1-eolf? => parser +``` +A parser that succeeds when the reader is at eof or before an eol. +Does not consume the eol. + +### ll1-eol +``` +ll1-eol => parser +``` +A parser that succeeds when the reader is before an eol. +Consumes the eol and returns it: `"\n"`, `"\r"` or `"\r\n"`. + +### ll1-eolf +``` +ll1-eolf => parser +``` +A parser that succeeds when the reader is before an eol or at eof. +Consumes the eol of eof and returns it: `"\n"`, `"\r"`, `"\r\n"` or `#!eof`. + +### ll1-uint +``` +ll1-uint => parser +(cut ll1-uint <> base) => parser +``` +Parser that parses a unsigned integer in decimal or in the specified `base`, return it. +No sign may be specified. + +### ll1-sint +``` +ll1-sint => parser +(cut ll1-sint <> base) => parser +``` +Parser that parses a signed integer in decimal or in the specified `base`, return it. +A sign may be specified, or omitted. + + +### ll1-n-chars +``` +(ll1-n-chars n spec) => parser +``` +Parser that reads `n` chars that satisfy the `peeker` `spec` +and returns them as a string. + + +### ll1-n-digits +``` +(ll1-n-digits n [base]) => parser +``` +Parser that reads `n` digits in specified `base` (defaults to 10), +and returns them as a number. + + +### ll1-line +``` +ll1-line => parser +``` +Parser that reads all characters to end of line or file, and returns them in a string. +Consumes the characters in line, but not those in the eol. + + +## LL(1) Parser Combinators + +### ll1-result +``` +(ll1-result result) => parser +``` +Macro that evaluates to a parser that always returns the `result` +without consuming any input. + + +### ll1-pure +``` +(ll1-pure result) => parser +``` +Function that returns to a parser that always returns the `result` +without consuming any input. + + +### ll1-bind +``` +(ll1-bind Ma aMb) => parser +``` +Given a parser `Ma` that returns a value of type `a`, +and a unary function `aMb` that takes a value of type `a` +and returns a value of type `b`, +returns a parser that first runs `Ma`, then takes its return value, +calls `aMb` with it, runs the parser returned, +and returns the value of that parser. + +NB: `ll1-bind` and `ll1-pure` give LL(1) parsers a monad structure. + +### ll1-begin +``` +(ll1-begin parsers ... last-parser) => parser +``` +A parser that runs all the parsers, and returns the value of the last one. + +### ll1-begin0 +``` +(ll1-begin0 first-parser parsers ...) => parser +``` +A parser that runs all the parsers, and returns the value of the last one. + + +### ll1-or +``` +(ll1-or parsers ...) => parser +``` +A parser that tries each of the `parsers` in sequence. If one fails, tries the next one. +Beware that if any parser fails after the first character, +the next parser will be run from where it failed, not from the start. + +For better behavior, use a different kind of parsers than LL(1). +For safer behavior within LL(1), use `ll1-case`. + +### ll1-or/list +``` +(ll1-or list-of-parsers) => parser +``` +A parser that tries each of the parsers in the `list-of-parsers` in sequence. +If one fails, tries the next one. +Beware that if any parser fails after the first character, +the next parser will be run from where it failed, not from the start. + +For better behavior, use a different kind of parsers than LL(1). +For safer behavior within LL(1), use `ll1-case`. + + +### ll1-repeated +``` +(ll1-repeated element terminator [rhead]) => parser +``` +A parser that repeatedly tries to run the `element` parser, +unless the `terminator` parser succeeds. +In the end, return the list of results from the element parser, in order, +prepended by the reverse of the list `rhead` (defaults to the empty list `'()`). + + +### ll1-separated +``` +(ll1-separated element separator terminator [rhead]) => parser +``` +A parser that repeatedly tries to run the `element` parser +with the `separator` parser in between two runs of the `element` parser, +until `terminator` parser succeeds. +In the end, return the list of results from the `element` parser, in order, +prepended by the reverse of the list `rhead` (defaults to the empty list `'()`). +Empty lists are accepted. + +### ll1* +``` +(ll1* f . elements) => parser +``` +A parser that runs each of the parsers in `elements` in sequence, +collects their results, and calls `f` on the results. + +### ll1-list +``` +(ll1-list f . elements) => parser +``` +A parser that runs each of the parsers in `elements` in sequence, +and collects their results into a list. + +### ll1-n-times +``` +(ll1-n-times n element) => parser +``` +A parser that runs the `element` parser exactly `n` times +and collects the results into a list. + +### ll1-lines +``` +ll1-lines => parser +(cut ll1-lines <> line) => parser +``` +A parser that reads many lines each as by the `line` parser if provided or `ll1-line` by default, +and collects the parsing results in a list. + +### ll1-to-eof +``` +(ll1-to-eof parser) => parser +``` +A parser that parses results as per `parser` then fails if the EOF hasn't been reached. + +### ll1-skip-space-to-eof +``` +ll1-skip-space-to-eof => parser +``` +A parser that skips over any remaining space until EOF. diff --git a/doc/reference/std/sugar.md b/doc/reference/std/sugar.md index e3e399145..7b11bc897 100644 --- a/doc/reference/std/sugar.md +++ b/doc/reference/std/sugar.md @@ -319,7 +319,7 @@ Anaphoric `when`. Evaluates and binds *test* to *id*. Evaluates *body ...* if ::: tip Examples: ```scheme -> (import :std/text/basic-parsers) +> (import :std/text/char-set) > (def (foo c) (awhen (v (char-ascii-digit c)) (* v v))) > (foo #\3) 9 @@ -561,7 +561,7 @@ the `if-let` offered in Common Lisp by Alexandria and UIOP. ::: tip Examples: ```scheme -> (import :std/text/basic-parsers) +> (import :std/text/char-set) > (def (foo a b c) (if-let ((x (char-ascii-digit a)) (y (char-ascii-digit b)) diff --git a/src/std/build-spec.ss b/src/std/build-spec.ss index c04f59c88..72c3d8634 100644 --- a/src/std/build-spec.ss +++ b/src/std/build-spec.ss @@ -176,10 +176,10 @@ "parser/deflexer" "parser/grammar-reader" "parser/grammar" + "parser/ll1" "parser" ;; :std/text "text/char-set" - "text/basic-parsers" "text/basic-printers" "text/utf8" "text/utf16" diff --git a/src/std/misc/decimal.ss b/src/std/misc/decimal.ss index 4ad061577..93dcd2b7e 100644 --- a/src/std/misc/decimal.ss +++ b/src/std/misc/decimal.ss @@ -1,5 +1,6 @@ (export decimal? + ll1-decimal parse-decimal string->decimal write-decimal @@ -23,8 +24,8 @@ integer-log factor-out-powers factor-out-powers-of-2) (only-in :std/misc/ports with-output) (only-in :std/parser/base raise-parse-error) + (only-in :std/parser/ll1 ll1-skip-char* ll1-eof) (only-in :std/sugar syntax-eval) - (only-in :std/text/basic-parsers parse-and-skip-any-whitespace parse-eof) (only-in :std/text/basic-printers write-n-chars) (only-in :std/text/char-set digit-char char-ascii-digit char-strict-whitespace?) (only-in :std/values first-value)) @@ -36,6 +37,7 @@ (power-of-5 (first-value (factor-out-powers-of-2 (denominator x)))) #t))) +;; If n is a power of 5, return which power that is, otherwise return #f ;; : Integer -> (OrFalse Nat) (def (power-of-5 n) (and (exact-integer? n) (positive? n) @@ -48,7 +50,8 @@ (let (l (power-of-5 q)) (and l (+ l (* 440 k))))))))) -;; `parse-decimal` expects and parses a decimal number on the PeekableStringReader. +;; LL(1) parser for a decimal number, in a style compatible with std/parser/ll1. +;; `ll1-decimal` parses a decimal number from a PeekableStringReader. ;; The character parameters `decimal-mark` and `group-separator` provide ;; support for different (typically cultural) numerical conventions. ;; For convenience, a `group-separator` of #t will be treated as the comma character. @@ -60,15 +63,14 @@ ;; when `exponent-allowed` is a string. ;; Side-effects the PeekableStringReader, and returns the decimal number, or raises an exception. ;; It is up to the caller to ignore and leading or trailing whitespace and check for eof -;; before and/or after calling `parse-decimal`. +;; before and/or after calling `ll1-decimal`. ;; : PeekableStringReader sign-allowed?:Bool decimal-mark:Char group-separator:(Or Char Bool) exponent-allowed:(or Bool String) -> Decimal -(def (parse-decimal - pre-reader +(def (ll1-decimal + reader sign-allowed?: (sign-allowed? #t) decimal-mark: (decimal-mark #\.) group-separator: (group-separator_ #f) exponent-allowed: (exponent-allowed_ #f)) - (def reader (PeekableStringReader (open-buffered-string-reader pre-reader))) (check-argument (boolean? sign-allowed?) "boolean" sign-allowed?) (check-argument (or (char? decimal-mark) (boolean? decimal-mark)) "char or boolean" decimal-mark) (check-argument (or (boolean? group-separator_) (char? group-separator_)) @@ -94,25 +96,25 @@ (def valid? #f) ;; have we seen at least one digit (def (peek) (reader.peek-char)) (def c (peek)) - (def (bad) (raise-parse-error parse-decimal "Unexpected character" #f pre-reader)) + (def (bad) (raise-parse-error ll1-decimal "Unexpected character" #f reader)) (def (next) (reader.read-char) (set! c (peek))) - (def (parse-sign) (case c ((#\+) (next) 1) ((#\-) (next) -1) (else 1))) + (def (ll1-sign) (case c ((#\+) (next) 1) ((#\-) (next) -1) (else 1))) (let/cc return - (when (eof-object? c) (raise-parse-error parse-decimal "Unexpected EOF" #!eof pre-reader)) - (when sign-allowed? (set! sign (parse-sign))) + (when (eof-object? c) (raise-parse-error ll1-decimal "Unexpected EOF" #!eof reader)) + (when sign-allowed? (set! sign (ll1-sign))) (def (done) (return (* sign (/ numerator denominator) (expt 10 (* exponent-sign exponent))))) - (def (parse-left-digit-or-group-separator) + (def (ll1-left-digit-or-group-separator) (cond ((char-ascii-digit c) => (lambda (d) (set! numerator (+ (* numerator 10) d)) (set! valid? #t) (next) - (parse-left-digit-or-group-separator))) + (ll1-left-digit-or-group-separator))) ((and group-separator (eqv? group-separator c)) (next) - (parse-left-digit-or-group-separator)) + (ll1-left-digit-or-group-separator)) (else (maybe-decimal-mark)))) (def (maybe-decimal-mark) (cond @@ -120,12 +122,12 @@ (next) (if valid? ;; if we have seen a left digit (maybe-right-digit) - (parse-right-digit))) + (ll1-right-digit))) (valid? (maybe-exponent-marker)) (else (bad)))) - (def (parse-right-digit) + (def (ll1-right-digit) (cond ((char-ascii-digit c) => (lambda (d) @@ -153,9 +155,9 @@ (maybe-exponent-sign)) (else (done)))) (def (maybe-exponent-sign) - (set! exponent-sign (parse-sign)) - (parse-exponent-digit)) - (def (parse-exponent-digit) + (set! exponent-sign (ll1-sign)) + (ll1-exponent-digit)) + (def (ll1-exponent-digit) (cond ((char-ascii-digit c) => (lambda (d) @@ -174,7 +176,18 @@ (maybe-exponent-digit))) (else (done)))) - (parse-left-digit-or-group-separator)))) + (ll1-left-digit-or-group-separator)))) + +;; Cast some input to a buffered-string-reader and parse it as a decimal +;; The input can be a string, input port, BufferedStringReader, StringReader, or Reader. +(def (parse-decimal input + sign-allowed?: (sign-allowed? #t) + decimal-mark: (decimal-mark #\.) + group-separator: (group-separator #f) + exponent-allowed: (exponent-allowed #f)) + (ll1-decimal (PeekableStringReader (open-buffered-string-reader input)) + sign-allowed?: sign-allowed? decimal-mark: decimal-mark + group-separator: group-separator exponent-allowed: exponent-allowed)) ;; String sign-allowed?:Bool decimal-mark:Char group-separator:(Or Char Bool) exponent-allowed:(or Bool String) allow-leading-whitespace?:Bool allow-trailing-whitespace?:Bool start:Nat end:(OrFalse Nat) -> Decimal (def (string->decimal s @@ -199,16 +212,16 @@ (lambda (port) (def reader (PeekableStringReader (open-buffered-string-reader port))) (when allow-leading-whitespace? - (parse-and-skip-any-whitespace reader (make-space? allow-leading-whitespace?))) + ((ll1-skip-char* (make-space? allow-leading-whitespace?)) reader)) (begin0 - (parse-decimal reader - sign-allowed?: sign-allowed? - decimal-mark: decimal-mark - group-separator: group-separator - exponent-allowed: exponent-allowed) + (ll1-decimal reader + sign-allowed?: sign-allowed? + decimal-mark: decimal-mark + group-separator: group-separator + exponent-allowed: exponent-allowed) (when allow-trailing-whitespace? - (parse-and-skip-any-whitespace reader (make-space? allow-trailing-whitespace?))) - (parse-eof reader))))) + ((ll1-skip-char* (make-space? allow-trailing-whitespace?)) reader)) + (ll1-eof reader))))) ;; Given a positive integer d of the form 2^m*5^n (reduced denominator of a decimal number), ;; compute c such that c*d = c*(2^m*5^n) = 10^max(m,n). diff --git a/src/std/parser/ll1-test.ss b/src/std/parser/ll1-test.ss new file mode 100644 index 000000000..8fc676b6a --- /dev/null +++ b/src/std/parser/ll1-test.ss @@ -0,0 +1,98 @@ +(import + :std/error + (only-in :std/parser/base parse-error?) + :std/sugar + :std/test + :std/text/char-set + "./ll1") + +(export ll1-test) + +(defrule (check-parse parser string result) + (begin + (check-equal? (ll1/string parser string) result) + (check-equal? (call-with-input-string string (cut ll1/port parser <>)) result))) +(defrule (check-parse-error parser string) + (begin + (check-exception (ll1/string parser string) parse-error?) + (check-exception (call-with-input-string string (cut ll1/port parser <>)) parse-error?))) + +(def ll1-test + (test-suite "test suite for std/parser/ll1" + (test-case "empty" + (check-parse ll1-empty "" (void)) + (check-parse (ll1-result 42) "" 42) + (check-parse (ll1-pure 42) "" 42) + (check-parse-error ll1-empty "foo") + (check-parse-error (ll1-result 42) "foo") + (check-parse-error (ll1-pure 42) "foo")) + (test-case "ll1-uint" + (check-parse ll1-uint "1" 1) + (check-parse ll1-uint "1984" 1984) + (check-parse ll1-uint "010" 10) ;; ain't no octal + (check-parse (cut ll1-uint <> 8) "10" 8) ;; octal this time. + (check-parse-error ll1-uint " 1") ;; no space allowed in front unless you ask + (check-parse-error ll1-uint "1 no junk allowed")) + (test-case "char" + (check-parse (ll1-char "fo") "f" #\f) + (check-parse (ll1-char "fo") "o" #\o) + (check-parse (ll1-char char-ascii-alphabetic?) "a" #\a)) + (test-case "parser combinators" + ;; Parser from AdventOfCode 2023 Day 2 https://adventofcode.com/2023/day/2 + (def (ll1-color color) + (ll1-begin (ll1-string (as-string color)) (ll1-pure color))) + (def ll1-color-drawing + (ll1-bind ll1-uint + (lambda (n) + (ll1* cons + (ll1-begin (ll1-string " ") + (ll1-peek "rgb") + (ll1-or (ll1-color 'red) + (ll1-color 'green) + (ll1-color 'blue))) + (ll1-result n))))) + (def ll1-end-of-drawing + (ll1-peek [#!eof ";\n\r"])) + (def ll1-drawing + (ll1-separated ll1-color-drawing (ll1-string ", ") ll1-end-of-drawing)) + (def ll1-game + (ll1* cons + (ll1-begin (ll1-string "Game ") ll1-uint) + (ll1-begin (ll1-string ": ") + (ll1-separated ll1-drawing (ll1-string "; ") ll1-eolf?)))) + (def ll1-games (ll1-repeated (ll1-begin0 ll1-game ll1-eolf) ll1-eof)) + (check-parse + ll1-games + "Game 1: 3 blue, 4 red; 1 red, 2 green, 6 blue; 2 green +Game 2: 1 blue, 2 green; 3 green, 4 blue, 1 red; 1 green, 1 blue +Game 3: 8 green, 6 blue, 20 red; 5 blue, 4 red, 13 green; 5 green, 1 red +Game 4: 1 green, 3 red, 6 blue; 3 green, 6 red; 3 green, 15 blue, 14 red +Game 5: 6 red, 1 blue, 3 green; 2 blue, 1 red, 2 green" + '((1 ((blue . 3) (red . 4)) ((red . 1) (green . 2) (blue . 6)) ((green . 2))) + (2 ((blue . 1) (green . 2)) ((green . 3) (blue . 4) (red . 1)) ((green . 1) (blue . 1))) + (3 ((green . 8) (blue . 6) (red . 20)) ((blue . 5) (red . 4) (green . 13)) ((green . 5) (red . 1))) + (4 ((green . 1) (red . 3) (blue . 6)) ((green . 3) (red . 6)) ((green . 3) (blue . 15) (red . 14))) + (5 ((red . 6) (blue . 1) (green . 3)) ((blue . 2) (red . 1) (green . 2))))) + (check-parse ll1-games "Game 1: 1 red\n" '((1 ((red . 1))))) + (check-parse ll1-games "Game 42: " '((42)))) + (test-case "parser combinators #2" + (check-parse (ll1-list ll1-sint (ll1-string "foo")) "-5foo" [-5 "foo"]) + (check-parse (ll1-n-chars 6 "ban") "banana" "banana") + (check-parse (ll1-n-times 3 (ll1-n-chars 2 char-ascii-alphabetic?)) "banana" '("ba" "na" "na")) + (check-parse (ll1-char (hash (#\a 1) (#\b 2))) "b" #\b) + (check-parse (ll1-list (ll1-char #\b) (ll1-char? #\x) (ll1-char? #\a) + (ll1-char+ #\n) (ll1-char* "na") + ll1-skip-space* (ll1-skip-char* char-ascii-alphabetic?) + ll1-eol (ll1-n-digits 2) ll1-eol) + "banana phone\n42\r\n" '(#\b #f #\a "n" "ana" #!void #!void "\n" 42 "\r\n")) + (check-parse ll1-line "foo" "foo") + (check-parse (ll1-list ll1-line ll1-eolf) "foo\r" '("foo" "\r")) + (check-parse ll1-lines "a\nb\r\nc\r\rd" '("a" "b" "c" "" "d")) + (check-parse (cut ll1-lines <> ll1-uint) "1\n2\r\n3\r4\r5\n" '(1 2 3 4 5)) + (check-parse (ll1-list ll1-sint ll1-skip-space-to-eof) "2023 " '(2023 #!void)) + (check-parse (ll1-repeated (ll1-case (#\r (ll1-string "red")) + ("\r\n" ll1-eol) + (char-strict-whitespace? ll1-skip-space*) ; NB: contains \r\n + (char-ascii-digit ll1-uint)) ll1-eof) + "\nred3 45\r\nred" '("\n" "red" 3 #!void 45 "\r\n" "red")) + (check-parse-error (ll1-case (#\r (ll1-string "ro"))) "o")))) diff --git a/src/std/parser/ll1.ss b/src/std/parser/ll1.ss new file mode 100644 index 000000000..c845abb25 --- /dev/null +++ b/src/std/parser/ll1.ss @@ -0,0 +1,253 @@ +;; -*- Gerbil -*- +;;;; Basic LL(1) parser combinators +;; With this module, you can use parser combinators to create LL(1) parsers +;; working over a PeekableStringReader input. Build your parser then use one of +;; ll1/reader ll1/string ll1/port ll1/file or ll1/file-lines to consume input. +;; +;; Beware that these parsers use only one character of look-ahead, and that +;; if an alternative in a ll1-or fails further than at the start, +;; the next alternative will start from where the previous one failed, +;; and not from the point at which the ll1-or started. +;; This may be a reason to prefer ll1-case over ll1-or. +;; Use a more advanced kind of parser this is not enough. See std/parser. + +(export #t) + +(import + (for-syntax :std/misc/number) + :std/error + :std/contract + :std/io + (only-in :std/parser/base parse-error? raise-parse-error) + :std/iter + :std/misc/bytes + :std/misc/list-builder + :std/srfi/1 + :std/srfi/13 + :std/sugar + :std/text/char-set) + +;;; empty string parser +(def (ll1-empty reader) + (void)) + +;;; ll1 parser combinators + +(defrule (ll1-result result) (lambda (reader) result)) +(def (ll1-pure value) (ll1-result value)) +(def (ll1-bind processed processor) + (lambda (reader) ((processor (processed reader)) reader))) +(defrule (ll1-begin ignored ... last) + (lambda (reader) (ignored reader) ... (last reader))) +(defrule (ll1-begin0 first ignored ...) + (lambda (reader) (begin0 (first reader) (ignored reader) ...))) +(def (ll1-or . alternatives) + (ll1-or/list alternatives)) +(def ((ll1-or/list alternatives) reader) + (let loop ((as alternatives)) + (match as + ([] (raise-parse-error ll1-or "out of alternatives" alternatives reader)) + ([a . r] (with-catch (lambda (e) (if (parse-error? e) (loop r) (raise e))) + (cut a reader)))))) +(def (ll1-repeated element terminator (rhead '())) + (let loop ((r rhead)) + (ll1-or + (ll1-begin terminator (ll1-result (reverse r))) + (ll1-bind element (lambda (e) (loop [e . r])))))) +(def (ll1-separated element separator terminator (rhead '())) + (ll1-or + (ll1-begin terminator (ll1-pure (reverse rhead))) + (ll1-bind element (lambda (e) (ll1-repeated (ll1-begin separator element) terminator [e . rhead]))))) +(def ((ll1* f . elements) reader) + (apply f (map-in-order (cut <> reader) elements))) +(def (ll1-list . elements) + (apply ll1* list elements)) +(def ((ll1-n-times n element) reader) + (for/collect ((_ (in-range n))) (element reader))) + +;;; Peeker procedure from user-friendly spec +(def (peeker spec) + (cond + ((procedure? spec) spec) + ((char? spec) (lambda (y) (and (eqv? spec y) y))) + ((string? spec) (lambda (y) (and (char? y) (string-index spec y) y))) + ((hash-table? spec) (cut hash-get spec <>)) + ((eof-object? spec) eof-object?) + ((list? spec) (lambda (y) (ormap (lambda (s) ((peeker s) y)) spec))))) + +(defsyntax (ll1-case stx) + (syntax-case stx (else) + ((_ (spec body ...) ... (else e ...)) ;; NB: body is allowed to start with => ! + (with-syntax (((p ...) + (map (let (c (make-counter)) (lambda (_) (make-symbol "var" (c)))) + (syntax->list #'(spec ...))))) + #'(let ((p (peeker spec)) ...) + (lambda (reader) + (let (c (PeekableStringReader-peek-char reader)) + ((cond + ((p c) body ...) ... + (else e ...)) reader)))))) + ((ll1-case (spec body ...) ...) + #'(ll1-case (spec body ...) ... + (else (lambda (reader) + (raise-parse-error 'll1-case "unexpected input" + (PeekableStringReader-peek-char reader) reader))))))) + +(def (ll1-peek spec) + (ll1-case (spec PeekableStringReader-peek-char))) + +(def (ll1-char spec) + (ll1-case (spec PeekableStringReader-read-char))) + +(def (ll1-string string) + (lambda (reader) + (string-for-each (lambda (c) ((ll1-char c) reader)) string) + string)) + +(def (ll1-char? spec) + (let (p (peeker spec)) + (lambda (reader) + (using (reader : PeekableStringReader) + (and (p (reader.peek-char)) (reader.read-char)))))) + +(def (ll1-char* spec) + (let (p (peeker spec)) + (lambda (reader) + (using (reader : PeekableStringReader) + (and (p (reader.peek-char)) + (call-with-output-string + (lambda (o) (while (begin (write-char (reader.read-char) o) + (p (reader.peek-char))))))))))) + +(def (ll1-char+ spec) + (let (p (peeker spec)) + (lambda (reader) + (using (reader : PeekableStringReader) + (if (p (reader.peek-char)) + (call-with-output-string + (lambda (o) (while (begin (write-char (reader.read-char) o) + (p (reader.peek-char)))))) + (raise-parse-error ll1-char+ "Unexpected character" + (reader.peek-char) reader)))))) + +(def (ll1-skip-char* spec) + (let (p (peeker spec)) + (lambda (reader) + (using (reader : PeekableStringReader) + (while (p (reader.peek-char)) + (reader.read-char)))))) + +(def ll1-skip-space* (ll1-skip-char* char-strict-whitespace?)) + +(def (eolf? c) + (or (eqv? c #\newline) (eof-object? c) (eqv? c #\return))) + +(def (peekable-eof? reader) + (eof-object? (PeekableStringReader-peek-char reader))) + +(def ll1-eof (ll1-peek eof-object?)) + +(def ll1-eolf? (ll1-peek eolf?)) + +(def ll1-eol + (let ((rp PeekableStringReader-peek-char) (rc PeekableStringReader-read-char)) + (ll1-case (#\newline (lambda (r) (rc r) "\n")) + (#\return (lambda (r) (rc r) (if (eqv? (rp r) #\newline) (begin (rc r) "\r\n") "\r")))))) + +(def (ll1-eolf reader) + (if (peekable-eof? reader) #!eof + (ll1-eol reader))) + +;;; Parse a natural number in decimal on the current reader, return it. +(def (ll1-uint reader (base 10)) + (using (reader : PeekableStringReader) + (if-let (digit (char-ascii-digit (reader.peek-char) base)) + (let loop ((n digit)) + (reader.read-char) + (if-let (next-digit (char-ascii-digit (reader.peek-char) base)) + (loop (+ next-digit (* base n))) + n)) + (raise-parse-error ll1-uint "Not a digit in requested base" + (reader.peek-char) base reader)))) + +(def (ll1-sint reader (base 10)) + (using (reader : PeekableStringReader) + (let ((char (reader.peek-char))) + (cond + ((eqv? char #\+) + (reader.read-char) + (ll1-uint reader base)) + ((eqv? char #\-) + (reader.read-char) + (- (ll1-uint reader base))) + ((char-ascii-digit char base) + (ll1-uint reader base)) + (else + (raise-parse-error ll1-sint "Neither a sign nor a digit in requested base" + char base reader)))))) + +(def (ll1-n-chars n spec) + (let (p (peeker spec)) + (lambda (reader) + (using (reader : PeekableStringReader) + (def s (make-string n)) + (for (i (in-range n)) + (def c (reader.peek-char)) + (unless (p c) + (raise-parse-error parse-n-chars "invalid character" c n spec i)) + (string-set! s i (reader.read-char))) + s)))) + +(def (ll1-n-digits n (base 10)) + (lambda (reader) + (using (reader : PeekableStringReader) + (let loop ((i n) (r 0)) + (if (zero? i) r + (let* ((char (reader.peek-char)) + (digit (char-ascii-digit char base))) + (if digit + (begin (reader.read-char) (loop (1- i) (+ digit (* base r)))) + (raise-parse-error ll1-n-digits "not a digit" char reader n base i r)))))))) + +;; Parse a line, stop before any EOF or newline or return (but don't consume it) +(def (ll1-line reader) + (using (reader : PeekableStringReader) + (call-with-output-string + [] (lambda (o) + (let loop () + (let ((char (reader.peek-char))) + (cond + ((eolf? char) (void)) + (else (display char o) (reader.read-char) (loop))))))))) + +(def (ll1-lines reader (parse-line ll1-line)) + (with-list-builder (c) + (until (peekable-eof? reader) + (c (parse-line reader)) + (ll1-eolf reader)))) + +(def (ll1-to-eof parse) (ll1-begin0 parse ll1-eof)) + +(def ll1-skip-space-to-eof (ll1-to-eof ll1-skip-space*)) + +;; Parse an entire PeekableReader +(def (ll1/reader parser reader (description reader) (where 'll1/reader)) + (using (reader : PeekableStringReader) + (with-catch (lambda (e) (raise-parse-error where "failure parsing" description (error-message e))) + (cut (ll1-to-eof parser) reader)))) + +;; Parse an entire port +(def (ll1/port parser port (description port) (where 'll1/port)) + (ll1/reader parser (make-raw-textual-input-port port) description where)) + +;; Parse an entire file +(def (ll1/file parser file (description file) (where 'll1/file)) + (call-with-input-file file (lambda (port) (ll1/port parser port description where)))) + +;; Parse an entire string +(def (ll1/string parser string (description string) (where 'll1/string)) + (ll1/reader parser (open-buffered-string-reader string) description where)) + +;; Parse an entire file line-by-line +(def (ll1/file-lines parse-line file (description file) (where 'll1/file-lines)) + (ll1/file file (cut ll1-lines <> parse-line) description where)) diff --git a/src/std/text/basic-parsers-test.ss b/src/std/text/basic-parsers-test.ss deleted file mode 100644 index 7755c7160..000000000 --- a/src/std/text/basic-parsers-test.ss +++ /dev/null @@ -1,28 +0,0 @@ -(import - :std/error - (only-in :std/parser/base parse-error?) - :std/sugar - :std/test - :std/text/basic-parsers) - -(export basic-parsers-test) - -(defrule (check-parse parser string result) - (begin - (check-equal? (parse-string parser string) result) - (check-equal? (call-with-input-string string (cut parse-port parser <>)) result))) -(defrule (check-parse-error parser string) - (begin - (check-exception (parse-string parser string) parse-error?) - (check-exception (call-with-input-string string (cut parse-port parser <>)) parse-error?))) - -(def basic-parsers-test - (test-suite "test suite for std/misc/basic-parsers" - (test-case "1" - (check-parse parse-natural "1" 1) - (check-parse parse-natural "010" 10) ;; ain't no octal - (check-parse (cut parse-natural <> 8) "10" 8) ;; octal this time. - (check-parse-error parse-natural " 1") ;; no space allowed in front unless you ask - (check-parse-error parse-natural "1 no junk allowed")) - (test-case "parse-integer" - (check-equal? 1 1)))) diff --git a/src/std/text/basic-parsers.ss b/src/std/text/basic-parsers.ss deleted file mode 100644 index aa77b29da..000000000 --- a/src/std/text/basic-parsers.ss +++ /dev/null @@ -1,217 +0,0 @@ -;; -*- Gerbil -*- -;;;; Basic LL(1) parsers - -;; These basic LL(1) parsers work with an object satisfying the PeekableStringReader interface. -;; Be sure to wrap your port in a (raw-port port) and cast your wrapped port or BufferedStringReader -;; to a PeekableStringReader to avoid performance penalty in calling these methods. - -;; TODO: parsing combinators that produce generating functions for all the values of a parse -;; from a generator (or stream?) of values? -;; OR, combinators that use interface-passing to handle the specific - -(export #t) - -(import - :std/error - :std/contract - :std/io - (only-in :std/parser/base parse-error? raise-parse-error) - :std/iter - :std/misc/bytes - :std/misc/list-builder - :std/srfi/1 - :std/srfi/13 - :std/sugar - :std/text/char-set) - - -(def (string-reader-eof? reader) - (using (reader : PeekableStringReader) - (eof-object? (reader.peek-char)))) - -;;; Parse an empty string -(def (parse-empty reader) - (void)) - -;;; Parse a natural number in decimal on the current reader, return it. -(def (parse-natural reader (base 10)) - (using (reader : PeekableStringReader) - (if-let (digit (char-ascii-digit (reader.peek-char) base)) - (let loop ((n digit)) - (reader.read-char) - (if-let (next-digit (char-ascii-digit (reader.peek-char) base)) - (loop (+ next-digit (* base n))) - n)) - (raise-parse-error parse-natural "Not a digit in requested base" - (reader.peek-char) base reader)))) - -(def (parse-signed-integer reader (base 10)) - (using (reader : PeekableStringReader) - (let ((char (reader.peek-char))) - (cond - ((eqv? char #\+) - (reader.read-char) - (parse-natural reader base)) - ((eqv? char #\-) - (reader.read-char) - (- (parse-natural reader base))) - ((char-ascii-digit char) - (parse-natural reader base)) - (else - (raise-parse-error parse-signed-integer "Neither a sign nor a digit in requested base" - char base reader)))))) - -(def (parse-maybe-one-of char-pred?) - (lambda (reader) - (using (reader : PeekableStringReader) - (def c (reader.peek-char)) - (and (char-pred? c) c)))) - -(def (parse-one-of char-pred?) - (lambda (reader) - (using (reader : PeekableStringReader) - (def c (reader.peek-char)) - (if (char-pred? c) - c - (raise-parse-error parse-one-of "Unexpected character" - c char-pred? reader))))) - -(def (parse-any-number-of char-pred?) - (lambda (reader) - (using (reader : PeekableStringReader) - (and (char-pred? (reader.peek-char)) - (call-with-output-string - (lambda (out) (while (begin (write-char (reader.read-char) out) - (char-pred? (reader.peek-char)))))))))) - -(def (parse-one-or-more-of char-pred?) - (lambda (reader) - (using (reader : PeekableStringReader) - (or ((parse-any-number-of char-pred?) reader) - (raise-parse-error parse-one-or-more-of "Unexpected character" - (reader.peek-char) reader))))) - -(def (parse-maybe-char char) - (parse-maybe-one-of (cut eqv? char <>))) - -(def (parse-one-char char) - (parse-one-of (cut eqv? char <>))) - -(def (parse-and-skip-any-whitespace reader (whitespace? char-strict-whitespace?)) - (using (reader : PeekableStringReader) - (while (whitespace? (reader.peek-char)) - (reader.read-char)))) - -(def parse-eof (parse-one-of eof-object?)) - -(def (parse-eol reader) - (def char ((parse-one-of char-eol?) reader)) - (when (eqv? char #\return) - ((parse-maybe-char #\newline) reader))) - -(def (parse-literal-string string) - (lambda (reader) - (string-for-each (lambda (c) ((parse-one-char c) reader)) string))) - -(def (parse-n-chars n (char-pred? char?)) - (lambda (reader) - (using (reader : PeekableStringReader) - (def s (make-string n)) - (for (i (in-range n)) - (def c (reader.peek-char)) - (unless (char-pred? c) - (raise-parse-error parse-n-chars "invalid character" c n char-pred? i)) - (string-set! s i c)) - s))) - -(def (parse-n-digits n (base 10)) - (lambda (reader) - (using (reader : PeekableStringReader) - (let loop ((n n) (r 0)) - (if (zero? n) r - (let* ((char (reader.peek-char)) - (digit (char-ascii-digit char base))) - (if digit - (begin (reader.read-char) (loop (- n 1) (+ digit (* base r)))) - (raise-parse-error parse-n-digits "not a digit" char reader n base)))))))) - -;; Like parse-line, but handles (and still strips) any of the CRLF, CR and LF line endings -(def (parse-line reader) - (using (reader : PeekableStringReader) - (call-with-output-string - [] (lambda (out) - (let loop () - (let ((char (reader.peek-char))) - (cond - ((char-eol? char) (parse-eol reader)) - ((eof-object? char) (void)) - (else (display char out) (reader.read-char) (loop))))))))) - -(def (parse-lines reader (parse-line parse-line)) - (with-list-builder (c) - (until (string-reader-eof? reader) - (c (parse-line reader)) - (parse-eol reader)))) - -(def (parse-to-eof parse) - (lambda (reader) (begin0 (parse reader) (parse-eof reader)))) - -(def (parse-whitespace-to-eof (whitespace? char-strict-whitespace?)) - (parse-to-eof (cut parse-and-skip-any-whitespace <> whitespace?))) - -;; Parse an entire PeekableReader -(def (parse-reader parser reader (description reader) (where 'parse-reader)) - (with-catch (lambda (e) (raise-parse-error where "failure parsing" description (error-message e))) - (lambda () ((parse-to-eof parser) reader)))) - -;; Parse an entire port -(def (parse-port parser port (description port) (where 'parse-port)) - (parse-reader parser (PeekableStringReader (make-raw-textual-input-port port)) description where)) - -;; Parse an entire file -(def (parse-file parser file (description file) (where 'parse-file)) - (call-with-input-file file (lambda (port) (parse-port parser port description where)))) - -;; Parse an entire string -(def (parse-string parser string (description string) (where 'parse-string)) - (parse-reader parser (PeekableStringReader (open-buffered-string-reader string)) description where)) - -;; Parse an entire file line-by-line -(def (parse-file-lines parse-line file (description file) (where 'parse-file-lines)) - (parse-file file (cut parse-lines <> parse-line) description where)) - - -;; Monadic parsing combinators -(def ((parse-alternatives alternatives (where 'parse-alternatives)) reader) - (let loop ((as alternatives)) - (if (null? as) - (raise-parse-error where "none applied" alternatives reader) - (with-catch (lambda (e) (if (parse-error? e) (loop (cdr as)) (raise e))) - (lambda () ((car as) reader)))))) -(defrule (parse-result result) (lambda (_port) result)) -(def (parse-pure value) (parse-result value)) -(def (parse-bind processed processor) - (lambda (reader) ((processor (processed reader)) reader))) -(def (parse-or . alternatives) - (parse-alternatives alternatives 'parse-or)) -(defrule (parse-begin parse-ignored ... parse-value) - (lambda (reader) (parse-ignored reader) ... (parse-value reader))) -(defrule (parse-begin0 parse-value parse-ignored ...) - (lambda (reader) (begin0 (parse-value reader) (parse-ignored reader) ...))) -(def (parse-repeated parse-element parse-terminator (rhead '())) - (let loop ((r rhead)) - (parse-or - (parse-begin parse-terminator (parse-result (reverse r))) - (parse-bind parse-element (lambda (e) (loop [e . r])))))) -(def (parse-separated parse-element parse-separator parse-terminator) - (parse-or - (parse-begin parse-terminator (parse-pure '())) - (parse-bind parse-element - (lambda (e) (parse-repeated (parse-begin parse-separator parse-element) - parse-terminator [e]))))) -(def ((parse-n-repeats n parse-element) reader) - (for/collect ((_ (in-range n))) (parse-element reader))) -(def ((parse* f . parse-elements) reader) - (apply f (map-in-order (lambda (ee) (ee reader)) parse-elements))) -(def (parse-list . parse-elements) - (apply parse* list parse-elements)) diff --git a/src/std/text/json/util.ss b/src/std/text/json/util.ss index 39a04a0f8..3d17dce17 100644 --- a/src/std/text/json/util.ss +++ b/src/std/text/json/util.ss @@ -16,10 +16,10 @@ :std/misc/plist :std/misc/rtd :std/misc/walist + :std/parser/ll1 :std/sort :std/srfi/43 :std/sugar - :std/text/basic-parsers :std/values ./env ./input ./output) @@ -39,16 +39,16 @@ (def (string->json-object str) (let (reader (open-buffered-string-reader str)) (begin0 (read-json-object/reader reader (make-env)) - ((parse-whitespace-to-eof) (PeekableStringReader reader))))) + (ll1-skip-space-to-eof (PeekableStringReader reader))))) (def (bytes->json-object bytes) (let (buffer (open-buffered-reader bytes)) (begin0 (read-json-object/buffer buffer (make-env)) - ((parse-whitespace-to-eof) (PeekableStringReader (open-buffered-string-reader buffer)))))) + (ll1-skip-space-to-eof (PeekableStringReader (open-buffered-string-reader buffer)))))) (def (port->json-object port) (begin0 (read-json-object/port port (make-env)) - ((parse-whitespace-to-eof) (PeekableStringReader (open-buffered-string-reader port))))) + (ll1-skip-space-to-eof (PeekableStringReader (open-buffered-string-reader port))))) (def (write-json obj (output (current-output-port))) (cond