diff --git a/.gitignore b/.gitignore index 26275e4b..148eeecb 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ tags /build docs/_build docs/examples +docs/sg_execution_times.rst diff --git a/docs/conf.py b/docs/conf.py index aacf8bba..59c2ac6d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -76,7 +76,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/docs/how_to_develop.md b/docs/how_to_develop.md index cf95fed0..a174c2e3 100644 --- a/docs/how_to_develop.md +++ b/docs/how_to_develop.md @@ -65,3 +65,15 @@ Another way to run the tests is using setup.py: ```bash python setup.py test ``` + +## Building the Documentation + +To build the documentation: + +```sh +cd docs/ +pip install -r requirements.txt +make html +``` + +To review the result, open the built HTML files under `_build/html/` in your browser. diff --git a/docs/recipes.md b/docs/recipes.md index 8101e37c..b1e7a8b6 100644 --- a/docs/recipes.md +++ b/docs/recipes.md @@ -79,6 +79,7 @@ Prints out: *Note: We don't have to return a token, because comments are ignored* + ## CollapseAmbiguities Parsing ambiguous texts with earley and `ambiguity='explicit'` produces a single tree with `_ambig` nodes to mark where the ambiguity occurred. @@ -193,3 +194,13 @@ def parse_with_progress(parser: Lark, text: str, start=None): ``` Keep in mind that this implementation relies on the `InteractiveParser` and, therefore, only works with the `LALR(1)` parser, and not `Earley`. + + +## Parsing a Language with Significant Indentation + +If your grammar needs to support significant indentation (e.g. Python, YAML), you will need to use +the `Indenter` class. Take a look at the [indented tree example][indent] as well as the +[Python grammar][python] for inspiration. + +[indent]: examples/indented_tree.html +[python]: https://github.com/lark-parser/lark/blob/master/lark/grammars/python.lark diff --git a/examples/indented_tree.py b/examples/indented_tree.py index 5ac928c7..a43733e4 100644 --- a/examples/indented_tree.py +++ b/examples/indented_tree.py @@ -3,28 +3,34 @@ =================== A demonstration of parsing indentation (“whitespace significant” language) -and the usage of the Indenter class. +and the usage of the ``Indenter`` class. Since indentation is context-sensitive, a postlex stage is introduced to -manufacture INDENT/DEDENT tokens. +manufacture ``INDENT``/``DEDENT`` tokens. -It is crucial for the indenter that the NL_type matches -the spaces (and tabs) after the newline. +It is crucial for the indenter that the ``NL_type`` matches the spaces (and +tabs) after the newline. + +If your whitespace-significant grammar supports comments, then ``NL_type`` +must match those comments too. Otherwise, comments that appear in the middle +of a line will `confuse Lark`_. + +.. _`confuse Lark`: https://github.com/lark-parser/lark/issues/863 """ from lark import Lark from lark.indenter import Indenter tree_grammar = r""" - ?start: _NL* tree - - tree: NAME _NL [_INDENT tree+ _DEDENT] - %import common.CNAME -> NAME %import common.WS_INLINE - %declare _INDENT _DEDENT + %import common.SH_COMMENT %ignore WS_INLINE + %ignore SH_COMMENT + %declare _INDENT _DEDENT - _NL: /(\r?\n[\t ]*)+/ + ?start: _NL* tree + tree: NAME _NL [_INDENT tree+ _DEDENT] + _NL: (/\r?\n[\t ]*/ | SH_COMMENT)+ """ class TreeIndenter(Indenter): @@ -39,6 +45,7 @@ class TreeIndenter(Indenter): test_tree = """ a + # check this comment out b c d