
Commit 8611d69
Merge pull request #1428 from MegaIng/cleanup-test_parser
Clean up test_parser.py; test more cases
erezsh authored Jun 20, 2024
2 parents 1be22db + bd230b2 commit 8611d69
Showing 1 changed file with 37 additions and 115 deletions.
152 changes: 37 additions & 115 deletions tests/test_parser.py
@@ -11,11 +11,6 @@

 from lark import Token, Transformer_NonRecursive, LexError

-try:
-    from cStringIO import StringIO as cStringIO
-except ImportError:
-    # Available only in Python 2.x, 3.x only has io.StringIO from below
-    cStringIO = None
 from io import (
         StringIO as uStringIO,
         BytesIO,
@@ -28,6 +23,7 @@
 except ImportError:
     regex = None

+
 import lark
 from lark import logger
 from lark.lark import Lark
@@ -399,6 +395,8 @@ def test_anon(self):

         self.assertEqual( g.parse('abc').children[0], 'abc')

+
+    @unittest.skipIf(LEXER=='basic', "Requires dynamic lexer")
     def test_earley(self):
         g = Lark("""start: A "b" c
                     A: "a"+
@@ -421,8 +419,7 @@ def test_earley2(self):
         l = Lark(grammar, parser='earley', lexer=LEXER)
         l.parse(program)

-
-    @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser")
+    @unittest.skipIf(LEXER != 'dynamic_complete', "Only relevant for the dynamic_complete parser")
     def test_earley3(self):
         """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)
@@ -758,6 +755,8 @@ def test_ambiguous_intermediate_node_conditionally_inlined_rule(self):
         self.assertEqual(ambig_tree.data, '_ambig')
         self.assertEqual(set(ambig_tree.children), expected)

+
+    @unittest.skipIf(LEXER=='basic', "Requires dynamic lexer")
     def test_fruitflies_ambig(self):
         grammar = """
             start: noun verb noun        -> simple
@@ -913,24 +912,6 @@ def test_cycles_with_child_filter(self):
         self.assertEqual(tree, Tree('a', [Tree('x', [Tree('b', [])])]))


-
-
-
-    # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
-    # def test_not_all_derivations(self):
-    #     grammar = """
-    #     start: cd+ "e"
-
-    #     !cd: "c"
-    #        | "d"
-    #        | "cd"
-
-    #     """
-    #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
-    #     x = l.parse('cde')
-    #     assert x.data != '_ambig', x
-    #     assert len(x.children) == 1
-
     _NAME = "TestFullEarley" + LEXER.capitalize()
     _TestFullEarley.__name__ = _NAME
     globals()[_NAME] = _TestFullEarley
@@ -1086,11 +1067,6 @@ def test_basic2(self):
         assert x.data == 'start' and x.children == ['12', '2'], x


-    @unittest.skipIf(cStringIO is None, "cStringIO not available")
-    def test_stringio_bytes(self):
-        """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
-        _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))
-
     def test_stringio_unicode(self):
         """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
         _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))
@@ -1140,7 +1116,7 @@ def test_unicode_literal_range_escape(self):
             """)
         g.parse('abc')

-    @unittest.skipIf(sys.version_info < (3, 3), "re package did not support 32bit unicode escape sequence before Python 3.3")
+
     def test_unicode_literal_range_escape2(self):
         g = _Lark(r"""start: A+
                       A: "\U0000FFFF".."\U00010002"
@@ -1153,8 +1129,7 @@ def test_hex_literal_range_escape(self):
             """)
         g.parse('\x01\x02\x03')

-    @unittest.skipIf(sys.version_info[0]==2 or sys.version_info[:2]==(3, 4),
-                     "bytes parser isn't perfect in Python2, exceptions don't work correctly")
+
     def test_bytes_utf8(self):
         g = r"""
         start: BOM? char+
@@ -1305,49 +1280,6 @@ def test_empty_flatten_list(self):
         [list] = r.children
         self.assertSequenceEqual([item.data for item in list.children], ())

-    @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
-    def test_single_item_flatten_list(self):
-        g = _Lark(r"""start: list
-                      list: | item "," list
-                      item : A
-                      A: "a"
-                   """)
-        r = g.parse("a,")
-
-        # Because 'list' is a flatten rule it's top-level element should *never* be expanded
-        self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
-
-        # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
-        [list] = r.children
-        self.assertSequenceEqual([item.data for item in list.children], ('item',))
-
-    @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
-    def test_multiple_item_flatten_list(self):
-        g = _Lark(r"""start: list
-                      #list: | item "," list
-                      item : A
-                      A: "a"
-                   """)
-        r = g.parse("a,a,")
-
-        # Because 'list' is a flatten rule it's top-level element should *never* be expanded
-        self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
-
-        # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
-        [list] = r.children
-        self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))
-
-    @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
-    def test_recurse_flatten(self):
-        """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
-        g = _Lark(r"""start: a | start a
-                      a : A
-                      A : "a" """)
-
-        # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
-        # STree data structures, which uses recursion).
-        g.parse("a" * (sys.getrecursionlimit() // 4))
-
     def test_token_collision(self):
         g = _Lark(r"""start: "Hello" NAME
                       NAME: /\w/+
@@ -1459,20 +1391,6 @@ def test_g_regex_flags(self):
         x1 = g.parse("ABBc")
         x2 = g.parse("abdE")

-    # def test_string_priority(self):
-    #     g = _Lark("""start: (A | /a?bb/)+
-    #                  A: "a" """)
-    #     x = g.parse('abb')
-    #     self.assertEqual(len(x.children), 2)
-
-    #     # This parse raises an exception because the lexer will always try to consume
-    #     # "a" first and will never match the regular expression
-    #     # This behavior is subject to change!!
-    #     # This won't happen with ambiguity handling.
-    #     g = _Lark("""start: (A | /a?ab/)+
-    #                  A: "a" """)
-    #     self.assertRaises(LexError, g.parse, 'aab')
-
     def test_rule_collision(self):
         g = _Lark("""start: "a"+ "b"
                      | "a"+ """)
@@ -1561,13 +1479,6 @@ def test_special_chars(self):
             """)
         x = g.parse('\n')

-
-    # def test_token_recurse(self):
-    #     g = _Lark("""start: A
-    #                  A: B
-    #                  B: A
-    #              """)
-
     @unittest.skipIf(PARSER == 'cyk', "No empty rules")
     def test_empty(self):
         # Fails an Earley implementation without special handling for empty rules,
@@ -1649,13 +1560,6 @@ def test_token_flags(self):
         tree = l.parse('aA')
         self.assertEqual(tree.children, ['a', 'A'])

-        # g = """!start: "a"i "a"
-        #     """
-        # self.assertRaises(GrammarError, _Lark, g)
-
-        # g = """!start: /a/i /a/
-        #     """
-        # self.assertRaises(GrammarError, _Lark, g)

         g = """start: NAME "," "a"
             NAME: /[a-z_]/i /[a-z0-9_]/i*
@@ -1666,6 +1570,25 @@ def test_token_flags(self):
         tree = l.parse('AB,a')
         self.assertEqual(tree.children, ['AB'])

+    @unittest.skipIf(LEXER in ('basic', 'custom_old', 'custom_new'), "Requires context sensitive terminal selection")
+    def test_token_flags_collision(self):
+
+        g = """!start: "a"i "a"
+            """
+        l = _Lark(g)
+        self.assertEqual(l.parse('aa').children, ['a', 'a'])
+        self.assertEqual(l.parse('Aa').children, ['A', 'a'])
+        self.assertRaises(UnexpectedInput, l.parse, 'aA')
+        self.assertRaises(UnexpectedInput, l.parse, 'AA')
+
+        g = """!start: /a/i /a/
+            """
+        l = _Lark(g)
+        self.assertEqual(l.parse('aa').children, ['a', 'a'])
+        self.assertEqual(l.parse('Aa').children, ['A', 'a'])
+        self.assertRaises(UnexpectedInput, l.parse, 'aA')
+        self.assertRaises(UnexpectedInput, l.parse, 'AA')
+
     def test_token_flags3(self):
         l = _Lark("""!start: ABC+
                      ABC: "abc"i
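
Note: the new test_token_flags_collision above only passes when the lexer can select terminals by parser state, which is why it is skipped for the basic and custom lexers. A minimal standalone sketch of the behavior it asserts, assuming lark's LALR parser with its contextual lexer (this sketch is illustrative and not part of the diff):

    from lark import Lark, UnexpectedInput

    # Two anonymous terminals collide: "a"i (case-insensitive) and "a" (exact).
    # The contextual lexer narrows candidate terminals to those the current
    # parser state accepts, so the first slot may match 'A' but the second may not.
    parser = Lark('!start: "a"i "a"', parser='lalr', lexer='contextual')

    assert parser.parse('aa').children == ['a', 'a']
    assert parser.parse('Aa').children == ['A', 'a']   # first slot accepts 'A'

    try:
        parser.parse('aA')   # second terminal is case-sensitive
    except UnexpectedInput:
        pass                 # rejected, exactly as the test expects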
@@ -1754,7 +1677,7 @@ def test_reduce_cycle(self):
         self.assertEqual(len(tree.children), 2)


-    @unittest.skipIf(LEXER != 'basic', "basic lexer prioritization differs from dynamic lexer prioritization")
+    @unittest.skipIf('dynamic' in LEXER, "basic lexer prioritization differs from dynamic lexer prioritization")
     def test_lexer_prioritization(self):
         "Tests effect of priority on result"

@@ -2274,7 +2197,6 @@ def test_ranged_repeat_rules(self):



-    @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
     def test_priority_vs_embedded(self):
         g = """
         A.2: "a"
@@ -2407,7 +2329,7 @@ def test_meddling_unused(self):
         parser = _Lark(grammar)


-    @unittest.skipIf(PARSER!='lalr' or 'custom' in LEXER, "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
+    @unittest.skipIf(PARSER!='lalr' or LEXER == 'custom_old', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
     def test_serialize(self):
         grammar = """
             start: _ANY b "C"
@@ -2512,7 +2434,7 @@ def test_regex_width_fallback(self):
             """
         self.assertRaises((GrammarError, LexError, re.error), _Lark, g, regex=True)

-    @unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
+    @unittest.skipIf(PARSER != 'lalr', "interactive_parser is only implemented for LALR at the moment")
     def test_parser_interactive_parser(self):

         g = _Lark(r'''
@@ -2549,7 +2471,7 @@ def test_parser_interactive_parser(self):
         res = ip_copy.feed_eof()
         self.assertEqual(res, Tree('start', ['a', 'b', 'b']))

-    @unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now")
+    @unittest.skipIf(PARSER != 'lalr', "interactive_parser error handling only works with LALR for now")
     def test_error_with_interactive_parser(self):
         def ignore_errors(e):
             if isinstance(e, UnexpectedCharacters):
@@ -2584,18 +2506,18 @@ def ignore_errors(e):
         s = "[0 1, 2,@, 3,,, 4, 5 6 ]$"
         tree = g.parse(s, on_error=ignore_errors)

-    @unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now")
+    @unittest.skipIf(PARSER != 'lalr', "interactive_parser error handling only works with LALR for now")
     def test_iter_parse(self):
         ab_grammar = '!start: "a"* "b"*'
-        parser = Lark(ab_grammar, parser="lalr")
+        parser = _Lark(ab_grammar)
         ip = parser.parse_interactive("aaabb")
         i = ip.iter_parse()
         assert next(i) == 'a'
         assert next(i) == 'a'
         assert next(i) == 'a'
         assert next(i) == 'b'

-    @unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
+    @unittest.skipIf(PARSER != 'lalr', "interactive_parser is only implemented for LALR at the moment")
     def test_interactive_treeless_transformer(self):
         grammar = r"""
         start: SYM+
@@ -2617,7 +2539,7 @@ def SYM(self, token):
         res = ip.feed_eof()
         self.assertEqual(res.children, [1, 2, 1])

-    @unittest.skipIf(PARSER!='lalr', "Tree-less mode is only supported in lalr")
+    @unittest.skipIf(PARSER == 'earley', "Tree-less mode is not supported in earley")
     def test_default_in_treeless_mode(self):
         grammar = r"""
             start: expr
@@ -2643,7 +2565,7 @@ def __default__(self, data, children, meta):
         b = parser.parse(s)
         assert a == b

-    @unittest.skipIf(PARSER!='lalr', "strict mode is only supported in lalr for now")
+    @unittest.skipIf(PARSER != 'lalr', "strict mode is only supported in lalr for now")
     def test_strict(self):
         # Test regex collision
         grammar = r"""
@@ -2687,7 +2609,7 @@ def test_strict(self):
 for _LEXER, _PARSER in _TO_TEST:
     _make_parser_test(_LEXER, _PARSER)

-for _LEXER in ('dynamic', 'dynamic_complete'):
+for _LEXER in ('basic', 'dynamic', 'dynamic_complete'):
     _make_full_earley_test(_LEXER)

 if __name__ == '__main__':
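
The final hunk widens the full-Earley test matrix to include the basic lexer. For context, a simplified sketch of the class-generation pattern the diff touches (the real _TestFullEarley defines many more tests; this placeholder body is illustrative only):

    import unittest

    def _make_full_earley_test(LEXER):
        class _TestFullEarley(unittest.TestCase):
            def test_lexer_name(self):
                self.assertIn(LEXER, ('basic', 'dynamic', 'dynamic_complete'))

        # Rename the class per lexer so unittest discovery picks up one class
        # per configuration, e.g. TestFullEarleyBasic, TestFullEarleyDynamic.
        _NAME = "TestFullEarley" + LEXER.capitalize()
        _TestFullEarley.__name__ = _NAME
        globals()[_NAME] = _TestFullEarley

    for _LEXER in ('basic', 'dynamic', 'dynamic_complete'):
        _make_full_earley_test(_LEXER)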
