Skip to content

Commit

Permalink
Merge pull request #1432 from chanicpanic/issue1431
Browse files: browse the repository at this point in the history
Fix SymbolNode.end for completed tokens
  • Loading branch information
erezsh authored Jun 26, 2024
2 parents c1dbe0c + 2faac23 commit 33136b3
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 13 deletions.
15 changes: 3 additions & 12 deletions lark/parsers/earley_forest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,15 +38,15 @@ class SymbolNode(ForestNode):
Parameters:
s: A Symbol, or a tuple of (rule, ptr) for an intermediate node.
start: The index of the start of the substring matched by this symbol (inclusive).
end: The index of the end of the substring matched by this symbol (exclusive).
start: For dynamic lexers, the index of the start of the substring matched by this symbol (inclusive).
end: For dynamic lexers, the index of the end of the substring matched by this symbol (exclusive).
Properties:
is_intermediate: True if this node is an intermediate node.
priority: The priority of the node's symbol.
"""
Set: Type[AbstractSet] = set # Overridden by StableSymbolNode
__slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate', '_hash')
__slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate')
def __init__(self, s, start, end):
self.s = s
self.start = start
Expand All @@ -59,7 +59,6 @@ def __init__(self, s, start, end):
# unlike None or float('NaN'), and sorts appropriately.
self.priority = float('-inf')
self.is_intermediate = isinstance(s, tuple)
self._hash = hash((self.s, self.start, self.end))

def add_family(self, lr0, rule, start, left, right):
self._children.add(PackedNode(self, lr0, rule, start, left, right))
Expand Down Expand Up @@ -93,14 +92,6 @@ def children(self):
def __iter__(self):
return iter(self._children)

def __eq__(self, other):
if not isinstance(other, SymbolNode):
return False
return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.end is other.end)

def __hash__(self):
return self._hash

def __repr__(self):
if self.is_intermediate:
rule = self.s[0]
Expand Down
2 changes: 1 addition & 1 deletion lark/parsers/xearley.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -104,7 +104,7 @@ def scan(i, to_scan):
token.end_pos = i + 1

new_item = item.advance()
label = (new_item.s, new_item.start, i)
label = (new_item.s, new_item.start, i + 1)
token_node = TokenNode(token, terminals[token.type])
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node)
Expand Down
21 changes: 21 additions & 0 deletions tests/test_parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -933,6 +933,27 @@ def test_cycles_with_child_filter(self):
tree = l.parse('');
self.assertEqual(tree, Tree('a', [Tree('x', [Tree('b', [])])]))

@unittest.skipIf(LEXER=='basic', "start/end values work differently for the basic lexer")
def test_symbol_node_start_end_dynamic_lexer(self):
grammar = """
start: "ABC"
"""

l = Lark(grammar, ambiguity='forest', lexer=LEXER)
node = l.parse('ABC')
self.assertEqual(node.start, 0)
self.assertEqual(node.end, 3)

grammar2 = """
start: abc
abc: "ABC"
"""

l = Lark(grammar2, ambiguity='forest', lexer=LEXER)
node = l.parse('ABC')
self.assertEqual(node.start, 0)
self.assertEqual(node.end, 3)


_NAME = "TestFullEarley" + LEXER.capitalize()
_TestFullEarley.__name__ = _NAME
Expand Down

0 comments on commit 33136b3

Please sign in to comment.