-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse.py
81 lines (51 loc) · 2.32 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pyparsing as pp
# many thanks to this example:
# https://github.com/pyparsing/pyparsing/blob/master/examples/jsonParser.py
# docs here:
# https://pyparsing-docs.readthedocs.io/en/latest/HowToUsePyparsing.html#usage-notes
class Parse:
    """Pyparsing grammar for bracketed tree notation, plus a ``parse`` entry point.

    The grammar is built once, as class attributes, when the class body runs:

    - ``label``: a word made of characters from ``pp.unicode.printables``
      that contains none of the delimiter characters in ``delims``.
    - ``tree``: ``[`` + optional members + ``]``.
    - ``triangle``: ``<`` + optional members + ``>``; the content between the
      angle brackets is kept as its original source text.
    - ``node``: a label, a tree, or a triangle.
    - ``memberDef``: a label followed by zero or more nodes.

    The "formerly json..." notes record which element of pyparsing's
    jsonParser example each expression was adapted from.
    """

    # Exception type that callers can catch when parsing fails.
    ParseError = pp.ParseException
    RETURN_PYTHON_COLLECTIONS = True  # set to False to return ParseResults instead

    # The four delimiter characters; each becomes a suppressed token, and all
    # of them are excluded from labels.
    delims = "<>[]"
    LANGLE, RANGLE, LBRACK, RBRACK = map(pp.Suppress, delims)

    label = pp.Word(pp.unicode.printables, exclude_chars=delims)  # formerly jsonString

    # Forward declarations: these expressions refer to each other recursively.
    tree = pp.Forward().setName("tree")  # formerly jsonObject
    node = pp.Forward().setName("node")  # formerly jsonValue
    triangle = pp.Forward().setName("triangle")  # formerly "jsonArray"

    # `<<=` is the documented way to define a Forward; unlike a bare `<<`
    # statement it cannot be bitten by `<<` binding tighter than `|`.
    node <<= (label | tree | triangle)

    memberDef = pp.Group(
        label + pp.ZeroOrMore(node), aslist=RETURN_PYTHON_COLLECTIONS
    ).setName("treeMember")
    treeMembers = pp.delimitedList(memberDef).setName(None)

    tree <<= pp.Dict(
        LBRACK + pp.Optional(treeMembers) + RBRACK, asdict=RETURN_PYTHON_COLLECTIONS
    )

    # N.B.: anything below a triangle is treated literally, even if it would
    # otherwise form valid tree or triangle data.
    triangle <<= pp.Combine(
        LANGLE + pp.original_text_for(pp.Optional(treeMembers)) + RANGLE,
        adjacent=False,
    )

    @staticmethod
    def parse(s: str) -> pp.ParseResults:
        """Parse ``s`` with the ``tree`` grammar and return the parse results.

        Declared as a static method so that it can be called both as
        ``Parse.parse(s)`` and on an instance.

        Args:
            s: Text in bracketed tree notation, e.g. ``"[NP [D the] [N dog]]"``.

        Returns:
            The ``ParseResults`` produced by ``Parse.tree.parseString(s)``.

        Raises:
            Parse.ParseError: If ``s`` does not start with a valid tree.
                ``parseString`` is called without ``parseAll``, so text after
                the first complete tree is not checked.
        """
        return Parse.tree.parseString(s)
# TODO: once the parse result is actually turned into nodes, the
# is_triangle flag can simply be set to True dynamically.
# If, while creating nodes, we find a string in place of a dict, then
# instead of making a node based on just that string:
# - split it at the first space into `category` (left) and `rest` (right)
# - make a triangle node such that
#   - its text is `category`, and
#   - its only child is a (childless) node with the text `rest`
if __name__ == "__main__":
    # Smoke-test the grammar: one parse whose result is only stored, then a
    # series of bracketed-tree inputs whose parse results are printed in order.
    testdata = "\n[Here is]\n"
    results = Parse.parse(testdata)

    # Example containing a triangle (<...>); kept for reference, not parsed here.
    sample = "[NP [D the] [N' <AdjP very big> [N dog]]]"

    demo_inputs = (
        # non-ASCII labels
        "[NP [DP [D əthæae] [D 3ɳ0]] [N' [AdjP vɚ e̩ry big] [N dogs]]]",
        # triangle nested inside a tree
        "[NP [DP [D the] [D 30]] [N' [AdjP <AdvP very əvery very> [A big]] [N dogs]]]",
        # empty tree
        "[]",
        # deeper nesting
        "[IP [NP [DP [D the] [D 30]] [N' [AdjP very big] [N dogs]]] [I' [I will] [VP [V be] [P here]]]]",
    )
    for text in demo_inputs:
        print(Parse.parse(text))