-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrandsent
executable file
·69 lines (55 loc) · 1.84 KB
/
randsent
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python2
from __future__ import print_function
import argparse
from collections import defaultdict, deque
import random
import re
import sys
# Parse arguments. You should not need to modify this.
#
# Command-line interface:
#   grammar     (required) path of the grammar file to read
#   count       (optional positional) how many sentences to print; default 1
#   -t/--tree   boolean flag; NOTE(review): it is accepted here, but nothing in
#               the code visible in this file reads args.tree -- confirm whether
#               tree output is implemented elsewhere or still missing
#   --seed      integer seed for the random generator below; default 0
#   --min-size  optional lower bound on the number of symbols in a sentence
#   --max-size  optional upper bound on the number of symbols in a sentence
# Both size bounds default to None (argparse's default when none is given),
# which the main loop treats as "no bound".
parser = argparse.ArgumentParser()
parser.add_argument("grammar", help="path to grammar file")
parser.add_argument("count", type=int, help="number of sentences to generate", nargs='?', default=1)
parser.add_argument("-t", "--tree", action='store_true', help="pretty-print tree form instead of printing sentence")
parser.add_argument("--seed", type=int, default=0, help="RNG seed")
parser.add_argument("--min-size", type=int)
parser.add_argument("--max-size", type=int)
# Reads sys.argv at module load time; argparse exits the process on bad input.
args = parser.parse_args()
# Create a random generator.
# A dedicated Random instance seeded from --seed; generate_sentence draws all
# of its rule choices from this object rather than from the module-level
# functions of `random`.
rng = random.Random(args.seed)
def parse_grammar(file_):
    """Read grammar rules from an iterable of lines.

    A usable line has the form ``LHS SYM1 SYM2 ...``: the first
    whitespace-delimited token is the left-hand-side symbol and the remaining
    tokens make up one right-hand side.  Within the right-hand side, anything
    from the first ``#``, ``(`` or ``)`` onward is dropped, as is trailing
    whitespace.  Lines that do not fit this shape (blank lines, lines that
    start with whitespace, ``#`` or a parenthesis, lines with no right-hand
    side) are silently skipped.  Identical rules are stored only once.

    Args:
        file_: iterable yielding text lines, e.g. an open file object.

    Returns:
        A plain dict mapping each left-hand-side symbol to a sorted list of
        its right-hand sides, each right-hand side being a tuple of symbols.

    Raises:
        ValueError: if the grammar contains no rule for the ``START`` symbol.
    """
    # Raw string so that \s reaches the regex engine untouched instead of
    # relying on Python passing an unknown string escape through.
    pattern = re.compile(
        r'^(?P<lhs>[^\s()#]+)\s+(?P<rhs>[^()#\n]*[^()#\n\s])')

    # A set per left-hand side removes duplicate rules.
    rules = defaultdict(set)
    for line in file_:
        match = pattern.match(line)
        if match is None:
            continue
        rules[match.group('lhs')].add(tuple(match.group('rhs').split()))

    # Explicit check instead of `assert`, which disappears under `python -O`.
    if 'START' not in rules:
        raise ValueError("grammar does not define any rule for START")

    # Sort each rule list: set iteration order depends on string hashing, so
    # an unsorted list would make the rule picked for a given RNG seed vary
    # between interpreter runs when hash randomization is enabled.
    return {lhs: sorted(alternatives) for lhs, alternatives in rules.items()}
def generate_sentence(grammar):
    """Lazily yield the terminal symbols of one random derivation of START.

    Expansion is leftmost-first: the leftmost pending symbol is handled
    before anything to its right.  A symbol that is a key of ``grammar`` is a
    nonterminal and is replaced by one of its right-hand sides, picked with
    the module-level ``rng``; any other symbol is a terminal and is yielded.

    Args:
        grammar: mapping from nonterminal symbol to a non-empty sequence of
            right-hand sides, each a sequence of symbols.

    Yields:
        The terminal symbols of the generated sentence, in left-to-right
        order.
    """
    # Stack of symbols still to be processed; the top of the stack (the end
    # of the list) is always the leftmost pending symbol.
    pending = ['START']
    while pending:
        symbol = pending.pop()
        if symbol not in grammar:
            # Terminal: emit it as the next word of the sentence.
            yield symbol
            continue
        expansion = rng.choice(grammar[symbol])
        # Push right-to-left so the first symbol of the expansion is popped
        # next.
        pending.extend(reversed(expansion))
# Reject an empty size window up front: no sentence can be both at least
# --min-size and at most --max-size symbols long when min > max, so the retry
# loop below would otherwise never terminate.  parser.error prints the usage
# message and exits with status 2.
if (args.min_size is not None and args.max_size is not None
        and args.min_size > args.max_size):
    parser.error("--min-size must not be greater than --max-size")

# Parse the grammar
with open(args.grammar, 'r') as grammar_file:
    grammar = parse_grammar(grammar_file)

# Print args.count sentences, one per line, symbols separated by one space.
for _ in xrange(args.count):
    # Rejection sampling: keep drawing sentences until one falls inside the
    # requested size window (a bound of None means "unbounded" on that side).
    # NOTE(review): this still loops forever if the grammar cannot produce
    # any sentence of an accepted length -- that cannot be detected here.
    while True:
        sent = list(generate_sentence(grammar))
        long_enough = args.min_size is None or len(sent) >= args.min_size
        short_enough = args.max_size is None or len(sent) <= args.max_size
        if long_enough and short_enough:
            break
    print(' '.join(sent))
# Local Variables:
# mode: python
# End: