-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpoem_gen.py
executable file
·130 lines (109 loc) · 3.4 KB
/
poem_gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python3
import config
import os
import random
# Sentinels used by the markov chain. They are ints, whereas every real token
# produced by markov.add is a str, so a sentinel can never equal a token.
# BEGIN is the pseudo-token whose successors are the first tokens of examples.
BEGIN = 0
# END is the pseudo-successor recorded after the last token of each example.
END = 1
class markov:
    """First-order Markov chain over tokens.

    ``associations`` maps each token to a dict of the tokens that were seen
    directly after it, together with how often each one occurred. The
    module-level ``BEGIN`` and ``END`` markers are used as the predecessor of
    an example's first token and the successor of its last token.
    """

    def __init__(self):
        # token -> next token -> number of occurrences
        self.associations = {}

    def _add_token(self, token, next):
        """Record one observation of ``next`` directly following ``token``."""
        # NOTE: ``next`` shadows the builtin; the parameter name is kept
        # unchanged so any keyword caller keeps working.
        followers = self.associations.setdefault(token, {})
        followers[next] = followers.get(next, 0) + 1

    def add(self, example, preserve_newlines=False):
        """Add the token-to-token transitions of ``example`` to the chain.

        Args:
            example: Text to learn from. An example that yields no tokens is
                ignored.
            preserve_newlines: When true, the text is cut into lines (line
                endings kept) and each line is split on single spaces, so
                newlines stay attached to tokens and consecutive spaces yield
                empty-string tokens. When false, the text is split on any run
                of whitespace.
        """
        if preserve_newlines:
            tokens = []
            for line in example.splitlines(True):
                tokens.extend(line.split(" "))
        else:
            tokens = example.split()

        if not tokens:
            # Empty example: nothing to learn.
            return

        # Inner transitions first, then the BEGIN/END markers; this keeps the
        # dict insertion order (and so seeded sampling) the same as before.
        for current, following in zip(tokens, tokens[1:]):
            self._add_token(current, following)
        self._add_token(BEGIN, tokens[0])
        self._add_token(tokens[-1], END)

    def get_next(self, word):
        """Return a random successor of ``word``, weighted by observed count.

        Raises:
            KeyError: If ``word`` was never recorded as a predecessor.
        """
        followers = self.associations[word]
        total = sum(followers.values())
        # Draw a position in 1..total and walk the counts until it is covered.
        remaining = random.randint(1, total)
        for candidate, weight in followers.items():
            if remaining <= weight:
                return candidate
            remaining -= weight
        # Explicit raise instead of ``assert False`` so the guard survives -O.
        raise AssertionError("unreachable: counts always cover the drawn index")
def parse_poem(fname, examples):
    """Parse one poem file and append its stanzas to ``examples``.

    Every line up to and including the first blank line is skipped. After
    that, each run of consecutive non-blank lines is concatenated (line
    endings kept) into a single string and appended to ``examples``. A line
    counts as blank only when it is exactly a newline.

    Args:
        fname: Path of the poem file to read.
        examples: List to extend in place; it may already hold stanzas from
            earlier calls.

    Returns:
        The same ``examples`` list that was passed in.

    Raises:
        AssertionError: If ``examples`` holds fewer than two entries after
            parsing. The count includes entries that were already in the list.
    """
    with open(fname) as f:
        in_body = False    # True once the first blank line has been seen
        in_stanza = False  # True while the previous body line was non-blank
        for line in f:
            if line == "\n":
                # A blank line ends the skipped prefix and any open stanza.
                in_body = True
                in_stanza = False
            elif in_body:
                if in_stanza:
                    examples[-1] += line
                else:
                    examples.append(line)
                    in_stanza = True
    # Explicit raise (same exception type as the former assert) so the check
    # is not stripped when Python runs with -O.
    if len(examples) <= 1:
        raise AssertionError(
            "expected more than one example after parsing " + repr(fname)
        )
    return examples
def parse_poems(verbose):
    """Train one markov chain per sub-directory of ``config.POEM_DIR``.

    Each sub-directory is treated as one author: every file inside it is
    parsed with ``parse_poem`` and all resulting examples are fed into a fresh
    ``markov`` chain with newlines preserved.

    Args:
        verbose: When true, print a "training <name>" line per author.

    Returns:
        Dict mapping each sub-directory name to its trained ``markov`` chain.
    """
    chains = {}
    for name in os.listdir(config.POEM_DIR):
        author_dir = os.path.join(config.POEM_DIR, name)
        if not os.path.isdir(author_dir):
            # A stray file next to the author directories cannot be listed;
            # skip it instead of failing the whole training run.
            continue
        if verbose:
            print("training " + name)
        examples = []
        for poem_file in os.listdir(author_dir):
            parse_poem(os.path.join(author_dir, poem_file), examples)
        chain = markov()
        for example in examples:
            chain.add(example, preserve_newlines=True)
        chains[name] = chain
    return chains
def generate_poem(chain):
    """Generate a poem by walking ``chain`` from ``BEGIN`` until ``END``.

    Tokens are joined with a single space, except directly after a newline,
    where the next token starts the new line without a leading space.

    Args:
        chain: Object with a ``get_next(token)`` method, such as ``markov``.

    Returns:
        The generated text as one string.
    """
    token = chain.get_next(BEGIN)
    poem = token
    token = chain.get_next(token)
    while token != END:
        # endswith is False for an empty string too, matching the old
        # "empty or last character is not a newline" check.
        if not poem.endswith("\n"):
            poem += " "
        poem += token
        token = chain.get_next(token)
    return poem
if __name__ == "__main__":
    # Train every chain up front, then generate poems on request.
    chains = parse_poems(True)
    print("done!")
    while True:
        try:
            author = input("author? ")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C at the prompt: leave without a traceback.
            print()
            break
        print()
        if author not in chains:
            # A mistyped name used to raise KeyError and end the session.
            print("unknown author: " + author)
            continue
        print(generate_poem(chains[author]))