-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfindCollisions.py
107 lines (97 loc) · 3.42 KB
/
findCollisions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from collections import Counter
def countCollisions(entries,filename='output_shortlist.txt'):
    """Report the entries that occur more than once and return their count.

    Prints three lines: the number of distinct colliding entries, that
    number as a percentage of the line count of *filename*, and the list
    of colliding entries.

    Args:
        entries: Iterable of hashable entries (words) to check.
        filename: Text file whose number of lines is used as the
            denominator of the printed collision rate.

    Returns:
        The number of distinct entries that appear more than once.
    """
    collisions = [entry for entry, count in Counter(entries).items() if count > 1]
    num_collisions = len(collisions)
    print('number of collisions: ' + str(num_collisions))

    with open(filename, 'r') as f:
        num_lines = sum(1 for _ in f)

    # An empty file has no lines to relate the collisions to; report 0.0 %
    # rather than failing on an undefined line count / division by zero.
    if num_lines:
        rate = round(num_collisions * 100.0 / num_lines, 2)
    else:
        rate = 0.0
    print('rate of collisions: ' + str(rate) + ' %')
    print('word collisions: ' + str(collisions))
    return num_collisions
def countCollisionsInFile(filename,cv=False,cvc=False,allofinal=False,justCons=False):
    """Count colliding output words in *filename*, optionally reduced first.

    The output word of a line is the text before its first comma with every
    space-plus-apostrophe sequence removed. At most one reduction is applied
    to each word; when several flags are set, the first of cv, cvc,
    allofinal, justCons (in that order) wins.

    Args:
        filename: Path of the text file to read, one entry per line.
        cv: Reduce each word with justTwoInitSylls_CV.
        cvc: Reduce each word with justTwoInitSylls_CVC.
        allofinal: Reduce each word with justTwoInitSylls_CVC_AlloFinal.
        justCons: Reduce each word with justConsonants.

    Returns:
        The value returned by countCollisions for the collected words.
    """
    # Choose the reduction once; the branch order fixes the flag priority.
    if cv:
        reduce_word = justTwoInitSylls_CV
    elif cvc:
        reduce_word = justTwoInitSylls_CVC
    elif allofinal:
        reduce_word = justTwoInitSylls_CVC_AlloFinal
    elif justCons:
        reduce_word = justConsonants
    else:
        reduce_word = None

    words = []
    with open(filename, 'r') as handle:
        for row in handle:
            # keep only the output word of the line
            output_word = row.split(',')[0].replace(' \'', '')
            if reduce_word is not None:
                output_word = reduce_word(output_word)
            words.append(output_word)
    return countCollisions(words, filename)
def countCollisionsInList(entries):
    """Count colliding entries of an in-memory collection.

    Delegates to countCollisions without passing a filename, so the printed
    collision rate is relative to the line count of that function's default
    file.

    Args:
        entries: Iterable of hashable entries (words) to check.

    Returns:
        The number of distinct entries that appear more than once.
    """
    collision_total = countCollisions(entries)
    return collision_total
def justTwoInitSylls_CV(word):
    """Cut *word* right after its second vowel.

    Vowels are the lowercase letters a, e, i, o, u. A word with fewer than
    two vowels is returned unchanged.

    Args:
        word: The string to truncate.

    Returns:
        The prefix of *word* ending with its second vowel, or *word* itself.
    """
    vowel_positions = [pos for pos, letter in enumerate(word) if letter in 'aeiou']
    if len(vowel_positions) < 2:
        return word
    second_vowel = vowel_positions[1]
    return word[:second_vowel + 1]
def justTwoInitSylls_CVC(word):
    """Cut *word* one character after its second vowel.

    Vowels are the lowercase letters a, e, i, o, u. The character that
    follows the second vowel is kept when there is one. A word with fewer
    than two vowels is returned unchanged.

    Args:
        word: The string to truncate.

    Returns:
        The prefix of *word* up to and including the character after its
        second vowel, or *word* itself.
    """
    vowels_seen = 0
    for pos, letter in enumerate(word):
        if letter not in 'aeiou':
            continue
        vowels_seen += 1
        if vowels_seen == 2:
            # keep the second vowel (pos) plus one trailing character
            return word[:pos + 2]
    return word
def justTwoInitSylls_CVC_AlloFinal(word):
    """Truncate *word* with justTwoInitSylls_CVC and merge its final letter.

    After truncation, the last letter is replaced by the representative
    letter of its group as listed in the table below (for example 'p'
    becomes 'b' and any of e, i, o, u becomes 'a'). Letters not listed are
    left as they are.

    Args:
        word: The string to reduce.

    Returns:
        The truncated word with its final letter mapped; an empty string
        when the truncated word is empty.
    """
    allophones = {
        'e':'a', 'i':'a', 'o':'a', 'u':'a',
        'p':'b',
        'c':'z', 'j':'z', 's':'z',
        't':'d',
        'f':'v',
        'k':'g', 'q':'g',
        'x':'h',
        'r':'l',
        'n':'m'
    }
    truncated = justTwoInitSylls_CVC(word)
    if not truncated:
        # An empty word has no final letter to map; indexing [-1] would
        # raise IndexError.
        return ''
    final = truncated[-1]
    return truncated[:-1] + allophones.get(final, final)
def justConsonants(word):
    """Truncate *word* with justTwoInitSylls_CVC and drop its inner vowels.

    The first character of the truncated word is always kept, even when it
    is a vowel; every later a, e, i, o or u is removed.

    Args:
        word: The string to reduce.

    Returns:
        The truncated word without its non-initial vowels; an empty string
        when the truncated word is empty.
    """
    truncated = justTwoInitSylls_CVC(word)
    # Slicing (instead of indexing [0]) keeps an empty word from raising
    # IndexError: both slices are simply empty.
    head = truncated[:1]
    tail = truncated[1:]
    return head + ''.join(letter for letter in tail if letter not in 'aeiou')
if __name__ == '__main__': # if running this .py file directly
    fileName = 'output_shortlist.txt'
    # (heading, keyword flags) of each report, in the order they are printed
    reports = [
        ('\n--- whole words: ---', {}),
        ('\n--- cv: 1st 2 syllables: ---', {'cv': True}),
        ('\n--- cvc: 1st 2 syllables: ---', {'cvc': True}),
        ('\n--- cvc : 1st 2 syllables + ALLO FINAL: ---', {'allofinal': True}),
        ('\n--- cvc : 1st 2 syllables + CONSONANTS ONLY: ---', {'justCons': True}),
    ]
    for heading, flags in reports:
        print(heading)
        countCollisionsInFile(fileName, **flags)