-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathevaluate_conll07.py
executable file
·105 lines (86 loc) · 2.74 KB
/
evaluate_conll07.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
import os
import subprocess
import tempfile
def read_gold(file):
sents = []
sent = []
for line in file:
line = line.strip()
if not line:
sents.append(sent)
sent = []
else:
parts = line.split('\t')
assert 8 <= len(parts) <= 10 # (id, form, lemma, cpos, pos, feats, head1, deprel, phead, pdeprel)
sent.append(parts)
if sent:
sents.append(sent)
return sents
def read_test(file):
sents = []
sent = []
for line in file:
line = line.strip()
if not line:
sents.append(sent)
sent = []
else:
parts = line.split('\t')
assert len(parts) == 4 # (form, pos, head0, deprel)
sent.append(parts)
if sent:
sents.append(sent)
return sents
def write_sents(path, sents):
with open(path, 'w') as f:
for sent in sents:
for parts in sent:
print('\t'.join(parts), file=f)
print(file=f)
def main(cmd_args):
# Read in the gold and parsed test files.
gsents = read_gold(cmd_args.gold_file)
tsents = read_test(cmd_args.test_file)
new_gsents = []
new_tsents = []
assert len(gsents) == len(tsents)
for gsent, tsent in zip(gsents, tsents):
assert len(gsent) == len(tsent)
new_gsent = []
new_tsent = []
for gparts, tparts in zip(gsent, tsent):
# Convert the gold part into the form required by the CoNLL07 eval script.
new_gparts = gparts[:8] + ['_', '_']
assert len(new_gparts) == 10
new_gsent.append(new_gparts)
# Convert the test part into the form required by the CoNLL07 eval script.
new_tparts = list(new_gparts)
assert new_tparts[1] == tparts[0]
new_tparts[6] = str(int(tparts[2]) + 1)
new_tparts[7] = tparts[3]
assert len(new_tparts) == 10
new_tsent.append(new_tparts)
assert len(new_gsent) == len(new_tsent)
new_gsents.append(new_gsent)
new_tsents.append(new_tsent)
# Output the reformatted gold and test files.
_, new_gpath = tempfile.mkstemp()
_, new_tpath = tempfile.mkstemp()
write_sents(new_gpath, new_gsents)
write_sents(new_tpath, new_tsents)
# Call the CoNLL07 eval script.
args = ['perl', 'eval07.pl', '-g', new_gpath, '-s', new_tpath]
if not cmd_args.eval_punct:
args.append('-p')
subprocess.check_call(args)
# Remove the temp files.
os.remove(new_gpath)
os.remove(new_tpath)
if __name__ == '__main__':
import argparse
ap = argparse.ArgumentParser(description='Runs an evaluation over nbest dependency parse output')
ap.add_argument('gold_file', type=open, help='gold file to evaluate against')
ap.add_argument('test_file', type=open, help='test nbest file to evaluate')
ap.add_argument('--eval-punct', action='store_true', default=False, help='Evaluate punctuation')
main(ap.parse_args())