-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpepper_runner.py
104 lines (83 loc) · 3.46 KB
/
pepper_runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import threading, os, re, sys, platform
from paths import coptic_nlp_path
sys.path.insert(0, coptic_nlp_path)
from coptic_nlp import exec_via_temp
# True when the interpreter's major version is below 3 (i.e. Python 2).
PY2 = sys.version_info[0] < 3
def compress_pepper_out(pepper_msg, full_log=False):
    """Condense raw Pepper console output into a short report.

    The output is stripped of its banner, job description, job status and
    other routine messages; the remaining text is then searched for the
    "Conversion ended" line, the first line mentioning "exception" and the
    first line containing a ".java:" stack-trace reference.

    :param pepper_msg: complete console output of the Pepper run, either as
        text or as UTF-8 encoded bytes
    :param full_log: if True, the complete (unfiltered) output is appended
        to the report; if False, a hint about the -v flag is appended instead
    :return: the report as a string, including a count of ignored empty XML
        spans when Pepper reported any
    """
    # Decode whenever raw bytes are handed in (Python 2 ``str`` or Python 3
    # ``bytes``); text input is used as-is.
    if isinstance(pepper_msg, bytes):
        pepper_msg = pepper_msg.decode("utf8")

    pepper_out = pepper_msg.replace("\r", "")

    # Count the empty-span warnings before they are filtered out below.
    empty_spans = sum(
        1 for line in pepper_out.split("\n")
        if "no tokens contained in span" in line
    )

    # NOTE: flags must be passed by keyword. The fourth positional parameter
    # of re.sub() is ``count``, so passing flags positionally silently
    # disables them and caps the number of substitutions instead.
    multiline = re.MULTILINE | re.DOTALL

    # remove header
    pepper_out = re.sub(r"^.*?\*\*", "", pepper_out, flags=multiline)
    pepper_out = re.sub(r"^.*step 1", "", pepper_out, flags=multiline)

    # remove job description
    pepper_out = re.sub(r"-{4}-+.*?" + "-" * 78 + r".*?\+", "", pepper_out, flags=multiline)

    # remove job status messages
    pepper_out = re.sub(r"-+ pepper job status -+[^-]+-+", "", pepper_out, flags=multiline)

    # remove empty span warnings
    pepper_out = re.sub(r"input file.*?span will be ignored!", "", pepper_out)

    # remove meta tag messages
    pepper_out = re.sub(r"using meta tag '.*?'", "", pepper_out)

    # remove encoding messages
    pepper_out = re.sub(r"using input file encoding '.*?'", "", pepper_out)

    # remove footer
    pepper_out = re.sub(r"\*{4}\*+\n.*?\*{4}\*+", "", pepper_out, flags=multiline)

    # collapse whitespace-only lines and runs of blank lines
    pepper_out = re.sub(r"\n +\n", r"\n", pepper_out, flags=multiline)
    pepper_out = re.sub(r"\n+", r"\n", pepper_out, flags=multiline)

    # Get pepper messages: the first hit of each pattern, concatenated in
    # this order without a separator.
    messages = ""
    for pattern in (
        r"(Conversion ended[^\n\r]*)",
        r"([^\n\r]*exception[^\n\r]*)",
        r"([^\n\r]*\.java:[^\n\r]*)",
    ):
        m = re.search(pattern, pepper_out)
        if m is not None:
            messages += m.group(1)

    if not full_log:
        messages += "\n\n(In case of errors you can get verbose pepper output using the -v flag)"

    report = ""
    if empty_spans > 0:
        report += "\n i Pepper reports " + str(empty_spans) + " empty xml spans were ignored\n"
    report += " i Pepper says:\n\n"
    report += messages
    if full_log:
        report += "\n\nFull pepper output:\n\n" + pepper_msg

    return report
def runner(pepper_params, output):
    """Thread worker: launch Pepper and store its output in ``output[0]``.

    :param pepper_params: parameters handed to ``exec_via_temp`` together
        with the Pepper command line (presumably the content of the temp
        file that replaces the "tempfilename" placeholder -- confirm against
        coptic_nlp.exec_via_temp)
    :param output: mutable sequence whose first element is overwritten with
        whatever ``exec_via_temp`` returns, so the calling thread can read it
    """
    pepper_dir = os.path.abspath("pepper") + os.sep

    # Only Windows can run the batch launcher; every other platform (Linux,
    # macOS, BSD, ...) needs the shell script.
    if platform.system() == "Windows":
        launcher = "pepperStart.bat"
    else:
        launcher = "pepperStart.sh"

    pepper_cmd = [pepper_dir + launcher, "-p", "tempfilename"]
    output[0] = exec_via_temp(pepper_params, pepper_cmd, pepper_dir)
    return
def cycle_spinner(spinner):
    """Return the spinner frame that follows ``spinner``.

    The frames advance in the order "/", "-", "\\", "|" and then wrap
    around to "/". Any other value yields None.
    """
    transitions = (
        ("/", "-"),
        ("-", "\\"),
        ("\\", "|"),
        ("|", "/"),
    )
    for current_frame, next_frame in transitions:
        if spinner == current_frame:
            return next_frame
    return None
def run_pepper(pepper_params, full_log=False):
    """Run Pepper in a worker thread and return a condensed report.

    While the worker is running, a spinner is written to the original
    standard output once per second so the command line stays responsive.

    :param pepper_params: parameters passed through to ``runner``
    :param full_log: passed through to ``compress_pepper_out``; if True the
        complete Pepper output is included in the report
    :return: the report string produced by ``compress_pepper_out``
    """
    # Open new thread for pepper so we don't lose control of the cli
    output = [""]  # Placeholder variable to get output via modification by ref
    t = threading.Thread(target=runner, args=(pepper_params, output))
    t.start()

    spinner = "/"
    # Thread.isAlive() was removed in Python 3.9; is_alive() exists in both
    # Python 2.6+ and Python 3.
    while t.is_alive():
        spinner = cycle_spinner(spinner)
        sys.__stdout__.write("Pepper is working... " + spinner + "\r")
        t.join(1)  # wait at most one second before redrawing the spinner

    # Blank out the spinner line before moving on.
    sys.__stdout__.write(" " * 30 + "\n")

    return compress_pepper_out(output[0], full_log)