This repository has been archived by the owner on Dec 13, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxmlParser.py
356 lines (288 loc) · 10.9 KB
/
xmlParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
import xml.etree.ElementTree as ElTree
import sys
import re
from instuction import Instruction
"""
Checks the formal correctness of the root element of the XML structure.
"""
def check_root(root):
    """Validate the root element of the parsed XML document.

    The root must be a ``program`` element whose attributes are limited to
    ``language``, ``name`` and ``description``. ``language`` is mandatory and
    must equal ``IPPcode20`` (compared case-insensitively).

    Args:
        root: Root ``xml.etree.ElementTree.Element`` of the document.

    Raises:
        SystemExit: With exit code 32 when any check fails; a diagnostic
            message is written to stderr first.
    """
    if root.tag != 'program':
        print("Korenovy element neni program.", file=sys.stderr)
        # sys.exit instead of the site-provided exit(): the latter is meant
        # for interactive sessions and is missing under ``python -S``.
        sys.exit(32)

    # Only the three known attributes are tolerated on the root element.
    for attr_name in root.attrib:
        if attr_name not in ('language', 'name', 'description'):
            print("Neni zadany spravny atribut korenoveho elementu.", file=sys.stderr)
            sys.exit(32)

    if 'language' not in root.attrib:
        print("Neni zadany 'language' korenoveho elementu.", file=sys.stderr)
        sys.exit(32)

    if root.attrib['language'].upper() != 'IPPCODE20':
        print("Neni zadany spravny text korenoveho elementu.", file=sys.stderr)
        sys.exit(32)
"""
For each instruction element, checks its formal correctness, i.e.:
order is an int, the element name, duplicates, attributes, and likewise for the instruction's arguments.
"""
def check_elem(elem, order):
    """Validate the structure of one ``instruction`` element.

    Checks performed, in this sequence:

    * the element is named ``instruction``;
    * its attributes are limited to ``order`` and ``opcode``, and ``opcode``
      is present;
    * ``order`` is present, parses as an integer, is not already in *order*
      and is not negative;
    * every child (argument) has a unique tag and carries exactly one
      attribute, ``type``.

    Args:
        elem: The ``xml.etree.ElementTree.Element`` to validate.
        order: List of order numbers seen so far. The order number of *elem*
            is appended to it once it has been accepted.

    Raises:
        SystemExit: With exit code 32 when any check fails; a diagnostic
            message is written to stderr first.
    """
    if elem.tag != 'instruction':
        print("Ocekavan element instrukce.", file=sys.stderr)
        sys.exit(32)

    for attr_name in elem.attrib:
        if attr_name not in ('order', 'opcode'):
            print("Neni zadany spravny atribut elementu instrukce.", file=sys.stderr)
            sys.exit(32)
    # A missing opcode would otherwise surface later as an uncaught KeyError.
    if 'opcode' not in elem.attrib:
        print("Neni zadany spravny atribut elementu instrukce.", file=sys.stderr)
        sys.exit(32)

    # Parse the order once; a missing attribute (KeyError) is reported the
    # same way as a non-numeric one instead of crashing with a traceback.
    try:
        order_number = int(elem.attrib['order'])
    except (KeyError, ValueError, TypeError):
        print("Nevalidni poradi instrukce.", file=sys.stderr)
        sys.exit(32)
    if order_number in order:
        print("Duplicitni poradi instrukce.", file=sys.stderr)
        sys.exit(32)
    if order_number < 0:
        print("Zaporne poradi instrukce.", file=sys.stderr)
        sys.exit(32)
    order.append(order_number)

    seen_tags = set()
    for arg in elem:
        # Argument tags double as positions, so a repeated tag is a duplicate.
        if arg.tag in seen_tags:
            print("Duplicitni poradi argumentu.", file=sys.stderr)
            sys.exit(32)
        seen_tags.add(arg.tag)
        # Exactly one attribute, ``type``: rejects extra attributes and also
        # a missing ``type``, which later checks read unconditionally.
        if set(arg.attrib) != {'type'}:
            print("Kazdy argument musi mit svuj type.", file=sys.stderr)
            sys.exit(32)
"""
Checks the number of arguments of each instruction.
"""
# Number of argument children each known opcode must have.
_OPCODE_ARG_COUNT = {
    opcode: count
    for count, opcodes in (
        (0, ('CREATEFRAME', 'PUSHFRAME', 'POPFRAME', 'RETURN', 'BREAK')),
        (1, ('DEFVAR', 'POPS', 'CALL', 'LABEL', 'JUMP', 'PUSHS', 'WRITE',
             'DPRINT', 'EXIT')),
        (2, ('MOVE', 'TYPE', 'NOT', 'STRLEN', 'INT2CHAR', 'READ')),
        (3, ('ADD', 'SUB', 'MUL', 'IDIV', 'LT', 'GT', 'EQ', 'JUMPIFEQ',
             'JUMPIFNEQ', 'AND', 'OR', 'GETCHAR', 'STRI2INT', 'CONCAT',
             'SETCHAR')),
    )
    for opcode in opcodes
}


def instr_arg_count(elem):
    """Check that an instruction has the argument count its opcode requires.

    The opcode is matched exactly as written in the ``opcode`` attribute
    (case-sensitive).

    Args:
        elem: ``instruction`` element; its children are the arguments.

    Raises:
        SystemExit: With exit code 32 when the opcode is unknown or missing
            (reported here), or when the number of children is wrong
            (reported through ``e_wrong_argcount``).
    """
    # .get(): a missing opcode is reported as an unknown instruction rather
    # than escaping as a KeyError.
    opcode = elem.attrib.get('opcode')
    expected = _OPCODE_ARG_COUNT.get(opcode)
    if expected is None:
        print("Chybna instrukce", opcode, file=sys.stderr)
        sys.exit(32)
    if len(elem) != expected:
        e_wrong_argcount(opcode)
"""
Checks the lexical correctness of every argument of each instruction.
"""
# For every opcode that takes arguments: which kind of operand each argument
# tag must hold. 'var', 'symb', 'label' and 'type' select check_var,
# check_symb, check_label and check_type respectively.
_ARG_KINDS_BY_OPCODE = {
    opcode: kinds
    for opcodes, kinds in (
        (('MOVE', 'TYPE', 'NOT', 'STRLEN', 'INT2CHAR'),
         {'arg1': 'var', 'arg2': 'symb'}),
        (('DEFVAR', 'POPS'),
         {'arg1': 'var'}),
        (('CALL', 'LABEL', 'JUMP'),
         {'arg1': 'label'}),
        (('PUSHS', 'WRITE', 'DPRINT', 'EXIT'),
         {'arg1': 'symb'}),
        (('ADD', 'SUB', 'MUL', 'IDIV', 'LT', 'GT', 'EQ', 'AND', 'OR',
          'GETCHAR', 'STRI2INT', 'CONCAT', 'SETCHAR'),
         {'arg1': 'var', 'arg2': 'symb', 'arg3': 'symb'}),
        (('JUMPIFEQ', 'JUMPIFNEQ'),
         {'arg1': 'label', 'arg2': 'symb', 'arg3': 'symb'}),
        (('READ',),
         {'arg1': 'var', 'arg2': 'type'}),
    )
    for opcode in opcodes
}


def instr_arg_sytax(elem):
    """Check the lexical form of every argument of one instruction.

    Each argument child is validated according to its tag (``arg1`` ..
    ``arg3``) and the instruction's opcode. Children may appear in any
    document order. Opcodes without arguments, and opcodes not listed in the
    table, are left unchecked.

    The number of children is expected to have been verified beforehand
    (``check_syntax`` calls ``instr_arg_count`` first); with too few children
    an ``IndexError`` is raised.

    Args:
        elem: ``instruction`` element to validate.

    Raises:
        SystemExit: With exit code 32, raised by ``e_wrong_arg`` for an
            unexpected argument tag or by the individual ``check_*``
            helpers for an invalid value.
    """
    opcode = elem.attrib.get('opcode')
    kinds = _ARG_KINDS_BY_OPCODE.get(opcode)
    if kinds is None:
        return

    checkers = {
        'var': check_var,
        'symb': check_symb,
        'label': check_label,
        'type': check_type,
    }
    # Only the first len(kinds) children are inspected, in document order.
    for position in range(len(kinds)):
        arg = elem[position]
        kind = kinds.get(arg.tag)
        if kind is None:
            e_wrong_arg(opcode)
        else:
            # .get(): a missing ``type`` attribute reaches the checker as
            # None and is rejected there instead of raising KeyError here.
            checkers[kind](arg.text, arg.attrib.get('type'))
def e_wrong_argcount(opcode):
    """Report a wrong argument count for *opcode* on stderr and terminate.

    Raises:
        SystemExit: Always, with exit code 32.
    """
    print("Nespravny pocet argumentu instrukce", opcode, file=sys.stderr)
    # sys.exit rather than the interactive-only site helper exit().
    sys.exit(32)
def e_wrong_arg(opcode):
    """Report an invalid argument of *opcode* on stderr and terminate.

    Raises:
        SystemExit: Always, with exit code 32.
    """
    print("Nespravny argument instrukce", opcode, file=sys.stderr)
    # sys.exit rather than the interactive-only site helper exit().
    sys.exit(32)
def e_wrong_symb(symb):
    """Report a malformed value of the kind named by *symb* and terminate.

    Args:
        symb: Name of the offending kind (e.g. ``'int'``, ``'label'``), used
            only in the message written to stderr.

    Raises:
        SystemExit: Always, with exit code 32.
    """
    print("Nespravny format typu", symb, file=sys.stderr)
    # sys.exit rather than the interactive-only site helper exit().
    sys.exit(32)
def check_var(var, arg_type):
    """Check that an argument is a well-formed variable reference.

    A valid variable is ``GF@``, ``LF@`` or ``TF@`` followed by a name that
    starts with a letter or one of ``_ - $ & % * ! ?`` and continues with the
    same characters or digits.

    Args:
        var: Text of the argument element; ``None`` when the element is empty.
        arg_type: Value of the argument's ``type`` attribute.

    Raises:
        SystemExit: With exit code 32 (via ``e_wrong_arg``) when *arg_type*
            is not ``'var'`` or *var* is not a valid variable reference.
    """
    if arg_type != 'var':
        e_wrong_arg("")
    # ``var is None`` guards the empty-element case, which would make the
    # regex call raise TypeError. fullmatch (unlike ``$``) also rejects a
    # trailing newline.
    if var is None or not re.fullmatch(
            r'(GF|LF|TF)@[_\-$&%*!?a-zA-Z][_\-$&%*!?a-zA-Z0-9]*', var):
        e_wrong_arg("")
def check_symb(symb, arg_type):
    """Validate a symbol argument: either a variable or a literal constant.

    The declared type selects the validator: ``var`` goes to ``check_var``;
    ``int``, ``bool``, ``string`` and ``nil`` go to the matching literal
    check. Any other declared type is reported through ``e_wrong_arg``.

    Args:
        symb: Text of the argument element.
        arg_type: Value of the argument's ``type`` attribute.
    """
    if arg_type == 'var':
        check_var(symb, arg_type)
        return

    literal_checkers = {
        'int': check_int,
        'bool': check_bool,
        'string': check_string,
        'nil': check_nil,
    }
    validate = literal_checkers.get(arg_type)
    if validate is None:
        e_wrong_arg("")
        return
    validate(symb)
def check_int(symb):
    """Check that *symb* is the text of an integer literal.

    The previous implementation only rejected the empty string, so an empty
    element (text ``None``) and arbitrary non-numeric text were accepted.
    The text must now be convertible with ``int()``.

    Args:
        symb: Text of the argument element; ``None`` when the element is empty.

    Raises:
        SystemExit: With exit code 32 (via ``e_wrong_symb``) when *symb* is
            not an integer literal.
    """
    try:
        int(symb)
    except (TypeError, ValueError):
        # TypeError covers None, ValueError covers '' and non-numeric text.
        e_wrong_symb('int')
def check_bool(symb):
    """Check that *symb* is exactly ``true`` or ``false``.

    Args:
        symb: Text of the argument element; ``None`` when the element is empty.

    Raises:
        SystemExit: With exit code 32 (via ``e_wrong_symb``) otherwise.
    """
    # Plain membership instead of a regex: handles None without a TypeError
    # and does not accept a trailing newline the way ``$`` does.
    if symb not in ('true', 'false'):
        e_wrong_symb('bool')
def check_string(symb):
    """Check that a string literal uses backslashes only in ``\\ddd`` escapes.

    Every backslash must be followed by exactly three decimal digits; all
    other characters are accepted. ``None`` (an empty element) is treated as
    the empty string and accepted.

    Args:
        symb: Text of the argument element, or ``None``.

    Raises:
        SystemExit: With exit code 32 (via ``e_wrong_symb``) when a backslash
            is not part of a three-digit escape.
    """
    # Raw string: the original non-raw pattern relied on the invalid escape
    # sequence ``\]``, which newer Python versions warn about.
    if symb is not None and not re.fullmatch(r'(?:\\[0-9]{3}|[^\\])*', symb):
        e_wrong_symb('string')
def check_nil(symb):
    """Check that *symb* is exactly ``nil``.

    Args:
        symb: Text of the argument element; ``None`` when the element is empty.

    Raises:
        SystemExit: With exit code 32 (via ``e_wrong_symb``) otherwise.
    """
    # Direct comparison: handles None without a TypeError and rejects the
    # trailing newline that ``^nil$`` lets through.
    if symb != 'nil':
        e_wrong_symb('nil')
def check_label(symb, arg_type):
    """Check that an argument is a well-formed label.

    A valid label starts with a letter or one of ``_ - $ & % * ! ?`` and
    continues with the same characters or digits.

    Args:
        symb: Text of the argument element; ``None`` when the element is empty.
        arg_type: Value of the argument's ``type`` attribute.

    Raises:
        SystemExit: With exit code 32, via ``e_wrong_arg`` when *arg_type* is
            not ``'label'``, or via ``e_wrong_symb`` when the name is invalid.
    """
    if arg_type != 'label':
        e_wrong_arg("")
    # ``symb is None`` guards the empty-element case, which would make the
    # regex call raise TypeError. fullmatch also rejects a trailing newline.
    if symb is None or not re.fullmatch(
            r'[_\-$&%*!?a-zA-Z][_\-$&%*!?a-zA-Z0-9]*', symb):
        e_wrong_symb('label')
def check_type(symb, arg_type):
    """Check that an argument names one of the types int, bool or string.

    Args:
        symb: Text of the argument element; ``None`` when the element is empty.
        arg_type: Value of the argument's ``type`` attribute.

    Raises:
        SystemExit: With exit code 32, via ``e_wrong_arg`` when *arg_type* is
            not ``'type'``, or via ``e_wrong_symb`` when *symb* is not one of
            the three type names.
    """
    if arg_type != 'type':
        e_wrong_arg("")
    # Membership test: handles None without a TypeError and does not accept
    # a trailing newline the way a ``$``-anchored regex does.
    if symb not in ('int', 'bool', 'string'):
        e_wrong_symb('type')
"""
Uses the other functions to check the syntax and lexical form of instructions and their arguments.
"""
def check_syntax(root):
    """Run all per-instruction checks on every child of *root*.

    For each child, in document order: structural validation
    (``check_elem``), argument count (``instr_arg_count``) and argument
    lexical form (``instr_arg_sytax``).

    Args:
        root: Root element whose children are the instruction elements.
    """
    seen_orders = []  # order numbers accepted so far, filled by check_elem
    for instruction in root:
        check_elem(instruction, seen_orders)
        for validate in (instr_arg_count, instr_arg_sytax):
            validate(instruction)
    # NOTE: a check that the order numbers form the contiguous sequence
    # 1, 2, 3, ... existed here only as a disabled, string-quoted snippet;
    # it is not performed, so gaps in the numbering are accepted.
"""
Called only after all checks have passed; creates the argument objects and fills the instruction objects with them.
The instruction objects are then stored in the instruction list, which this function returns.
"""
def fill_inst_list(root):
    """Build ``Instruction`` objects from an already validated XML tree.

    One ``Instruction`` is created per child of *root* from its ``opcode``
    and ``order`` attributes (``order`` is passed on as the original string).
    Every argument child is added through ``Instruction.add_arg`` with its
    type, text and tag. For ``string`` arguments, ``\\ddd`` escape sequences
    are replaced by the character with that decimal code before the text is
    handed over; the decoded text is also written back to the element.

    Args:
        root: Root element whose children are ``instruction`` elements.

    Returns:
        List of ``Instruction`` objects in document order.
    """
    def decode_escape(match):
        # Group 1 holds the three decimal digits after the backslash.
        return chr(int(match.group(1)))

    inst_list = []
    for elem in root:
        inst = Instruction(elem.attrib['opcode'], elem.attrib['order'])
        for arg in elem:
            if arg.attrib['type'] == 'string' and arg.text is not None:
                # Decode in a single pass. Chained str.replace calls could
                # re-interpret a backslash produced by ``\092`` together with
                # the digits that follow it as a new escape.
                arg.text = re.sub(r'\\([0-9]{3})', decode_escape, arg.text)
            inst.add_arg(arg.attrib['type'], arg.text, arg.tag)
        inst_list.append(inst)
    return inst_list
def get_key(obj):
    """Return the value stored under the ``'order'`` key of *obj*."""
    order_value = obj['order']
    return order_value
"""
Processes the input XML file.
Returns the list of instructions together with their arguments.
"""
def parse(source_file, content):
    """Parse and validate the XML program, returning its instructions.

    Args:
        source_file: Path (or file object) handed to ``ElementTree.parse``.
            The value ``0`` means no file was given, in which case *content*
            is parsed instead.
        content: XML text to parse when *source_file* is ``0``.

    Returns:
        List of ``Instruction`` objects sorted in ascending order of their
        integer ``order``.

    Raises:
        SystemExit: With exit code 31 when the XML is not well-formed; with
            exit code 32 from the validation helpers when the document
            structure is invalid.
    """
    try:
        if source_file != 0:
            root = ElTree.parse(source_file).getroot()
        else:
            root = ElTree.fromstring(content)
    except ElTree.ParseError:
        print("Nespravne zformovan xml vstup.", file=sys.stderr)
        # sys.exit rather than the interactive-only site helper exit().
        sys.exit(31)

    # Validate the root element first, then every instruction.
    check_root(root)
    check_syntax(root)

    inst_list = fill_inst_list(root)
    # ``order`` is stored as text on the instruction; sort numerically.
    inst_list.sort(key=lambda inst: int(inst.order))
    return inst_list