-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathCparser.py
119 lines (105 loc) · 3.3 KB
/
Cparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/python
####################################################################
# Name: Cparser.py
# Function: Accept any CheckPoint .C file and convert into Python list format
# Parameters:
# Arg1: Input file - Any .C file like cp-admin.C or objects.C
# Arg2: Output file
# Comment: I probably could have done this in 1 line in Perl, but this is
# python program #1 for me
#########################################################################
import re
import pprint
import sys
#
# Globals
#
# Parser cursor shared by next_token().
idx = 0  # character offset inside the current line
idy = 0  # index of the current line within `lines`
lines = []  # cleaned-up input lines to be parsed

# Token patterns.
re_word = re.compile(r'(?P<word>[^\s\)\(]+)')  # run of non-space, non-parenthesis characters
re_quote = re.compile(r'(?P<quote>".*?")')  # shortest double-quoted span, quotes included

# Line clean-up patterns.
re_spaces = re.compile(r'\s+')  # any run of spaces/tabs
# Leading whitespace OR a colon.  NOTE: only the left alternative is anchored,
# so this pattern matches every ':' in the string, wherever it occurs.
re_indent = re.compile(r'^\s+|:')
re_colon = re.compile(r'^\s*:')  # a colon preceded only by optional whitespace
# A line that does not begin (after optional whitespace) with ')', '(' or ':'
# is part of a multi-line statement.
re_single = re.compile(r'^\s*[)(:]')
#
# Recursively process one character at a time
#
def next_token():
    """Parse the text in ``lines`` into nested Python lists.

    Scanning starts at character ``idx`` of line ``idy`` (module globals) and
    both are advanced as input is consumed.  Each '(' triggers a recursive
    call whose result is appended as a sub-list; the matching ')' ends that
    call.  A double-quoted string becomes a single token (quotes included) and
    any other run of non-space, non-parenthesis characters becomes a word
    token, e.g. ``MMM_Global_Read-Only``, ``AdminInfo`` or ``chkpf_uid``.

    Malformed input is tolerated instead of raising:
      * a '(' that is never closed before the end of input yields the
        partially built sub-list;
      * a '"' without a closing quote on the same line takes the remainder
        of that line as the token.

    Returns:
        list: the tokens (str) and nested lists found at this nesting level.
    """
    global idx, idy, lines
    tokens = []  # the Python list being built for this nesting level
    # Re-check the line index on every step: a recursive call may have
    # advanced idy to another line, or past the last line altogether.
    while idy < len(lines):
        line = lines[idy]
        if idx >= len(line):
            # end of this line: move the cursor to the start of the next one
            idx = 0
            idy += 1
            continue
        char = line[idx]
        if char == '(':
            # descend; the callee returns at the matching ')' or at end of input
            idx += 1
            tokens.append(next_token())
        elif char == ')':
            idx += 1
            return tokens
        elif char == '"':
            found = re_quote.search(line[idx:])
            if found is None:
                # unterminated quote: keep the rest of the line as one token
                tokens.append(line[idx:])
                idx = len(line)
            else:
                tokens.append(found.group('quote'))
                idx += found.end()
        elif char.isspace():
            idx += 1
        else:
            found = re_word.search(line[idx:])
            tokens.append(found.group('word'))
            idx += found.end()
    return tokens
# Matches either a complete double-quoted string (captured) or a bare colon.
_re_quoted_or_colon = re.compile(r'("[^"]*")|:')


def cleanup(line):
    """Normalise one raw input line before tokenising.

    Leading/trailing whitespace is stripped, every run of whitespace is
    collapsed to a single space, and colons are removed (this drops the ':'
    prefix of attribute names such as ``:chkpf_uid``).

    Colons inside a complete double-quoted string on the line are kept, so a
    value like ``"10:47:13"`` is no longer turned into ``"104713"``.

    Args:
        line: one line of text from the input file.

    Returns:
        str: the normalised line.
    """
    line = re_spaces.sub(" ", line.strip())
    # Quoted strings are re-emitted unchanged; a bare colon is replaced by "".
    return _re_quoted_or_colon.sub(lambda m: m.group(1) or "", line)
###
### Parse the file and then pretty print output
###
def parse(fileout):
    """Tokenise the loaded input and pretty-print the resulting nested list.

    Args:
        fileout: writable stream that receives the pretty-printed structure.

    Returns:
        The nested list produced by next_token().
    """
    tree = next_token()
    printer = pprint.PrettyPrinter(indent=1, width=100, stream=fileout)
    printer.pprint(tree)
    return tree
######################## MAIN ##################################
##
## open the input and output files and parse them
##
fileout = sys.stdout
from sys import argv, stdin
if len(argv) >= 2:
    # Read the input file once; the context manager closes it afterwards.
    with open(argv[1], 'r') as f:
        for line in f:
            # A line that does not begin with ')', '(' or ':' continues the
            # previous statement and is glued onto it.  Lines are only joined
            # once at least two lines have been stored.
            if not re_single.search(line) and len(lines) > 1:
                lines[-1] = lines[-1] + " " + cleanup(line)
            else:
                lines.append(cleanup(line))
    # open output file if present otherwise stdout
    if len(argv) == 3:
        fileout = open(argv[2], 'w')
    # parse the file, then release the output file if we opened one
    try:
        parse(fileout)
    finally:
        if fileout is not sys.stdout:
            fileout.close()
else:
    # sys.stderr.write behaves the same on Python 2 and Python 3
    sys.stderr.write('Args: [<.C file in> <outputfile> ]\n')