-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathWAFme.py
524 lines (473 loc) · 21.4 KB
/
WAFme.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
#!/usr/bin/env python
# -------------------------------------------------------------------------------
# Name: ModSecurity audit log extractor
# Purpose: Build ModSecurity rules to whitelist CRS
#
# Author: mleos
#
# Created: 5/10/2017
#
# Copyright: (c) spartantri cybersecurity
# Licence: Apache2
# -------------------------------------------------------------------------------
import json, re, signal, os
import RuleEditor, tail
from collections import namedtuple
# Working directory; every default path below is resolved relative to it.
cwd = os.getcwd()
# Starting rule id to use if no rule is found in the rule set file.
new_rule_id = 37173
# Keep the initial rule id; the SIGINT handler compares against it to decide
# whether any new rule was generated.
initial_rule_id = new_rule_id
# Step between two consecutive generated rule ids.
increase_rule_id = 10
# Audit log file to tail.
audit_log = os.path.join(cwd, 'audit.log')
# Rule set output file.
rules_output = os.path.join(cwd, 'REQUEST-903.9003-CUSTOMAPP-EXCLUSION-RULES.conf')
# Web server restart command or script executed to load the rules produced.
restart_command = os.path.join(cwd, 'apache_restart.sh')
# result keeps track of identified rule ids and matching elements.
# Keys are "id,uri"; values map each matched variable to its hit count.
# NOTE: result and sample_requests are deliberately two distinct dicts; a
# chained assignment (a=b={}) would make both names point at the same object.
result = {}
sample_requests = {}
# actions/variables/operators/transforms hold the items extracted from the
# ModSecurity reference manual.
# skipper holds the variables that exceeded the min_instances threshold.
# tags holds the tag actions appended to every generated rule.
# Each name gets its OWN list: a chained "a=b=list()" shares a single list, so
# appending a tag would also add it to skipper (and the other way around).
actions = []
variables = []
operators = []
transforms = []
skipper = []
tags = []
# Number of different URI where a variable must be present to whitelist it globally.
min_instances = 10
# rule_parents is used to whitelist rules that are derived from another one.
# Keys are the derived rule id; each value lists the regex prefix of the
# derived rule match, the parent rule id and the variable used in the parent rule.
rule_parents = {'921180': ['TX:paramcounter_', '921170', 'ARGS_NAMES']}
# rule_sensitive lists the rules that issue an additional warning when
# whitelisted because they are critical to security (detectXSS / detectSQLi).
rule_sensitive = ['941100', '941101', '942100']
# rule_ignore lists the rules to be ignored; no action is taken for them.
rule_ignore = ['981405', '952100']
# replacement_element maps a regex over a matched variable name to the target
# to whitelist instead, for names that embed a variable part (e.g. trailing MD5).
# The repetition must be written {10,40}: "{10-40}" is not a quantifier and
# would only match that literal text.
replacement_element = {
    "REQUEST_COOKIES:wordpress_sec_[a-zA-Z0-9]{10,40}": "REQUEST_COOKIES:/wordpress_sec_*/",
    "REQUEST_COOKIES:wordpress_logged_in_[a-zA-Z0-9]{10,40}": "REQUEST_COOKIES:/wordpress_logged_in_*/",
}
def largest_id():
    """Move new_rule_id past the highest rule id already used in rules_output.

    Every line of the rule set file is scanned for an ``id:NNN`` / ``id:'NNN'``
    action (first occurrence per line). When the current new_rule_id is not
    greater than the highest id found, it becomes that id plus
    increase_rule_id. When the file cannot be read, or holds no id that
    collides, new_rule_id is left untouched.

    Reads the globals rules_output and increase_rule_id; updates new_rule_id.
    Returns None.
    """
    global new_rule_id, increase_rule_id, rules_output
    # Track only what the file contains. Seeding the maximum with new_rule_id
    # itself would make the comparison below always true and shift the
    # starting id even when nothing in the file collides with it.
    highest = 0
    try:
        with open(rules_output, 'r') as data:
            # Iterate the file object so lines are read lazily
            for line in data:
                found = re.search(r"id:'?(\d+)'?", line)
                if found:
                    highest = max(highest, int(found.group(1)))
    except (IOError, OSError):
        # Missing or unreadable rule set: keep the configured starting id
        highest = 0
    if new_rule_id <= highest:
        new_rule_id = highest + increase_rule_id
    return
def find_values(id, json_repr):
    """Collect every value stored under key ``id`` anywhere in a JSON document.

    json_repr is the JSON text. The document is decoded once and each decoded
    object is inspected through the decoder's object hook, so nested objects
    are visited before the objects that contain them. Returns the list of
    values found (empty when the key never appears).
    """
    collected = []

    def _collect(node):
        #Remember the value when this object carries the requested key
        if id in node:
            collected.append(node[id])
        return node

    #Only the hook's side effect matters; the decoded document is discarded
    json.loads(json_repr, object_hook=_collect)
    return collected
def extractor(jsonlog):
    """Process one JSON audit log entry and record every rule hit it contains.

    jsonlog is the JSON text of one audit log entry. The request path is taken
    from its ``request_line`` and the transaction id from ``transaction_id``.
    Every message found under a ``messages`` key is parsed for the rule id and
    the offending variable:

    * rule ids listed in rule_ignore are skipped silently;
    * when path, rule id and variable are all known the hit is passed to
      add_item() and add_sample(), and a progress dot is written;
    * otherwise an ``ERROR:`` line with whatever was recognised plus the
      transaction id is printed.

    Returns None.
    """
    import sys  # local import: only needed for the progress dots

    request_line = find_values('request_line', jsonlog)[0]
    #Extract the path, ignoring the method, anything after "?" and the protocol.
    #"[^\?\s]*" (not "+") so that a request for "/" is recognised too.
    uri = re.search(r'^\w+\s(\/[^\?\s]*)\??.*\sHTTP\/(?:(?:1|2)\.?(?:1|0)?)$', request_line)
    path = uri.group(1) if uri else None
    #Get the transaction unique_id
    txid = find_values('transaction_id', jsonlog)[0]
    for log in find_values('messages', jsonlog):
        for event in log:
            #Parse the CURRENT message; reading log[0] here would count the
            #first message once per message and never look at the others
            id_match = re.search(r'\[id "([^"]+)"\]', event)
            rule_id = id_match.group(1) if id_match else None
            if rule_id in rule_ignore:
                #Ignored rules: no action is taken
                continue
            #Get the matched variable, falling back to the complementary regex
            var_match = re.search(r'\[data "(?:M|m)atch.*found\swithin\s(\S+?):?\s', event)
            if not var_match:
                var_match = re.search(
                    r'(?:(?:M|m)atch|(?:F|f)ound).*?\s(?:at|against|in)\s(\S+?)\.?\s', event)
            if path is not None and id_match and var_match:
                add_item(rule_id, path, var_match.group(1))
                add_sample(rule_id, path, var_match.group(1), jsonlog)
                #Write a dot to get some motion on screen
                sys.stdout.write('. ')
                sys.stdout.flush()
            else:
                #Something could not be parsed: dump what is known for this event
                line = ' '.join([
                    '',
                    path if path is not None else request_line,
                    rule_id if rule_id is not None else 'noid',
                    var_match.group(1) if var_match else event,
                    txid,
                ])
                print(' '.join(['ERROR:', line]))
    return
def sigint_handler(signum, frame):
    """SIGINT (CTRL+C) handler: write the collected rules, then restart or exit.

    signum and frame are the standard signal handler arguments (unused).

    print_rules() is run first. If it generated nothing (new_rule_id still
    equals initial_rule_id) the program exits. Otherwise the user chooses
    between restarting the web server and analysing the log again, or
    (optionally restarting and) exiting.
    """
    global result, sample_requests, initial_rule_id, new_rule_id
    import sys  # local import: used only to leave the program

    #Process rules and output to rule set
    print_rules()
    #If there are no changes then exit
    if new_rule_id == initial_rule_id:
        print('No new rules generated...exiting')
        sys.exit(0)
    initial_rule_id = new_rule_id
    if confirm('Do you want to restart the Web server service and continue analyzing the log', True):
        #Restart the webserver process to load the new configuration
        retvalue = os.system(restart_command)
        #Print the exit status of the restart command
        print(retvalue)
        #Start over with two fresh, independent dictionaries
        result = {}
        sample_requests = {}
        #NOTE: main() is entered from inside the signal handler, so each
        #restart nests one level deeper
        main()
        return
    if confirm('Do you want to restart the Web server service before exiting', True):
        # Restart the webserver process to load the new configuration
        retvalue = os.system(restart_command)
        # Print the exit status of the restart command
        print(retvalue)
    #Both answers to the last question end the program
    sys.exit(0)


signal.signal(signal.SIGINT, sigint_handler)
def add_item(id, uri, var):
    """Count one hit of variable ``var`` for rule ``id`` at ``uri``.

    The global result dictionary is keyed by "ruleid,uri"; each entry maps the
    matching element (eg ARGS:element) to the number of times it was seen.
    Returns None.
    """
    global result
    #Dictionary item key is "ruleid,uri"
    key = id + ',' + uri
    hits = result.setdefault(key, {})
    #First sighting starts at 1, later ones add 1 to the counter
    hits[var] = hits.get(var, 0) + 1
    return
def _build_sample(method, host, path, args, headers, body):
    #Return a python-requests snippet replaying the request, or None when the
    #method is not one of GET, HEAD or POST
    if method not in ('GET', 'HEAD', 'POST'):
        return None
    url = ''.join(['https://', host, path])
    if args:
        url = '?'.join([url, args])
    #%r yields a correctly quoted and escaped literal whatever the data holds
    call = 'requests.%s(%r' % (method.lower(), url)
    if method == 'POST':
        call = ''.join([call, ', data=%r' % (body[0] if body else '',)])
    return ''.join(['import requests\n', call, ', headers=', repr(headers), ')'])


def add_sample(id, uri, var, content):
    """Print a python-requests snippet that replays the logged request.

    id and var identify the rule hit (currently unused here), uri is the
    request path extracted by the caller and content is the JSON text of the
    audit log entry.

    The ``request`` section provides the headers, the request line and the
    optional body; the ``transaction`` section provides the transaction id,
    which is always printed. No sample is produced when the request line
    cannot be parsed (it is printed instead), when its path differs from uri
    (method, path and arguments are printed instead), when there is no Host
    header, or when the method is not GET, HEAD or POST. Returns None.
    """
    #Get the request section of the audit log
    request = find_values('request', content)[0]
    headers = request["headers"]
    request_line = request["request_line"]
    #"[^\?\s]*" (not "+") so that a request for "/" is recognised too
    uri_check = re.search(
        r'^(\w+)\s(\/[^\?\s]*)\??(.*)\sHTTP\/(?:(?:1|2)\.?(?:1|0)?)$', request_line)
    usable = False
    if not uri_check:
        print(request_line)
    elif uri != uri_check.group(2):
        print(' '.join(uri_check.groups()))
    else:
        usable = True
    #Get the transaction section of the audit log; it looks like
    #{u'local_port': 443, u'remote_port': 37536, u'remote_address': u'78.227.109.215',
    # u'time': u'18/Oct/2017:01:09:34 +0000', u'local_address': u'172.31.26.26',
    # u'transaction_id': u'Weapzn8AAQEAAEikCSkAAAAI'}
    transaction = find_values('transaction', content)[0]
    print(transaction["transaction_id"])
    if not usable:
        return
    #Only the real Host header names the target; a substring search would also
    #pick headers such as X-Forwarded-Host
    host = None
    for header_key in headers:
        if header_key.lower() == 'host':
            host = headers[header_key]
            break
    if host is None:
        return
    method, request_filename, args = uri_check.groups()
    #NOTE(review): the scheme is always https, local_port is not consulted
    sample = _build_sample(method, host, request_filename, args,
                           headers, request.get("body") or [])
    if sample:
        print(sample)
    return
def print_rules():
    """Report the collected hits and trigger rule generation.

    Prints the global result dictionary, promotes to the global skipper list
    every variable whose count from shrinker() exceeds min_instances, prints
    one summary line per rule id / uri / variable, calls rule_skeleton() once
    per rule id / uri pair and finally rule_globals(). Returns None.
    """
    global result, variables, min_instances, skipper
    #Build the regex identifying known variable names (each item of variables
    #exposes .name). Longest names first so that e.g. ARGS_NAMES is not
    #reported as ARGS.
    names = sorted((var.name for var in variables), key=len, reverse=True)
    variables_rx = ''.join(['^(', '|'.join(names), ')']) if names else None
    #Print the summary of rule ids, uris and variables identified
    print(result)
    #Get reduced list of elements
    elements = shrinker()
    #Check which elements exceed the min_instances threshold
    for name in elements:
        count = elements[name]
        #Accept a plain count as well as a nested {name: count} mapping: on
        #Python 2 comparing the mapping itself with an int is always True
        if isinstance(count, dict):
            count = count.get(name, 0)
        if count > min_instances and name not in skipper:
            skipper.append(name)
    #Iterate through the result dictionary and print messages to screen
    for key in list(result):
        id, uri = key.split(',', 1)
        hits = result[key]
        for matched in hits:
            #Describe the variable being iterated, not always the first one
            prob = re.search(variables_rx, matched) if variables_rx else None
            if prob:
                print("#The rule %s matched %s from %s %s times at uri %s" % (id, prob.group(1), matched, hits[matched], uri))
            else:
                print("#The rule %s matched %s %s times at uri %s" % (id, matched, hits[matched], uri))
        #Generate the rule whitelisting variables per rule id and URI (SecRule with ctl)
        rule_skeleton(id, list(hits), hits, uri)
    #Generate whitelist of identified variables to be whitelisted (SecRuleUpdateTargetById)
    rule_globals()
    return
def rule_skeleton(id, target, match, uri):
    """Write the rule whitelisting ``target`` variables of rule ``id`` at ``uri``.

    id is the rule id that fired, target the list of matched variables, match
    the per-variable hit counts (unused) and uri the request path.

    One ``SecRule REQUEST_FILENAME "@endsWith <uri>"`` is built carrying a
    ``ctl:ruleRemoveTargetById`` action for every variable not listed in the
    global skipper. When id is listed in rule_parents, each variable matching
    the parent's prefix is whitelisted on the parent rule id with the prefix
    stripped. If at least one ctl action remains, the rule is printed,
    appended to rules_output and new_rule_id is advanced by increase_rule_id.
    The caller's list is not modified. Returns None.
    """
    global new_rule_id, increase_rule_id, rule_parents, skipper, tags
    targets = list(target)
    if not targets:
        return
    #Check if rule have a related rule and do the whitelisting on it instead
    parent_rx = parent_id = None
    if id in rule_parents:
        parent_rx = re.compile(''.join(['^', rule_parents[id][0], '(.*)']))
        parent_id = rule_parents[id][1]
    comment = ''
    label = targets[0]
    ctl_actions = []
    for position, raw in enumerate(targets):
        rule_id, variable = id, raw
        original_target = parent_rx.search(raw) if parent_rx else None
        if original_target:
            #Substitute the parent rule id and the un-prefixed variable
            rule_id, variable = parent_id, original_target.group(1)
            if position == 0:
                comment = ('#Sibling rule %s triggered on %s at %s\n'
                           '#Parent rule %s whitelisting %s at %s\n'
                           % (id, raw, uri, parent_id, variable, uri))
                label = variable
        #Variables in the global whitelist get no per-URI ctl; skipper holds
        #the names as they were matched, so compare the unmodified name
        if raw in skipper:
            continue
        ctl_actions.append('ctl:ruleRemoveTargetById=%s;%s' % (rule_id, variable))
    #Everything made it to the global whitelist: nothing to write
    if not ctl_actions:
        return
    #Add comment to specify variable and URI
    comment = ''.join([comment, '#%s whitelisted from %s\n' % (label, uri)])
    #@endsWith compares literal text, so no regex anchor may follow the URI
    header = 'SecRule %s "@endsWith %s" \\\n' % ('REQUEST_FILENAME', uri)
    #Rule id, phase:2, no transform, nolog, pass, then the tags and the ctl list
    actions = ['"id:%s' % str(new_rule_id), 'phase:2', 't:none', 'nolog', 'pass']
    actions.extend(tags)
    actions.extend(ctl_actions)
    rule = ''.join([comment, header, ' ', ',\\\n '.join(actions), '"\n'])
    print(rule)
    with open(rules_output, 'a') as handle:
        handle.write(rule)
    #Increase the rule id for next rule
    new_rule_id += increase_rule_id
    return
def get_parent(id, target):
    """Return the rule id and target list to use when whitelisting ``target``.

    id is the rule id that fired; target is a variable name or a list whose
    first item is the variable name.

    When id is listed in rule_parents and the variable starts with the
    registered prefix, the parent rule id is returned and the prefix is
    stripped from the variable. The variable is then replaced when it matches
    a pattern of replacement_element. When nothing applies, id and the
    variable come back unchanged.

    Returns a tuple (rule id, list). The list is always a new one, the
    caller's list is never modified.
    """
    global rule_parents, replacement_element
    #Always work on a list, and on a copy so the caller's data is untouched
    child = list(target) if isinstance(target, list) else [target]
    if not child:
        return id, child
    #Check if rule have related rule
    if id in rule_parents:
        #rule_parents[id][0] holds the regex to match in the current rule
        rx = ''.join(['^', rule_parents[id][0], '(.*)'])
        original_target = re.search(rx, child[0])
        #Substitute only when the variable really carries the prefix
        if original_target:
            #rule_parents[id][1] contains the rule id of the related rule
            id = rule_parents[id][1]
            child[0] = original_target.group(1)
    #Check if there is a variable that have to be replaced and do the switch
    #NOTE(review): applied to every rule, not only to rules with a parent - confirm
    for pattern in replacement_element:
        if re.search(pattern, child[0]):
            child[0] = replacement_element[pattern]
    return id, child
def rule_globals():
    """Append the site wide whitelist (SecRuleUpdateTargetById) to rules_output.

    For every variable of the global result that is listed in skipper, one
    ``SecRuleUpdateTargetById <rule id> !<variable>`` line is produced per
    rule id that matched it, after translation through get_parent(). Lines
    already present in rules_output, or already produced during this call,
    are not repeated. A warning is printed when the rule whitelisted is
    listed in rule_sensitive. Returns None.
    """
    global result, rule_parents, skipper, rule_sensitive
    header = "#Site wide whitelisted elements\n"
    #Collect, per globally whitelisted variable, the rule ids that matched it
    global_whitelist = {}
    for key in result:
        id, uri = key.split(',', 1)
        for variable in result[key]:
            if variable in skipper:
                ids = global_whitelist.setdefault(variable, [])
                #Add each rule id once, keeping the ids collected so far
                if id not in ids:
                    ids.append(id)
    #Read the existing rule set once; a missing file simply has no lines
    try:
        with open(rules_output, 'r') as handle:
            known = set(line.strip() for line in handle)
    except (IOError, OSError):
        known = set()
    rules = header
    #Sorted only to make the output order reproducible
    for variable in sorted(global_whitelist):
        for item in global_whitelist[variable]:
            #Get the related rule and the final target name, if any
            new_item, new_target = get_parent(item, [variable])
            rule = "SecRuleUpdateTargetById %s !%s" % (new_item, new_target[0])
            #If the rule actually whitelisted is sensitive throw additional warning
            if new_item in rule_sensitive:
                print("#Warning whitelisting sensitive rule! - %s" % new_item)
            #If the item is not already listed add it, otherwise ignore it
            if rule not in known:
                known.add(rule)
                rules = ''.join([rules, rule, "\n"])
    #If the rules have something else other than the initial comment, save it
    if rules != header:
        with open(rules_output, 'a') as handle:
            handle.write(rules)
        print(rules)
    return
def shrinker():
    """Count, for every matched variable, the number of different URI it hit.

    Reads the global result dictionary (keys "ruleid,uri", values mapping
    variable -> hit count). A variable matched by several rules at the same
    URI is counted once for that URI.

    Returns a dict mapping each variable name to an int that can be compared
    directly with min_instances.
    """
    global result
    uris_by_variable = {}
    for key in result:
        #Only the uri part matters here; the rule id is dropped
        uri = key.split(',', 1)[1]
        for variable in result[key]:
            uris_by_variable.setdefault(variable, set()).add(uri)
    return dict((variable, len(uris)) for variable, uris in uris_by_variable.items())
def ruleset_control():
    """Prepare the general rule set control settings for this run.

    Derives a run stamp from the current time (its decimal point removed) and
    appends a ``tag:'wafme_<stamp>'`` action to the global tags list. The
    prolog action and the start/finish markers are built as well but are not
    used anywhere yet. Returns None.
    """
    global new_rule_id, increase_rule_id, tags
    import time
    #Run stamp shared by the markers and the tag
    stamp = str(time.time()).replace(".", "")
    #Rule set prolog
    prolog_parts = [
        'SecAction ',
        '"id:%s' % str(new_rule_id),
        'phase:2',
        "setvar:'tx.wafme_debuglevel=0'",
        'noauditlog',
        'nolog',
        'pass',
    ]
    ruleset_vars = ''.join(prolog_parts)
    ruleset_header = 'SecMarker %s_START' % stamp
    ruleset_trailer = 'SecMarker %s_FINISH' % stamp
    tags.append("tag:'wafme_%s'" % stamp)
    return
def validate_files(files):
    """Report whether the files needed by the tool exist and are accessible.

    files maps an os.access() mode (os.R_OK, os.W_OK, os.X_OK) to the list of
    paths that must be accessible in that mode. A line is printed per path;
    for wrong permissions the permission bits are printed in octal too.
    Whenever a listed path does not exist, the rule set file (global
    rules_output) is created if it is missing. Returns None.
    """
    for access_type in files:
        for item in files[access_type]:
            print('Testing file access %s' % item)
            if not os.access(item, os.F_OK):
                print("File %s does not exist" % item)
                #Create the rule set file without going through a shell, so
                #paths with spaces or shell metacharacters are handled safely
                try:
                    open(rules_output, 'a').close()
                except (IOError, OSError):
                    print('File %s could not be created' % rules_output)
                continue
            if os.access(item, access_type):
                print('File %s exists and permissions are ok' % item)
            else:
                print('File %s permissions are wrong' % item)
                #0o777 is the octal spelling accepted by Python 2.6+ and 3
                print(oct(os.stat(item).st_mode & 0o777))
    return
def confirm(prompt=None, resp=False):
    """ActiveState recipe, prompts for yes or no response from the user.

    Returns True for yes and False for no. prompt defaults to 'Confirm'.
    'resp' should be set to the default value assumed by the caller when
    user simply types ENTER. Any answer other than y, Y, n or N prints a
    hint and asks again.

    >>> confirm(prompt='Create Directory?', resp=True)
    Create Directory? [y]|n:
    True
    >>> confirm(prompt='Create Directory?', resp=False)
    Create Directory? [n]|y:
    False
    >>> confirm(prompt='Create Directory?', resp=False)
    Create Directory? [n]|y: y
    True
    """
    #raw_input only exists on Python 2; input is its Python 3 equivalent
    try:
        read_answer = raw_input
    except NameError:
        read_answer = input
    if prompt is None:
        prompt = 'Confirm'
    if resp:
        prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n')
    else:
        prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y')
    while True:
        ans = read_answer(prompt)
        if not ans:
            return resp
        if ans not in ['y', 'Y', 'n', 'N']:
            #Statement and message on one line, so the hint is really shown
            print('please enter y or n.')
            continue
        return ans in ['y', 'Y']
def main():
    """Check the files, load the reference data and tail the audit log.

    Validates access to the audit log, the rule set file and the restart
    command, loads the variables section of the ModSecurity reference manual
    through RuleEditor into the global variables, settles the starting rule
    id, prepares the rule set tags and then follows the audit log, handing
    every entry to extractor().
    """
    global variables, new_rule_id, increase_rule_id, audit_log, rules_output
    #Required access mode -> files that need it
    files = {
        os.R_OK: [audit_log],
        os.W_OK: [rules_output],
        os.X_OK: [restart_command],
    }
    validate_files(files)
    #Read the modsecurity reference manual and keep its Variables section
    manual = RuleEditor.get_ref_manual()
    variables = RuleEditor.get_ref_section('Variables', manual)
    #Get starting id to use for rules
    largest_id()
    ruleset_control()
    print('Starting rule id will be : %d' % new_rule_id)
    print('Increases to rule id will be : %d' % increase_rule_id)
    #Declare audit log file and function to process all events
    follower = tail.Tail(audit_log)
    follower.register_callback(extractor)
    print('Press CTRL+C to finish tailing %s and output the rules to %s' % (audit_log, rules_output))
    #Tail and follow the log
    follower.follow()


if __name__ == '__main__':
    main()