This repository has been archived by the owner on Oct 24, 2020. It is now read-only.
forked from EFForg/https-everywhere
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrivial-validate.py
executable file
·203 lines (172 loc) · 6.43 KB
/
trivial-validate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/usr/bin/env python2.7
import glob
import argparse
import os
import re
import sys
from lxml import etree
from collections import Counter
# Command-line interface. The only option is --quiet, which is stored in the
# module-level `quiet` flag consulted by warn().
parser = argparse.ArgumentParser(
    description="Ruleset validation script.",
    formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
    '--quiet',
    action="store_true",
    default=False,
    help="Suppress debug output.",
)
# Arguments are parsed once, at module load time.
args = parser.parse_args()
quiet = args.quiet
def warn(s):
    """Write ``warning: <s>`` to stdout unless the ``--quiet`` flag was given.

    Warnings never change the exit status of the script.
    """
    if quiet:
        return
    sys.stdout.write("warning: %s\n" % s)
def fail(s):
    """Write ``failure: <s>`` to stdout.

    This only reports the problem; callers are responsible for setting the
    module-level ``failure`` status themselves.
    """
    line = "failure: %s\n" % s
    sys.stdout.write(line)
# XPath selectors that are evaluated against every ruleset are compiled once
# here instead of on each use.
(xpath_exclusion_pattern,
 xpath_cookie_host_pattern,
 xpath_cookie_name_pattern) = map(etree.XPath, (
    "/ruleset/exclusion/@pattern",
    "/ruleset/securecookie/@host",
    "/ruleset/securecookie/@name",
))
# Hostnames listed in duplicate-whitelist.txt (one per line, stored next to
# this script) only produce a warning, not a failure, when they show up in
# more than one ruleset.
thispath = os.path.dirname(os.path.realpath(__file__))
with open(thispath + '/duplicate-whitelist.txt') as duplicate_fh:
    duplicate_allowed_list = [line.rstrip('\n') for line in duplicate_fh]

# Every entry of the rules directory is a candidate ruleset file.
filenames = glob.glob(thispath + '/../src/chrome/content/rules/*')
def test_bad_regexp(tree, rulename, from_attrib, to):
    # Rules with invalid regular expressions.
    #
    # NOTE: the docstring below is printed verbatim in the failure report
    # (through test.__doc__), so it must remain the first statement and its
    # text must not be reworded.
    #
    # Parameters (the calling convention shared by every entry in `tests`):
    #   tree        -- parsed ruleset document the XPath selectors run against
    #   rulename    -- name of the ruleset (unused here)
    #   from_attrib -- list of the rules' 'from' attribute values
    #   to          -- list of the rules' 'to' attribute values (unused here)
    # Returns True when every pattern compiles, False otherwise.
    """The rule contains an invalid extended regular expression."""
    # Every regex-valued attribute is checked: rule 'from', exclusion
    # 'pattern', and securecookie 'host' / 'name'.
    patterns = (from_attrib + xpath_exclusion_pattern(tree) +
                xpath_cookie_host_pattern(tree) +
                xpath_cookie_name_pattern(tree))
    for pat in patterns:
        try:
            re.compile(pat)
        except Exception:
            # Any compilation error (re.error, OverflowError for oversized
            # repeat counts, ...) means the pattern is unusable.  Unlike a
            # bare `except:`, this lets KeyboardInterrupt and SystemExit
            # propagate instead of being reported as a bad regexp.
            return False
    return True
def unescaped_dot(s):
    """Return True if *s* contains a bare ``.`` before its first ``/``.

    A dot counts as bare when it is neither preceded by a backslash nor
    inside a ``[...]`` character class.  Scanning stops at the first ``/``
    found outside a character class, so anything after it is ignored.
    """
    after_backslash = False
    in_class = False
    for ch in s:
        if ch == "\\":
            # Two backslashes in a row cancel each other out.
            after_backslash = not after_backslash
            continue
        if after_backslash:
            # The character is escaped: it cannot open or close a class and
            # cannot be a bare dot, but a slash still ends the scan.
            if ch == "/" and not in_class:
                break
            after_backslash = False
            continue
        if ch == "[":
            in_class = True
        elif ch == "]":
            in_class = False
        elif in_class:
            # Dots and slashes inside a character class are ignored.
            pass
        elif ch == ".":
            return True
        elif ch == "/":
            break
    return False
def test_unescaped_dots(tree, rulename, from_attrib, to):
    # Rules containing unescaped dots outside of brackets and before slash.
    # Note: this is meant to require example\.com instead of example.com,
    # but it also forbids things like .* which usually ought to be replaced
    # with something like ([^/:@\.]+)
    #
    # NOTE: the docstring below is printed verbatim in the failure report
    # (through test.__doc__); keep it as the first statement and do not
    # reword it.
    #
    # Only `from_attrib` (list of the rules' 'from' patterns) is used; the
    # other parameters exist to match the common test signature.
    # Returns True when no 'from' pattern has an unescaped dot, else False.
    """The 'from' rule contains unescaped period in regular expression. Try escaping it with a backslash."""
    for f in from_attrib:
        # Drop a leading literal "^http://" or "^https://" so the slashes of
        # the scheme do not end the scan before the host part is reached.
        # Raw string: "\^" in a plain literal is an invalid escape sequence
        # that newer Python versions warn about; the pattern is unchanged.
        s = re.sub(r"^\^https?://", "", f)
        if unescaped_dot(s):
            return False
    return True
def test_unescaped_dots_in_exclusion(tree, rulename, from_attrib, to):
    # Exclusion patterns containing unescaped dots outside of brackets and
    # before the first slash.
    #
    # NOTE: the docstring below is printed verbatim in the failure report
    # (through test.__doc__); keep it as the first statement and do not
    # reword it.
    #
    # Only `tree` (the parsed ruleset document) is used; the other
    # parameters exist to match the common test signature.
    # Returns True when no exclusion pattern is flagged, else False.
    #
    # NOTE(review): unlike test_unescaped_dots, the "^http(s)://" prefix is
    # not stripped here, and unescaped_dot() stops at the first "/", so
    # patterns that start with a scheme are effectively not scanned past it.
    # Confirm whether that is intended before tightening this check.
    """The 'exclusion' tag contains unescaped period in regular expression. Try escaping it with a backslash."""
    # Reuse the selector compiled once at module level instead of building a
    # new etree.XPath object for every ruleset.
    for pattern in xpath_exclusion_pattern(tree):
        if unescaped_dot(pattern):
            return False
    return True
# Selects every <rule> element of a ruleset document.
xpath_rule = etree.XPath("/ruleset/rule")


def test_unencrypted_to(tree, rulename, from_attrib, to):
    # Rules that redirect to something other than https.
    # This used to test for http: but testing for lack of https: will
    # catch more kinds of mistakes.
    #
    # NOTE: the docstring below is printed verbatim in the failure report
    # (through test.__doc__); keep it as the first statement and do not
    # reword it.
    #
    # Only `tree` (the parsed ruleset document) is used; the other
    # parameters exist to match the common test signature.
    # Returns True when every rule's 'to' attribute starts with "https:".
    """Rule redirects to something other than https."""
    for rule in xpath_rule(tree):
        target = rule.get("to")
        # A <rule> without a 'to' attribute yields None.  Report that as a
        # failed check instead of crashing with a TypeError on slicing.
        if target is None or not target.startswith("https:"):
            return False
    return True
printable_characters = set(map(chr, list(range(32, 127))))
def test_non_ascii(tree, rulename, from_attrib, to):
# Rules containing non-printable characters.
"""Rule contains non-printable character in 'to' pattern."""
for t in to:
for c in t:
if c not in printable_characters:
return False
return True
def is_valid_target_host(host):
    # Rules where a target host contains multiple wildcards or a slash.
    #
    # NOTE: the docstring below is printed verbatim in the failure report
    # (through is_valid_target_host.__doc__); keep it as the first statement
    # and do not reword it.
    #
    # Returns True when `host` contains no "/" and at most one "*".
    """The target host must be a hostname, not URL, and must use at most one wildcard."""
    has_slash = "/" in host
    return not has_slash and host.count("*") <= 1
def nomes_all(where=sys.argv[1:]):
    """Yield the path of every file reachable from the given files/dirs.

    `where` is a list of paths.  A path that is a regular file is yielded
    as-is; a directory is walked recursively and each file below it is
    yielded; anything else is skipped.  An empty `where` means the current
    directory.  The default is the command-line arguments as they were when
    this function was defined.
    """
    for entry in (where or ['.']):
        if os.path.isfile(entry):
            yield entry
            continue
        if not os.path.isdir(entry):
            continue
        for dirpath, _subdirs, names in os.walk(entry):
            for name in names:
                yield os.path.join(dirpath, name)
# Per-ruleset checks, run in this order.  Each one is called as
# test(tree, rulename, from_attrib=..., to=...) and returns True when the
# ruleset passes; its docstring is the message reported on failure.
tests = [
    test_bad_regexp,
    test_unescaped_dots,
    test_unescaped_dots_in_exclusion,
    test_unencrypted_to,
    test_non_ascii,
]

# Exit status of the script: set to 1 as soon as any check fails.
failure = 0
seen_file = False  # not referenced anywhere in the visible code

# Selectors for the parts of a ruleset document used by the main loop.
xpath_ruleset = etree.XPath("/ruleset")
xpath_ruleset_name = etree.XPath("/ruleset/@name")
xpath_ruleset_platform = etree.XPath("/ruleset/@platform")
xpath_host = etree.XPath("/ruleset/target/@host")
xpath_from = etree.XPath("/ruleset/rule/@from")
xpath_to = etree.XPath("/ruleset/rule/@to")

# Counts how many times each (target host, platform) pair is declared.
host_counter = Counter()
# Validate every candidate file: parse it, run each check in `tests`, and
# record its target hosts for the duplicate detection that follows.
for filename in filenames:
    basename = os.path.basename(filename)
    # These entries live in the rules directory but are not rulesets.
    if basename in ('00README', 'make-trivial-rule', 'default.rulesets'):
        continue

    xml_parser = etree.XMLParser(remove_blank_text=True)
    try:
        tree = etree.parse(filename, xml_parser)
    except Exception as oops:
        # A file that cannot be parsed aborts the whole run immediately.
        print("%s failed XML validity: %s\n" % (filename, oops))
        sys.exit(1)

    # Skip documents whose root element is not <ruleset>.
    if not xpath_ruleset(tree):
        continue

    # A ruleset without a name attribute yields an empty result list, so
    # guard the lookup: indexing it directly would raise IndexError before
    # the "unnamed ruleset" failure could be reported.
    names = xpath_ruleset_name(tree)
    rn = names[0] if names else None
    if not rn:
        failure = 1
        fail("unnamed ruleset")
        continue

    from_attrib = xpath_from(tree)
    to = xpath_to(tree)
    for test in tests:
        if not test(tree, rn, from_attrib=from_attrib, to=to):
            failure = 1
            # The test's docstring doubles as the human-readable reason.
            fail("%s failed test: %s" % (filename, test.__doc__))

    # A ruleset without a platform attribute is counted under ''.
    platform_list = xpath_ruleset_platform(tree)
    platform = platform_list[0] if platform_list else ''
    targets = xpath_host(tree)
    for target in targets:
        host_counter[(target, platform)] += 1
# Report target hosts declared more than once for the same platform, and
# hosts that are not valid target hostnames, then exit with the overall
# status (0 = all checks passed, 1 = at least one failure).
for (host, platform), count in host_counter.most_common():
    duplicated = count > 1
    if duplicated and host in duplicate_allowed_list:
        # Whitelisted duplicates are only warned about.
        warn("Whitelisted hostname %s with platform '%s' shows up in %d different rulesets." % (host, platform, count))
    elif duplicated:
        failure = 1
        fail("Hostname %s with platform '%s' shows up in %d different rulesets." % (host, platform, count))
    if not is_valid_target_host(host):
        failure = 1
        # The validator's docstring doubles as the human-readable reason.
        fail("%s failed: %s" % (host, is_valid_target_host.__doc__))

sys.exit(failure)