forked from bmaia/binwally
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbinwally.py
145 lines (130 loc) · 4.85 KB
/
binwally.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/python
import ssdeep
import os, sys
blocksize = 1024 * 1024
def reportdiffs(unique1, unique2, dir1, dir2, diffs):
"""
Generate diffs report for unique files and dirs
Unique files have a matching score of 0
"""
if unique1:
for file in unique1:
root = os.path.join(dir1,file)
if os.path.isfile(root):
print ' <<< unique ',root
diffs.append(0)
else:
for root, dir, files in os.walk(root):
for file in files:
print ' <<< unique ',os.path.join(root,file)
diffs.append(0)
if unique2:
for file in unique2:
root = os.path.join(dir2,file)
if os.path.isfile(root):
print ' >>> unique ',root
diffs.append(0)
else:
for root, dir, files in os.walk(root):
for file in files:
print ' >>> unique ',os.path.join(root,file)
diffs.append(0)
def difference(seq1, seq2):
"""
Return all items in seq1 only
"""
return [item for item in seq1 if item not in seq2]
def intersect(seq1, seq2):
"""
Return all items in both seq1 and seq2
"""
return [item for item in seq1 if item in seq2]
def comparedirs(dir1, dir2, diffs, files1=None, files2=None):
"""
Compare directory contents, but not actual files
"""
files1 = os.listdir(dir1) if files1 is None else files1
files2 = os.listdir(dir2) if files2 is None else files2
unique1 = difference(files1, files2)
unique2 = difference(files2, files1)
reportdiffs(unique1, unique2, dir1, dir2, diffs)
return not (unique1 or unique2) # true if no diffs
def comparetrees(dir1, dir2, diffs):
"""
Compare all subdirectories and files in two directory trees
Same files have a matching score of 100
Symlinks have a matching score of 100
Different files have a matching score calculated using ssdeep (0 to 100)
"""
names1 = os.listdir(dir1)
names2 = os.listdir(dir2)
comparedirs(dir1, dir2, diffs, names1, names2)
common = intersect(names1, names2)
missed = common[:]
# compare contents of files in common
for name in common:
path1 = os.path.join(dir1, name)
path2 = os.path.join(dir2, name)
if os.path.isfile(path1) and os.path.isfile(path2):
missed.remove(name)
file1 = open(path1, 'rb')
file2 = open(path2, 'rb')
while True:
bytes1 = file1.read(blocksize)
bytes2 = file2.read(blocksize)
if (not bytes1) and (not bytes2): # same file
print ' 100 matches','/'.join(path1.split('/')[1:])
diffs.append(100)
break
if bytes1 != bytes2: # different content
score = ssdeep.compare(ssdeep.hash_from_file(path1),ssdeep.hash_from_file(path2))
print str(score).rjust(5),'differs','/'.join(path1.split('/')[1:])
diffs.append(score)
break
# recur to compare directories in common
for name in common:
path1 = os.path.join(dir1, name)
path2 = os.path.join(dir2, name)
if os.path.isdir(path1) and os.path.isdir(path2):
missed.remove(name)
comparetrees(path1, path2, diffs)
# same name but not both files or dirs (symlinks)
for name in missed:
diffs.append(100)
print(' - ignored '+name+' (symlink)')
def getargs():
"Command-line arguments"
try:
dir1, dir2 = sys.argv[1:]
except:
print '\
\nBinwally: Binary and Directory tree comparison tool\
\n using the Fuzzy Hashing concept (ssdeep)\n\
\nBernardo Rodrigues, http://w00tsec.blogspot.com\n\
\nUsage: python %s dir1 dir2' % os.path.basename(sys.argv[0])+'\n'
sys.exit(1)
else:
return (dir1, dir2)
if __name__ == '__main__':
dir1, dir2 = getargs()
diffs = []
totalscore = 0
# command line arguments are both dirs
if os.path.isdir(dir1) & os.path.isdir(dir2):
print '\nSCORE RESULT PATH'
comparetrees(dir1, dir2, diffs)
if not diffs:
print('No diffs found\n')
else:
for score in diffs:
totalscore += score
print '\nTotal files compared:',len(diffs)
print 'Overall match score: ',str(totalscore/len(diffs))+'%\n'
else:
try:
# command line arguments are both files
score = ssdeep.compare(ssdeep.hash_from_file(dir1),ssdeep.hash_from_file(dir2))
print 'Overall match score: ',str(score)+'%\n'
except:
print 'Invalid Files/Folders: Aborting...'
sys.exit(1)