Skip to content

Commit

Permalink
new heuristic: identification of uncommon instruction sequences. work…
Browse files Browse the repository at this point in the history
…s for x86-64 right now
  • Loading branch information
mrphrazer committed Feb 23, 2022
1 parent 81ccea8 commit 034aa03
Show file tree
Hide file tree
Showing 8 changed files with 1,110 additions and 2 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Obfuscation Detection (v1.3)
# Obfuscation Detection (v1.4)
Author: **Tim Blazytko**

_Automatically detect obfuscated code and other state machines_
Expand Down
3 changes: 3 additions & 0 deletions __init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@

# Expose the instruction-overlapping heuristic as a plugin command. Arguments
# are: command name, description, callback. NOTE(review): the backslash in the
# name presumably nests the entry under an "Obfuscation Detection" submenu
# (Binary Ninja's naming convention) -- confirm against the API docs.
PluginCommand.register("Obfuscation Detection\\Instruction Overlapping",
"Heuristic to detect instruction overlapping", heuristics.find_instruction_overlapping)

# Expose the uncommon-instruction-sequences heuristic, using the same
# "Obfuscation Detection\\" name prefix as the command above.
PluginCommand.register("Obfuscation Detection\\Uncommon Instruction Sequences",
"Heuristic to detect uncommon instruction sequences", heuristics.find_uncommon_instruction_sequences)
42 changes: 42 additions & 0 deletions generate_ngram_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/python
import glob
import sys
from collections import Counter
from pprint import pformat

from binaryninja import BinaryViewType

from obfuscation_detection.utils import calc_global_ngrams

def main(argv=None, n=3, top=1000):
    """Build an n-gram database from all binaries in a directory.

    Every file in the analysis directory is opened with Binary Ninja, its
    n-grams are counted via ``calc_global_ngrams`` and the ``top`` most common
    n-grams over all binaries are written to the output file as a
    pretty-printed Python set.

    Args:
        argv: Command line as ``[program, analysis_directory, output_file]``;
            defaults to ``sys.argv``.
        n: Size of the n-grams to count.
        top: Number of most common n-grams to keep.

    Returns:
        Process exit status: 0 on success, 1 if arguments are missing.
    """
    if argv is None:
        argv = sys.argv

    # check file arguments -- a usage error must not report success
    if len(argv) < 3:
        program = argv[0] if argv else "generate_ngram_database.py"
        print(f"[*] Syntax: {program} <path to analysis directory> <output file>")
        return 1

    # parse arguments
    analysis_directory = argv[1]
    output_file_path = argv[2]

    # global ngrams counter
    ngrams = Counter()

    # walk over all binaries in the provided directory; sorted so that ties
    # in most_common() are resolved identically on every run
    for binary_file_path in sorted(glob.glob(f"{analysis_directory}/*")):
        print(f"Analyzing file {binary_file_path}.")
        # init binary ninja
        bv = BinaryViewType.get_view_of_file(binary_file_path)
        if bv is None:
            # not a loadable binary (e.g. directory or unsupported format)
            print(f"[!] Skipping {binary_file_path}: could not be opened.")
            continue
        try:
            # wait until analysis finishes
            bv.update_analysis_and_wait()
            # count ngrams while the view is still open
            ngrams.update(calc_global_ngrams(bv, n))
        finally:
            # release the view so analyzed binaries do not pile up in memory
            bv.file.close()

    # prepare output string -- the most common ngrams in a set
    output_string = pformat({ngram for ngram, _ in ngrams.most_common(top)})

    # write output file (closed automatically by the context manager)
    with open(output_file_path, 'w', encoding="utf-8") as output_file:
        output_file.write(output_string)

    return 0


if __name__ == "__main__":
    sys.exit(main())
3 changes: 3 additions & 0 deletions obfuscation_detection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@ def detect_obfuscation(bv):

# find overlapping instructions
find_instruction_overlapping(bv)

# find uncommon instruction sequences
find_uncommon_instruction_sequences(bv)
12 changes: 12 additions & 0 deletions obfuscation_detection/heuristics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from math import ceil

from binaryninja import highlight

from obfuscation_detection.utils import *


Expand Down Expand Up @@ -80,3 +82,13 @@ def find_instruction_overlapping(bv):
for address in sorted(functions_with_overlapping):
print(
f"Overlapping instructions in function {hex(address)} ({bv.get_function_at(address).name}).")


def find_uncommon_instruction_sequences(bv):
    """Report functions by their uncommon instruction sequences score.

    Hands ``bv.functions`` and ``calc_uncommon_instruction_sequences_score``
    to ``get_top_10_functions`` and prints one line per returned
    ``(function, score)`` pair, preceded by a section banner.
    NOTE(review): the selection (top 10%, descending) is decided by
    ``get_top_10_functions``, which is defined elsewhere -- confirm there.
    """
    separator = "=" * 80
    print(separator)
    print("Uncommon Instruction Sequences")

    # selection and ordering are delegated to the shared helper
    ranked = get_top_10_functions(
        bv.functions, calc_uncommon_instruction_sequences_score)

    for function, score in ranked:
        address = hex(function.start)
        name = function.name
        print(
            f"Function {address} ({name}) has an uncommon instruction sequences score of {score}.")
Loading

0 comments on commit 034aa03

Please sign in to comment.