Skip to content

Commit

Permalink
binja: provide llil to instruction handlers via ctx
Browse files Browse the repository at this point in the history
  • Loading branch information
williballenthin committed Nov 27, 2024
1 parent 319dbfe commit 73f56f5
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 27 deletions.
14 changes: 9 additions & 5 deletions capa/features/extractors/binja/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import capa.features.extractors.binja.file
import capa.features.extractors.binja.insn
import capa.features.extractors.binja.global_
import capa.features.extractors.binja.helpers
import capa.features.extractors.binja.function
import capa.features.extractors.binja.basicblock
from capa.features.common import Feature
Expand Down Expand Up @@ -133,13 +134,16 @@ def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Ite
yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)

def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
import capa.features.extractors.binja.helpers as binja_helpers
f: binja.Function = fh.inner

bb: tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner
addr = bb[0].start
bb: binja.BasicBlock
mlbb: binja.MediumLevelILBasicBlock
bb, mlbb = bbh.inner

for text, length in bb[0]:
insn = binja_helpers.DisassemblyInstruction(addr, length, text)
addr: int = bb.start
for text, length in bb:
llil = f.get_llils_at(addr)
insn = capa.features.extractors.binja.helpers.DisassemblyInstruction(addr, length, text, llil)
yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
addr += length

Expand Down
20 changes: 19 additions & 1 deletion capa/features/extractors/binja/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import Callable
from dataclasses import dataclass

from binaryninja import BinaryView, LowLevelILInstruction
from binaryninja import BinaryView, LowLevelILOperation, LowLevelILInstruction
from binaryninja.architecture import InstructionTextToken


Expand All @@ -18,6 +18,24 @@ class DisassemblyInstruction:
address: int
length: int
text: list[InstructionTextToken]
llil: list[LowLevelILInstruction]

@property
def is_call(self):
    """
    Report whether this instruction lifts to a call-like LLIL operation.

    Only the first entry of `self.llil` is inspected. The result is True when
    that entry's operation is one of LLIL_CALL, LLIL_CALL_STACK_ADJUST, or
    LLIL_TAILCALL, and False otherwise, including when `self.llil` is empty
    or its first entry is falsy.
    """
    lifted = self.llil

    # TODO(williballenthin): when to use one vs many llil instructions
    # https://github.com/Vector35/binaryninja-api/issues/6205
    head = lifted[0] if lifted else None
    if not head:
        # nothing was lifted for this instruction, so it cannot be a call.
        return False

    call_operations = (
        LowLevelILOperation.LLIL_CALL,
        LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
        LowLevelILOperation.LLIL_TAILCALL,
    )
    return head.operation in call_operations


LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
Expand Down
48 changes: 27 additions & 21 deletions capa/features/extractors/binja/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
SECURITY_COOKIE_BYTES_DELTA = 0x40


# TODO: move this to call graph pass
# check if a function is a stub function to another function/symbol. The criteria are:
# 1. The function must only have one basic block
# 2. The function must only make one call/jump to another address
Expand Down Expand Up @@ -82,8 +83,9 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
"""
func: Function = fh.inner
bv: BinaryView = func.view
insn: DisassemblyInstruction = ih.inner

for llil in func.get_llils_at(ih.address):
for llil in insn.llil:
if llil.operation in [
LowLevelILOperation.LLIL_CALL,
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
Expand Down Expand Up @@ -138,10 +140,11 @@ def extract_insn_number_features(
example:
push 3136B0h ; dwControlCode
"""
func: Function = fh.inner
insn: DisassemblyInstruction = ih.inner

results: list[tuple[Any[Number, OperandNumber], Address]] = []

# TODO: try to move this out of line
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
if il.operation == LowLevelILOperation.LLIL_LOAD:
return False
Expand All @@ -165,7 +168,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index

return False

for llil in func.get_llils_at(ih.address):
for llil in insn.llil:
visit_llil_exprs(llil, llil_checker)

yield from results
Expand All @@ -179,11 +182,11 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
"""
func: Function = fh.inner
bv: BinaryView = func.view
insn: DisassemblyInstruction = ih.inner

candidate_addrs = set()

llil = func.get_llil_at(ih.address)
if llil is None or llil.operation in [LowLevelILOperation.LLIL_CALL, LowLevelILOperation.LLIL_CALL_STACK_ADJUST]:
if insn.is_call:
return

for ref in bv.get_code_refs_from(ih.address):
Expand All @@ -205,7 +208,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index

return True

for llil in func.get_llils_at(ih.address):
for llil in insn.llil:
visit_llil_exprs(llil, llil_checker)

for addr in candidate_addrs:
Expand All @@ -227,6 +230,7 @@ def extract_insn_string_features(
"""
func: Function = fh.inner
bv: BinaryView = func.view
insn: DisassemblyInstruction = ih.inner

candidate_addrs = set()

Expand All @@ -250,7 +254,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index

return True

for llil in func.get_llils_at(ih.address):
for llil in insn.llil:
visit_llil_exprs(llil, llil_checker)

# Now we have all the candidate address, check them for string or pointer to string
Expand Down Expand Up @@ -283,6 +287,7 @@ def extract_insn_offset_features(
.text:0040112F cmp [esi+4], ebx
"""
func: Function = fh.inner
insn: DisassemblyInstruction = ih.inner

results: list[tuple[Any[Offset, OperandOffset], Address]] = []
address_size = func.view.arch.address_size * 8
Expand Down Expand Up @@ -327,7 +332,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index

return True

for llil in func.get_llils_at(ih.address):
for llil in insn.llil:
visit_llil_exprs(llil, llil_checker)

yield from results
Expand Down Expand Up @@ -367,7 +372,7 @@ def extract_insn_nzxor_characteristic_features(
parse instruction non-zeroing XOR instruction
ignore expected non-zeroing XORs, e.g. security cookies
"""
func: Function = fh.inner
insn: DisassemblyInstruction = ih.inner

results = []

Expand All @@ -383,7 +388,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index
else:
return True

for llil in func.get_llils_at(ih.address):
for llil in insn.llil:
visit_llil_exprs(llil, llil_checker)

yield from results
Expand Down Expand Up @@ -415,7 +420,7 @@ def extract_insn_peb_access_characteristic_features(
fs:[0x30] on x86, gs:[0x60] on x64
"""
func: Function = fh.inner
insn: DisassemblyInstruction = ih.inner

results = []

Expand Down Expand Up @@ -445,7 +450,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILOperation, index:
results.append((Characteristic("peb access"), ih.address))
return False

for llil in func.get_llils_at(ih.address):
for llil in insn.llil:
visit_llil_exprs(llil, llil_checker)

yield from results
Expand All @@ -455,7 +460,7 @@ def extract_insn_segment_access_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[tuple[Feature, Address]]:
"""parse instruction fs or gs access"""
func: Function = fh.inner
insn: DisassemblyInstruction = ih.inner

results = []

Expand All @@ -472,7 +477,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index

return True

for llil in func.get_llils_at(ih.address):
for llil in insn.llil:
visit_llil_exprs(llil, llil_checker)

yield from results
Expand Down Expand Up @@ -509,14 +514,15 @@ def extract_function_indirect_call_characteristic_features(
most relevant at the function or basic block scope;
however, it's most efficient to extract at the instruction scope
"""
func: Function = fh.inner
insn: DisassemblyInstruction = ih.inner

if not insn.is_call:
return

llil = func.get_llil_at(ih.address)
if llil is None or llil.operation not in [
LowLevelILOperation.LLIL_CALL,
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
LowLevelILOperation.LLIL_TAILCALL,
]:
# TODO(williballenthin): when to use one vs many llil instructions
# https://github.com/Vector35/binaryninja-api/issues/6205
llil = insn.llil[0]
if not llil:
return

if llil.dest.operation in [LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR]:
Expand Down

0 comments on commit 73f56f5

Please sign in to comment.