diff --git a/CHANGELOG.md b/CHANGELOG.md index 958aa61a3d..b3a71c9f9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ - ghidra: fix saving of base address @mr-tz - binja: support loading raw x86/x86_64 shellcode #2489 @xusheng6 - binja: fix crash when the IL of certain functions are not available. #2249 @xusheng6 +- binja: major performance improvement on the binja extractor. #1414 @xusheng6 ### capa Explorer Web diff --git a/capa/features/extractors/binja/function.py b/capa/features/extractors/binja/function.py index dfddfda2bf..18973539bf 100644 --- a/capa/features/extractors/binja/function.py +++ b/capa/features/extractors/binja/function.py @@ -7,12 +7,13 @@ # See the License for the specific language governing permissions and limitations under the License. from typing import Iterator -from binaryninja import Function, BinaryView, SymbolType, ILException, RegisterValueType, LowLevelILOperation +from binaryninja import Function, BinaryView, SymbolType, LowLevelILOperation from capa.features.file import FunctionName from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.extractors import loops +from capa.features.extractors.binja.helpers import get_llil_instr_at_addr from capa.features.extractors.base_extractor import FunctionHandle @@ -24,14 +25,7 @@ def extract_function_calls_to(fh: FunctionHandle): # Everything that is a code reference to the current function is considered a caller, which actually includes # many other references that are NOT a caller. For example, an instruction `push function_start` will also be # considered a caller to the function - llil = None - try: - # Temporary fix for https://github.com/Vector35/binaryninja-api/issues/6020. Since `.llil` can throw an - # exception rather than returning None - llil = caller.llil - except ILException: - continue - + llil = get_llil_instr_at_addr(func.view, caller.address) if (llil is None) or llil.operation not in [ LowLevelILOperation.LLIL_CALL, LowLevelILOperation.LLIL_CALL_STACK_ADJUST, @@ -40,14 +34,13 @@ def extract_function_calls_to(fh: FunctionHandle): ]: continue - if llil.dest.value.type not in [ - RegisterValueType.ImportedAddressValue, - RegisterValueType.ConstantValue, - RegisterValueType.ConstantPointerValue, + if llil.dest.operation not in [ + LowLevelILOperation.LLIL_CONST, + LowLevelILOperation.LLIL_CONST_PTR, ]: continue - address = llil.dest.value.value + address = llil.dest.constant if address != func.start: continue diff --git a/capa/features/extractors/binja/helpers.py b/capa/features/extractors/binja/helpers.py index 27e8d29e03..dd61bd9181 100644 --- a/capa/features/extractors/binja/helpers.py +++ b/capa/features/extractors/binja/helpers.py @@ -6,10 +6,10 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import re -from typing import Callable +from typing import Callable, Optional from dataclasses import dataclass -from binaryninja import BinaryView, LowLevelILInstruction +from binaryninja import BinaryView, LowLevelILFunction, LowLevelILInstruction from binaryninja.architecture import InstructionTextToken @@ -67,3 +67,13 @@ def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str: s.append(chr(c)) return "".join(s) + + +def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInstruction]: + arch = bv.arch + buffer = bv.read(addr, arch.max_instr_length) + llil = LowLevelILFunction(arch=arch) + llil.current_address = addr + if arch.get_instruction_low_level_il(buffer, addr, llil) == 0: + return None + return llil[0] diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index cae131ef95..618ee7a135 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -13,7 +13,6 @@ BinaryView, ILRegister, SymbolType, - ILException, BinaryReader, RegisterValueType, LowLevelILOperation, @@ -24,7 +23,7 @@ from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress -from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs +from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs, get_llil_instr_at_addr from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle # security cookie checks may perform non-zeroing XORs, these are expected within a certain @@ -37,40 +36,23 @@ # 2. The function must only make one call/jump to another address # If the function being checked is a stub function, returns the target address. Otherwise, return None. def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]: - funcs = bv.get_functions_at(addr) - for func in funcs: - if len(func.basic_blocks) != 1: - continue - - call_count = 0 - call_target = None - try: - llil = func.llil - except ILException: - return None + llil = get_llil_instr_at_addr(bv, addr) + if llil is None or llil.operation not in [ + LowLevelILOperation.LLIL_CALL, + LowLevelILOperation.LLIL_CALL_STACK_ADJUST, + LowLevelILOperation.LLIL_JUMP, + LowLevelILOperation.LLIL_TAILCALL, + ]: + return None - if llil is None: - continue + if llil.dest.value.type not in [ + RegisterValueType.ImportedAddressValue, + RegisterValueType.ConstantValue, + RegisterValueType.ConstantPointerValue, + ]: + return None - for il in llil.instructions: - if il.operation in [ - LowLevelILOperation.LLIL_CALL, - LowLevelILOperation.LLIL_CALL_STACK_ADJUST, - LowLevelILOperation.LLIL_JUMP, - LowLevelILOperation.LLIL_TAILCALL, - ]: - call_count += 1 - if il.dest.value.type in [ - RegisterValueType.ImportedAddressValue, - RegisterValueType.ConstantValue, - RegisterValueType.ConstantPointerValue, - ]: - call_target = il.dest.value.value - - if call_count == 1 and call_target is not None: - return call_target - - return None + return llil.dest.value.value def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: