From 379d6ef313837ea6da6ed84545fccfc075ba7348 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:42:28 +0000 Subject: [PATCH 01/10] build(deps): bump protobuf from 5.28.2 to 5.29.0 Bumps [protobuf](https://github.com/protocolbuffers/protobuf) from 5.28.2 to 5.29.0. - [Release notes](https://github.com/protocolbuffers/protobuf/releases) - [Changelog](https://github.com/protocolbuffers/protobuf/blob/main/protobuf_release.bzl) - [Commits](https://github.com/protocolbuffers/protobuf/compare/v5.28.2...v5.29.0) --- updated-dependencies: - dependency-name: protobuf dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index eaee96623a..67c7920663 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ msgpack==1.0.8 networkx==3.4.2 pefile==2024.8.26 pip==24.3.1 -protobuf==5.28.2 +protobuf==5.29.0 pyasn1==0.5.1 pyasn1-modules==0.3.0 pycparser==2.22 From 9ad3f06e1d13cb1615be88f6112d96dfbb333543 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Tue, 3 Dec 2024 11:09:38 +0000 Subject: [PATCH 02/10] skip test where BN misses the function --- tests/test_binja_features.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_binja_features.py b/tests/test_binja_features.py index f57c3b50f6..51f4df68eb 100644 --- a/tests/test_binja_features.py +++ b/tests/test_binja_features.py @@ -40,6 +40,10 @@ indirect=["sample", "scope"], ) def test_binja_features(sample, scope, feature, expected): + # TODO(mr-tz): BinaryNinja does not recognize this function + # https://github.com/mandiant/capa/issues/2507 + if scope.__name__ == "function=0x14004B4F0": + pytest.xfail("BinaryNinja does not recognize this function") fixtures.do_test_feature_presence(fixtures.get_binja_extractor, sample, scope, feature, expected) From 65e2dac4c4c667da58870523d0152919e8df97fd Mon Sep 17 00:00:00 2001 From: mr-tz Date: Tue, 3 Dec 2024 12:09:38 +0000 Subject: [PATCH 03/10] upgrade pre-commit config --- .pre-commit-config.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8b77d35242..262b600e87 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,7 +25,7 @@ repos: hooks: - id: isort name: isort - stages: [commit, push, manual] + stages: [pre-commit, pre-push, manual] language: system entry: isort args: @@ -46,7 +46,7 @@ repos: hooks: - id: black name: black - stages: [commit, push, manual] + stages: [pre-commit, pre-push, manual] language: system entry: black args: @@ -64,7 +64,7 @@ repos: hooks: - id: ruff name: ruff - stages: [commit, push, manual] + stages: [pre-commit, pre-push, manual] language: system entry: ruff args: @@ -82,7 +82,7 @@ repos: hooks: - id: flake8 name: flake8 - stages: [push, manual] + stages: [pre-push, manual] language: system entry: flake8 args: @@ -101,7 +101,7 @@ repos: hooks: - id: mypy name: mypy - stages: [push, manual] + stages: [pre-push, manual] language: system entry: mypy args: @@ -119,7 +119,7 @@ repos: hooks: - id: deptry name: deptry - stages: [push, manual] + stages: [pre-push, manual] language: system entry: deptry . always_run: true From 959a234f0e44a5884e455acb7d1d1f9bf296193a Mon Sep 17 00:00:00 2001 From: mr-tz Date: Tue, 3 Dec 2024 13:00:37 +0000 Subject: [PATCH 04/10] make Process model flexible and procmemory optional --- CHANGELOG.md | 1 + capa/features/extractors/cape/models.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4aa64f14c..364a8276ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ - binja: support loading raw x86/x86_64 shellcode #2489 @xusheng6 - binja: fix crash when the IL of certain functions are not available. #2249 @xusheng6 - binja: major performance improvement on the binja extractor. #1414 @xusheng6 +- cape: make Process model flexible and procmemory optional to load newest reports #2466 @mr-tz ### capa Explorer Web diff --git a/capa/features/extractors/cape/models.py b/capa/features/extractors/cape/models.py index 20bedec24e..c37eddd2a4 100644 --- a/capa/features/extractors/cape/models.py +++ b/capa/features/extractors/cape/models.py @@ -297,7 +297,10 @@ class Call(ExactModel): id: int -class Process(ExactModel): +# FlexibleModel to account for extended fields +# refs: https://github.com/mandiant/capa/issues/2466 +# https://github.com/kevoreilly/CAPEv2/pull/2199 +class Process(FlexibleModel): process_id: int process_name: str parent_id: int @@ -400,7 +403,7 @@ class CapeReport(FlexibleModel): CAPE: Optional[Union[Cape, list]] = None dropped: Optional[list[File]] = None procdump: Optional[list[ProcessFile]] = None - procmemory: ListTODO + procmemory: Optional[ListTODO] = None # ========================================================================= # information we won't use in capa From ed86e5fb1b3d36c747482b395d93a97726298391 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 3 Dec 2024 13:12:36 +0000 Subject: [PATCH 05/10] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 16492182a3..1adcf13e06 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 16492182a3ce3f95bba56531cb81030e015edaba +Subproject commit 1adcf13e061b2e15aaa4b3f9aa910d46a362aef2 From 83a46265df3e1bb0bf37ebe354afeaed0b6b77fb Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 3 Dec 2024 16:26:33 +0000 Subject: [PATCH 06/10] Sync capa rules submodule --- CHANGELOG.md | 10 +++++++++- rules | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 364a8276ae..a36269f3eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ - remove support for Python 3.8 and use Python 3.10 as minimum now #1966 @mr-tz -### New Rules (10) +### New Rules (18) - nursery/get-shadow-password-file-entry-on-linux jonathanlepore@google.com - nursery/set-shadow-password-file-entry-on-linux jonathanlepore@google.com @@ -23,6 +23,14 @@ - nursery/persist-via-print-processors-registry-key j.j.vannielen@utwente.nl - linking/static/touchsocket/linked-against-touchsocket still@teamt5.org - runtime/dotnet/compiled-with-dotnet-aot still@teamt5.org +- nursery/persist-via-errorhandler-script j.j.vannielen@utwente.nl +- nursery/persist-via-get-variable-hijack j.j.vannielen@utwente.nl +- nursery/persist-via-iphlpapi-dll-hijack j.j.vannielen@utwente.nl +- nursery/persist-via-lnk-shortcut j.j.vannielen@utwente.nl +- nursery/persist-via-powershell-profile j.j.vannielen@utwente.nl +- nursery/persist-via-windows-accessibility-tools j.j.vannielen@utwente.nl +- nursery/persist-via-windows-terminal-profile j.j.vannielen@utwente.nl +- nursery/write-to-browser-extension-directory j.j.vannielen@utwente.nl - ### Bug Fixes diff --git a/rules b/rules index 1adcf13e06..ed816a8e53 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 1adcf13e061b2e15aaa4b3f9aa910d46a362aef2 +Subproject commit ed816a8e53446cd21a9d634b7d0531df664ba1c1 From d1f3e43325dcc6562612cb36c5386f580c098437 Mon Sep 17 00:00:00 2001 From: Moritz Date: Tue, 3 Dec 2024 19:56:30 +0100 Subject: [PATCH 07/10] vmray: record command line info (#2515) * vmray: record command line info --- CHANGELOG.md | 1 + capa/features/extractors/vmray/__init__.py | 11 ++++++++++- capa/features/extractors/vmray/extractor.py | 2 +- capa/features/extractors/vmray/models.py | 16 +++++++++++++--- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a36269f3eb..5df9d3b7f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - allow call as valid subscope for call scoped rules @mr-tz - support loading and analyzing a Binary Ninja database #2496 @xusheng6 +- vmray: record process command line details @mr-tz ### Breaking Changes diff --git a/capa/features/extractors/vmray/__init__.py b/capa/features/extractors/vmray/__init__.py index a8976cd8c8..dc719211ae 100644 --- a/capa/features/extractors/vmray/__init__.py +++ b/capa/features/extractors/vmray/__init__.py @@ -35,6 +35,8 @@ class VMRayMonitorProcess: ppid: int # parent process ID assigned by OS monitor_id: int # unique ID assigned to process by VMRay image_name: str + filename: str + cmd_line: str class VMRayAnalysis: @@ -160,7 +162,12 @@ def _compute_monitor_processes(self): self.sv2.processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0 ) self.monitor_processes[process.monitor_id] = VMRayMonitorProcess( - process.os_pid, ppid, process.monitor_id, process.image_name + process.os_pid, + ppid, + process.monitor_id, + process.image_name, + process.filename, + process.cmd_line, ) # not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394 @@ -170,6 +177,8 @@ def _compute_monitor_processes(self): monitor_process.os_parent_pid, monitor_process.process_id, monitor_process.image_name, + monitor_process.filename, + monitor_process.cmd_line, ) if monitor_process.process_id not in self.monitor_processes: diff --git a/capa/features/extractors/vmray/extractor.py b/capa/features/extractors/vmray/extractor.py index a9f0491c98..7f40f25dab 100644 --- a/capa/features/extractors/vmray/extractor.py +++ b/capa/features/extractors/vmray/extractor.py @@ -86,7 +86,7 @@ def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, def get_process_name(self, ph) -> str: monitor_process: VMRayMonitorProcess = ph.inner - return monitor_process.image_name + return f"{monitor_process.image_name} ({monitor_process.cmd_line})" def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: for monitor_thread_id in self.analysis.monitor_threads_by_monitor_process[ph.inner.monitor_id]: diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py index c2d6551aa0..755f494fe5 100644 --- a/capa/features/extractors/vmray/models.py +++ b/capa/features/extractors/vmray/models.py @@ -136,11 +136,20 @@ class FunctionReturn(BaseModel): from_addr: HexInt = Field(alias="from") +def sanitize_string(value: str) -> str: + # e.g. "cmd_line": "\"C:\\Users\\38lTTV5Kii\\Desktop\\filename.exe\" ", + return value.replace("\\\\", "\\").strip(' "') + + +# unify representation +SanitizedString = Annotated[str, BeforeValidator(sanitize_string)] + + class MonitorProcess(BaseModel): ts: HexInt process_id: int image_name: str - filename: str + filename: SanitizedString # page_root: HexInt os_pid: HexInt # os_integrity_level: HexInt @@ -148,7 +157,7 @@ class MonitorProcess(BaseModel): monitor_reason: str parent_id: int os_parent_pid: HexInt - # cmd_line: str + cmd_line: SanitizedString # cur_dir: str # os_username: str # bitness: int @@ -306,8 +315,9 @@ class Process(BaseModel): monitor_id: int # monitor_reason: str os_pid: int - filename: str + filename: SanitizedString image_name: str + cmd_line: SanitizedString ref_parent_process: Optional[GenericReference] = None From d7cf8d1251069dfd44d90b50ca753d1f7e2132ae Mon Sep 17 00:00:00 2001 From: Xusheng Date: Wed, 4 Dec 2024 14:27:54 +0800 Subject: [PATCH 08/10] Revert "skip test where BN misses the function" This reverts commit 9ad3f06e1d13cb1615be88f6112d96dfbb333543. --- tests/test_binja_features.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_binja_features.py b/tests/test_binja_features.py index 51f4df68eb..f57c3b50f6 100644 --- a/tests/test_binja_features.py +++ b/tests/test_binja_features.py @@ -40,10 +40,6 @@ indirect=["sample", "scope"], ) def test_binja_features(sample, scope, feature, expected): - # TODO(mr-tz): BinaryNinja does not recognize this function - # https://github.com/mandiant/capa/issues/2507 - if scope.__name__ == "function=0x14004B4F0": - pytest.xfail("BinaryNinja does not recognize this function") fixtures.do_test_feature_presence(fixtures.get_binja_extractor, sample, scope, feature, expected) From 4448d612f16cabc38240081723f4ced5a0e18e3d Mon Sep 17 00:00:00 2001 From: Xusheng Date: Wed, 4 Dec 2024 14:57:13 +0800 Subject: [PATCH 09/10] binja: fix up the analysis for the al-khaser_x64.exe_ file. Fix https://github.com/mandiant/capa/issues/2507 --- CHANGELOG.md | 1 + capa/features/extractors/binja/insn.py | 11 ++++++----- tests/fixtures.py | 6 ++++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5df9d3b7f7..ce457bf302 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,7 @@ - binja: fix crash when the IL of certain functions are not available. #2249 @xusheng6 - binja: major performance improvement on the binja extractor. #1414 @xusheng6 - cape: make Process model flexible and procmemory optional to load newest reports #2466 @mr-tz +- binja: fix unit test failure by fixing up the analysis for file al-khaser_x64.exe_ #2507 @xusheng6 ### capa Explorer Web diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index 618ee7a135..7ebbb6d7fb 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -45,14 +45,15 @@ def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]: ]: return None - if llil.dest.value.type not in [ - RegisterValueType.ImportedAddressValue, - RegisterValueType.ConstantValue, - RegisterValueType.ConstantPointerValue, + # The LLIL instruction retrieved by `get_llil_instr_at_addr` did not go through a full analysis, so we cannot check + # `llil.dest.value.type` here + if llil.dest.operation not in [ + LowLevelILOperation.LLIL_CONST, + LowLevelILOperation.LLIL_CONST_PTR, ]: return None - return llil.dest.value.value + return llil.dest.constant def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: diff --git a/tests/fixtures.py b/tests/fixtures.py index 110b7228e2..ad70a1663f 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -180,6 +180,12 @@ def get_binja_extractor(path: Path): if path.name.endswith("kernel32-64.dll_"): settings.set_bool("pdb.loadGlobalSymbols", old_pdb) + # TODO(xusheng6): Temporary fix for https://github.com/mandiant/capa/issues/2507. Remove this once it is fixed in + # binja + if "al-khaser_x64.exe_" in path.name: + bv.create_user_function(0x14004B4F0) + bv.update_analysis_and_wait() + extractor = capa.features.extractors.binja.extractor.BinjaFeatureExtractor(bv) # overload the extractor so that the fixture exposes `extractor.path` From 02c359f79f827ce4803bf661a38760636af895fe Mon Sep 17 00:00:00 2001 From: Xusheng Date: Wed, 4 Dec 2024 16:56:45 +0800 Subject: [PATCH 10/10] binja: move the stack string detection to the function level. Fix https://github.com/mandiant/capa/issues/2516 --- CHANGELOG.md | 1 + capa/features/extractors/binja/basicblock.py | 101 +---------------- capa/features/extractors/binja/extractor.py | 24 +--- capa/features/extractors/binja/function.py | 110 ++++++++++++++++++- capa/features/extractors/binja/insn.py | 2 +- 5 files changed, 118 insertions(+), 120 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce457bf302..5c82e25072 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ - binja: major performance improvement on the binja extractor. #1414 @xusheng6 - cape: make Process model flexible and procmemory optional to load newest reports #2466 @mr-tz - binja: fix unit test failure by fixing up the analysis for file al-khaser_x64.exe_ #2507 @xusheng6 +- binja: move the stack string detection to function level #2516 @xusheng6 ### capa Explorer Web diff --git a/capa/features/extractors/binja/basicblock.py b/capa/features/extractors/binja/basicblock.py index 5cb8ca138e..2e47770b55 100644 --- a/capa/features/extractors/binja/basicblock.py +++ b/capa/features/extractors/binja/basicblock.py @@ -5,111 +5,21 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. - -import string from typing import Iterator -from binaryninja import Function from binaryninja import BasicBlock as BinjaBasicBlock -from binaryninja import ( - BinaryView, - SymbolType, - RegisterValueType, - VariableSourceType, - MediumLevelILOperation, - MediumLevelILBasicBlock, - MediumLevelILInstruction, -) from capa.features.common import Feature, Characteristic from capa.features.address import Address from capa.features.basicblock import BasicBlock -from capa.features.extractors.helpers import MIN_STACKSTRING_LEN from capa.features.extractors.base_extractor import BBHandle, FunctionHandle -def get_printable_len_ascii(s: bytes) -> int: - """Return string length if all operand bytes are ascii or utf16-le printable""" - count = 0 - for c in s: - if c == 0: - return count - if c < 127 and chr(c) in string.printable: - count += 1 - return count - - -def get_printable_len_wide(s: bytes) -> int: - """Return string length if all operand bytes are ascii or utf16-le printable""" - if all(c == 0x00 for c in s[1::2]): - return get_printable_len_ascii(s[::2]) - return 0 - - -def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int: - bv: BinaryView = f.view - - if il.operation != MediumLevelILOperation.MLIL_CALL: - return 0 - - target = il.dest - if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]: - return 0 - - addr = target.value.value - sym = bv.get_symbol_at(addr) - if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]: - return 0 - - if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]: - return 0 - - if len(il.params) < 2: - return 0 - - dest = il.params[0] - if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]: - var = dest.src - else: - return 0 - - if var.source_type != VariableSourceType.StackVariableSourceType: - return 0 - - src = il.params[1] - if src.value.type != RegisterValueType.ConstantDataAggregateValue: - return 0 - - s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value) - return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s))) - - -def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool: - """check basic block for stackstring indicators - - true if basic block contains enough moves of constant bytes to the stack - """ - count = 0 - for il in bb: - count += get_stack_string_len(f, il) - if count > MIN_STACKSTRING_LEN: - return True - - return False - - -def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: - """extract stackstring indicators from basic block""" - bb: tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner - if bb[1] is not None and bb_contains_stackstring(fh.inner, bb[1]): - yield Characteristic("stack string"), bbh.address - - def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract tight loop indicators from a basic block""" - bb: tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner - for edge in bb[0].outgoing_edges: - if edge.target.start == bb[0].start: + bb: BinjaBasicBlock = bbh.inner + for edge in bb.outgoing_edges: + if edge.target.start == bb.start: yield Characteristic("tight loop"), bbh.address @@ -121,7 +31,4 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Featur yield BasicBlock(), bbh.address -BASIC_BLOCK_HANDLERS = ( - extract_bb_tight_loop, - extract_bb_stackstring, -) +BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop,) diff --git a/capa/features/extractors/binja/extractor.py b/capa/features/extractors/binja/extractor.py index 1d4dd6bd7c..953cde76e1 100644 --- a/capa/features/extractors/binja/extractor.py +++ b/capa/features/extractors/binja/extractor.py @@ -8,7 +8,6 @@ from typing import Iterator import binaryninja as binja -from binaryninja import ILException import capa.features.extractors.elf import capa.features.extractors.binja.file @@ -54,23 +53,8 @@ def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Featur def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: f: binja.Function = fh.inner - # Set up a MLIL basic block dict look up to associate the disassembly basic block with its MLIL basic block - mlil_lookup = {} - try: - mlil = f.mlil - except ILException: - return - - if mlil is None: - return - - for mlil_bb in mlil.basic_blocks: - mlil_lookup[mlil_bb.source_block.start] = mlil_bb - for bb in f.basic_blocks: - mlil_bb = mlil_lookup.get(bb.start) - - yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb)) + yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=bb) def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh) @@ -78,10 +62,10 @@ def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Ite def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: import capa.features.extractors.binja.helpers as binja_helpers - bb: tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner - addr = bb[0].start + bb: binja.BasicBlock = bbh.inner + addr = bb.start - for text, length in bb[0]: + for text, length in bb: insn = binja_helpers.DisassemblyInstruction(addr, length, text) yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn) addr += length diff --git a/capa/features/extractors/binja/function.py b/capa/features/extractors/binja/function.py index 18973539bf..c7c017d1bd 100644 --- a/capa/features/extractors/binja/function.py +++ b/capa/features/extractors/binja/function.py @@ -5,14 +5,27 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. +import string from typing import Iterator -from binaryninja import Function, BinaryView, SymbolType, LowLevelILOperation +from binaryninja import ( + Function, + BinaryView, + SymbolType, + ILException, + RegisterValueType, + VariableSourceType, + LowLevelILOperation, + MediumLevelILOperation, + MediumLevelILBasicBlock, + MediumLevelILInstruction, +) from capa.features.file import FunctionName from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.extractors import loops +from capa.features.extractors.helpers import MIN_STACKSTRING_LEN from capa.features.extractors.binja.helpers import get_llil_instr_at_addr from capa.features.extractors.base_extractor import FunctionHandle @@ -95,10 +108,103 @@ def extract_function_name(fh: FunctionHandle): yield FunctionName(name[1:]), sym.address +def get_printable_len_ascii(s: bytes) -> int: + """Return string length if all operand bytes are ascii or utf16-le printable""" + count = 0 + for c in s: + if c == 0: + return count + if c < 127 and chr(c) in string.printable: + count += 1 + return count + + +def get_printable_len_wide(s: bytes) -> int: + """Return string length if all operand bytes are ascii or utf16-le printable""" + if all(c == 0x00 for c in s[1::2]): + return get_printable_len_ascii(s[::2]) + return 0 + + +def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int: + bv: BinaryView = f.view + + if il.operation != MediumLevelILOperation.MLIL_CALL: + return 0 + + target = il.dest + if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]: + return 0 + + addr = target.value.value + sym = bv.get_symbol_at(addr) + if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]: + return 0 + + if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]: + return 0 + + if len(il.params) < 2: + return 0 + + dest = il.params[0] + if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]: + var = dest.src + else: + return 0 + + if var.source_type != VariableSourceType.StackVariableSourceType: + return 0 + + src = il.params[1] + if src.value.type != RegisterValueType.ConstantDataAggregateValue: + return 0 + + s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value) + return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s))) + + +def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool: + """check basic block for stackstring indicators + + true if basic block contains enough moves of constant bytes to the stack + """ + count = 0 + for il in bb: + count += get_stack_string_len(f, il) + if count > MIN_STACKSTRING_LEN: + return True + + return False + + +def extract_stackstring(fh: FunctionHandle): + """extract stackstring indicators""" + func: Function = fh.inner + bv: BinaryView = func.view + if bv is None: + return + + try: + mlil = func.mlil + except ILException: + return + + for block in mlil.basic_blocks: + if bb_contains_stackstring(func, block): + yield Characteristic("stack string"), block.source_block.start + + def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr -FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name) +FUNCTION_HANDLERS = ( + extract_function_calls_to, + extract_function_loop, + extract_recursive_call, + extract_function_name, + extract_stackstring, +) diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index 7ebbb6d7fb..90be9a55f2 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -359,7 +359,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index # e.g., , (LLIL_SET_REG). So we do not need to check whether the two operands are the same. if il.operation == LowLevelILOperation.LLIL_XOR: # Exclude cases related to the stack cookie - if is_nzxor_stack_cookie(fh.inner, bbh.inner[0], il): + if is_nzxor_stack_cookie(fh.inner, bbh.inner, il): return False results.append((Characteristic("nzxor"), ih.address)) return False