Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

verbose: show process name and other human-level details #1825

Merged
merged 19 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions capa/features/extractors/base_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,14 @@ def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature,
"""
raise NotImplementedError()

@abc.abstractmethod
def get_process_name(self, ph: ProcessHandle) -> str:
    """
    Returns the human-readable name for the given process,
    such as the filename.

    This base declaration is abstract and always raises
    NotImplementedError; subclasses provide the real lookup.
    """
    raise NotImplementedError()

@abc.abstractmethod
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
"""
Expand Down Expand Up @@ -448,5 +456,15 @@ def extract_call_features(
"""
raise NotImplementedError()

@abc.abstractmethod
def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> str:
    r"""
    Returns the human-readable name for the given call,
    such as a rendered API log entry, like:

        Foo(1, "two", b"\x00\x11") -> -1

    This base declaration is abstract and always raises
    NotImplementedError; subclasses provide the real rendering.
    """
    raise NotImplementedError()


FeatureExtractor: TypeAlias = Union[StaticFeatureExtractor, DynamicFeatureExtractor]
43 changes: 42 additions & 1 deletion capa/features/extractors/cape/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from capa.exceptions import EmptyReportError, UnsupportedFormatError
from capa.features.common import Feature, Characteristic
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress, _NoAddress
from capa.features.extractors.cape.models import Static, CapeReport
from capa.features.extractors.cape.models import Call, Static, Process, CapeReport
from capa.features.extractors.base_extractor import (
CallHandle,
SampleHashes,
Expand Down Expand Up @@ -60,6 +60,10 @@ def get_processes(self) -> Iterator[ProcessHandle]:
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
yield from capa.features.extractors.cape.process.extract_features(ph)

def get_process_name(self, ph) -> str:
    """
    Return the process name recorded on the CAPE process model
    that is wrapped by the given process handle.
    """
    # the handle carries the parsed CAPE `Process` model in `.inner`
    cape_process: Process = ph.inner
    return cape_process.process_name

def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
yield from capa.features.extractors.cape.process.get_threads(ph)

Expand All @@ -78,6 +82,43 @@ def extract_call_features(
) -> Iterator[Tuple[Feature, Address]]:
yield from capa.features.extractors.cape.call.extract_features(ph, th, ch)

def get_call_name(self, ph, th, ch) -> str:
    """
    Render the call wrapped by `ch` as a one-line API log entry, like:

        Sleep(milliseconds=0x3e8) -> 0x0

    Each argument is rendered as `name=value`, separated by ", ":
      - a truthy `pretty_value` is used verbatim,
      - otherwise integers are rendered in hex,
      - strings are wrapped in double quotes,
      - list values are rendered as an empty value,
      - any other value type is handed to `capa.helpers.assert_never`.

    The return value is `pretty_return` when truthy, otherwise the
    hex rendering of `return_`.
    """
    call: Call = ch.inner

    rendered_arguments = []
    for arg in call.arguments:
        rendered = ""
        if arg.pretty_value:
            rendered = arg.pretty_value
        elif isinstance(arg.value, int):
            rendered = hex(arg.value)
        elif isinstance(arg.value, str):
            rendered = '"' + arg.value + '"'
        elif not isinstance(arg.value, list):
            # list values are intentionally left empty;
            # anything else is an unexpected type.
            capa.helpers.assert_never(arg.value)

        rendered_arguments.append(arg.name + "=" + rendered)

    return_value = call.pretty_return if call.pretty_return else hex(call.return_)

    # joining with the separator avoids having to strip a trailing comma
    return "".join([call.api, "(", ", ".join(rendered_arguments), ")", " -> ", return_value])

@classmethod
def from_report(cls, report: Dict) -> "CapeExtractor":
cr = CapeReport.model_validate(report)
Expand Down
8 changes: 8 additions & 0 deletions capa/features/extractors/null.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def extract_insn_features(self, f, bb, insn):

@dataclass
class CallFeatures:
    # human-readable name of the call; this is what `get_call_name()` returns
    name: str
    # (address, feature) pairs recorded for this call;
    # `extract_call_features()` yields them back as (feature, address)
    features: List[Tuple[Address, Feature]]


Expand All @@ -110,6 +111,7 @@ class ThreadFeatures:
class ProcessFeatures:
    # (address, feature) pairs recorded for this process;
    # `extract_process_features()` yields them back as (feature, address)
    features: List[Tuple[Address, Feature]]
    # per-thread features, keyed by thread address
    threads: Dict[Address, ThreadFeatures]
    # human-readable name of the process; this is what `get_process_name()` returns
    name: str


@dataclass
Expand Down Expand Up @@ -140,6 +142,9 @@ def extract_process_features(self, ph):
for addr, feature in self.processes[ph.address].features:
yield feature, addr

def get_process_name(self, ph) -> str:
    """
    Return the stored name of the process identified by `ph.address`.
    """
    process_features = self.processes[ph.address]
    return process_features.name

def get_threads(self, ph):
for address in sorted(self.processes[ph.address].threads.keys()):
assert isinstance(address, ThreadAddress)
Expand All @@ -158,5 +163,8 @@ def extract_call_features(self, ph, th, ch):
for address, feature in self.processes[ph.address].threads[th.address].calls[ch.address].features:
yield feature, address

def get_call_name(self, ph, th, ch) -> str:
    """
    Return the stored name of the call identified by `ch.address`,
    found within the thread `th.address` of the process `ph.address`.
    """
    thread_features = self.processes[ph.address].threads[th.address]
    return thread_features.calls[ch.address].name


NullFeatureExtractor: TypeAlias = Union[NullStaticFeatureExtractor, NullDynamicFeatureExtractor]
10 changes: 9 additions & 1 deletion capa/features/freeze/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ class FunctionFeatures(BaseModel):

class CallFeatures(BaseModel):
    # address of the call
    address: Address
    # human-readable name of the call;
    # `dumps_dynamic()` fills this from `extractor.get_call_name()`
    name: str
    # features extracted at this call
    features: Tuple[CallFeature, ...]


Expand All @@ -300,6 +301,7 @@ class ThreadFeatures(BaseModel):

class ProcessFeatures(BaseModel):
    # address of the process
    address: Address
    # human-readable name of the process;
    # `dumps_dynamic()` fills this from `extractor.get_process_name()`
    name: str
    # features extracted at the process scope
    features: Tuple[ProcessFeature, ...]
    # features of the threads found within this process
    threads: Tuple[ThreadFeatures, ...]

Expand Down Expand Up @@ -463,6 +465,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
process_features: List[ProcessFeatures] = []
for p in extractor.get_processes():
paddr = Address.from_capa(p.address)
pname = extractor.get_process_name(p)
pfeatures = [
ProcessFeature(
process=paddr,
Expand All @@ -488,6 +491,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
calls = []
for call in extractor.get_calls(p, t):
caddr = Address.from_capa(call.address)
cname = extractor.get_call_name(p, t, call)
cfeatures = [
CallFeature(
call=caddr,
Expand All @@ -500,6 +504,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
calls.append(
CallFeatures(
address=caddr,
name=cname,
features=tuple(cfeatures),
)
)
Expand All @@ -515,6 +520,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
process_features.append(
ProcessFeatures(
address=paddr,
name=pname,
features=tuple(pfeatures),
threads=tuple(threads),
)
Expand Down Expand Up @@ -595,13 +601,15 @@ def loads_dynamic(s: str) -> DynamicFeatureExtractor:
file_features=[(f.address.to_capa(), f.feature.to_capa()) for f in freeze.features.file],
processes={
p.address.to_capa(): null.ProcessFeatures(
name=p.name,
features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in p.features],
threads={
t.address.to_capa(): null.ThreadFeatures(
features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in t.features],
calls={
c.address.to_capa(): null.CallFeatures(
features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in c.features]
name=c.name,
features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in c.features],
)
for c in t.calls
},
Expand Down
57 changes: 46 additions & 11 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import itertools
import contextlib
import collections
from typing import Any, Dict, List, Tuple, Callable, Optional
from typing import Any, Set, Dict, List, Tuple, Callable, Optional
from pathlib import Path

import halo
Expand Down Expand Up @@ -1050,7 +1050,7 @@ def collect_metadata(
)


def compute_dynamic_layout(rules, extractor: DynamicFeatureExtractor, capabilities) -> rdoc.DynamicLayout:
def compute_dynamic_layout(rules, extractor: DynamicFeatureExtractor, capabilities: MatchResults) -> rdoc.DynamicLayout:
"""
compute a metadata structure that links threads
to the processes in which they're found.
Expand All @@ -1060,28 +1060,63 @@ def compute_dynamic_layout(rules, extractor: DynamicFeatureExtractor, capabiliti
a large amount of un-referenced data.
"""
assert isinstance(extractor, DynamicFeatureExtractor)

matched_threads: Set[Address] = set()
for rule_name, matches in capabilities.items():
rule = rules[rule_name]
if capa.rules.Scope.THREAD in rule.scopes:
for addr, _ in matches:
matched_threads.add(addr)

matched_calls: Set[Address] = set()

def result_rec(result: capa.features.common.Result):
    """
    Walk a match result tree and record, in `matched_calls`,
    every location that is a dynamic call address.
    """
    # only call addresses are of interest; other location kinds are ignored
    matched_calls.update(
        location
        for location in result.locations
        if isinstance(location, capa.features.address.DynamicCallAddress)
    )

    # descend into the sub-results so nested matches are collected as well
    for sub_result in result.children:
        result_rec(sub_result)

for matches in capabilities.values():
for _, result in matches:
result_rec(result)

processes_by_thread: Dict[Address, Address] = {}
threads_by_processes: Dict[Address, List[Address]] = {}
names_by_process: Dict[Address, str] = {}
calls_by_thread: Dict[Address, List[Address]] = {}
names_by_call: Dict[Address, str] = {}
for p in extractor.get_processes():
threads_by_processes[p.address] = []
names_by_process[p.address] = extractor.get_process_name(p)
for t in extractor.get_threads(p):
processes_by_thread[t.address] = p.address
threads_by_processes[p.address].append(t.address)

matched_threads = set()
for rule_name, matches in capabilities.items():
rule = rules[rule_name]
if capa.rules.Scope.THREAD in rule.scopes:
for addr, _ in matches:
assert addr in processes_by_thread
matched_threads.add(addr)
calls_by_thread[t.address] = []
for c in extractor.get_calls(p, t):
calls_by_thread[t.address].append(c.address)
if c.address in matched_calls:
names_by_call[c.address] = extractor.get_call_name(p, t, c)

layout = rdoc.DynamicLayout(
processes=tuple(
rdoc.ProcessLayout(
address=frz.Address.from_capa(p),
name=names_by_process[p],
matched_threads=tuple(
rdoc.ThreadLayout(address=frz.Address.from_capa(t)) for t in threads if t in matched_threads
rdoc.ThreadLayout(
address=frz.Address.from_capa(t),
matched_calls=tuple(
rdoc.CallLayout(
address=frz.Address.from_capa(c),
name=names_by_call[c],
)
for c in calls_by_thread[t]
if c in matched_calls
),
)
for t in threads
if t in matched_threads
) # this object is open to extension in the future,
# such as with the function name, etc.
)
Expand Down
29 changes: 27 additions & 2 deletions capa/render/proto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,20 @@ def dynamic_analysis_to_pb2(analysis: rd.DynamicAnalysis) -> capa_pb2.DynamicAna
processes=[
capa_pb2.ProcessLayout(
address=addr_to_pb2(p.address),
matched_threads=[capa_pb2.ThreadLayout(address=addr_to_pb2(t.address)) for t in p.matched_threads],
name=p.name,
matched_threads=[
capa_pb2.ThreadLayout(
address=addr_to_pb2(t.address),
matched_calls=[
capa_pb2.CallLayout(
address=addr_to_pb2(c.address),
name=c.name,
)
for c in t.matched_calls
],
)
for t in p.matched_threads
],
)
for p in analysis.layout.processes
]
Expand Down Expand Up @@ -705,8 +718,20 @@ def dynamic_analysis_from_pb2(analysis: capa_pb2.DynamicAnalysis) -> rd.DynamicA
[
rd.ProcessLayout(
address=addr_from_pb2(p.address),
name=p.name,
matched_threads=tuple(
[rd.ThreadLayout(address=addr_from_pb2(t.address)) for t in p.matched_threads]
[
rd.ThreadLayout(
address=addr_from_pb2(t.address),
matched_calls=tuple(
[
rd.CallLayout(address=addr_from_pb2(c.address), name=c.name)
for c in t.matched_calls
]
),
)
for t in p.matched_threads
]
),
)
for p in analysis.layout.processes
Expand Down
7 changes: 7 additions & 0 deletions capa/render/proto/capa.proto
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ message ProcessFeatureCount {
// Layout entry for one process: its address, its human-readable name,
// and the layouts of its threads that had matches.
message ProcessLayout {
  Address address = 1;
  repeated ThreadLayout matched_threads = 2;
  // human-readable process name
  string name = 3;
}

message PropertyFeature {
Expand Down Expand Up @@ -429,8 +430,14 @@ message SubstringFeature {
optional string description = 3;
}

// Layout entry for one matched call: its address and its human-readable name.
message CallLayout {
  Address address = 1;
  // human-readable call name
  string name = 2;
}

// Layout entry for one thread: its address and the layouts of its matched calls.
message ThreadLayout {
  Address address = 1;
  repeated CallLayout matched_calls = 2;
}

message Addresses { repeated Address address = 1; }
Expand Down
Loading
Loading