Skip to content

Commit

Permalink
Replace binascii and struct with native Python methods (#2582)
Browse files: browse the repository at this point in the history
* refactor: replace binascii with bytes for hex conversions

Signed-off-by: vibhatsu <[email protected]>

* refactor: replace struct unpacking with bytes conversion

Signed-off-by: vibhatsu <[email protected]>

* simplify byte extraction for ELF header

Signed-off-by: vibhatsu <[email protected]>

* Revert "refactor: replace struct unpacking with bytes conversion"

This reverts commit 483f8c9.

* update CHANGELOG

Signed-off-by: vibhatsu <[email protected]>

---------

Signed-off-by: vibhatsu <[email protected]>
Co-authored-by: Willi Ballenthin <[email protected]>
  • Loading branch information
v1bh475u and williballenthin authored Feb 4, 2025
1 parent 96f9e7c commit a8e8935
Show file tree
Hide file tree
Showing 7 changed files with 11 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
- strings: add type hints and fix uncovered bugs @williballenthin #2555
- elffile: handle symbols without a name @williballenthin #2553
- project: remove pytest-cov that wasn't used @williballenthin #2491
- replace binascii methods with native Python methods @v1bh475u #2582
- rules: scopes can now have subscope blocks with the same scope @williballenthin #2584

### capa Explorer Web
Expand Down
3 changes: 1 addition & 2 deletions capa/features/extractors/cape/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import binascii
from typing import Any, Union, Literal, Optional, Annotated, TypeAlias

from pydantic import Field, BaseModel, ConfigDict
Expand All @@ -27,7 +26,7 @@ def validate_hex_int(value):


def validate_hex_bytes(value):
return binascii.unhexlify(value) if isinstance(value, str) else value
return bytes.fromhex(value) if isinstance(value, str) else value


HexInt = Annotated[int, BeforeValidator(validate_hex_int)]
Expand Down
5 changes: 2 additions & 3 deletions capa/features/freeze/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import binascii
from typing import Union, Literal, Optional, Annotated

from pydantic import Field, BaseModel, ConfigDict
Expand Down Expand Up @@ -85,7 +84,7 @@ def to_capa(self) -> capa.features.common.Feature:
return capa.features.insn.Number(self.number, description=self.description)

elif isinstance(self, BytesFeature):
return capa.features.common.Bytes(binascii.unhexlify(self.bytes), description=self.description)
return capa.features.common.Bytes(bytes.fromhex(self.bytes), description=self.description)

elif isinstance(self, OffsetFeature):
return capa.features.insn.Offset(self.offset, description=self.description)
Expand Down Expand Up @@ -191,7 +190,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
elif isinstance(f, capa.features.common.Bytes):
buf = f.value
assert isinstance(buf, bytes)
return BytesFeature(bytes=binascii.hexlify(buf).decode("ascii"), description=f.description)
return BytesFeature(bytes=bytes.hex(buf), description=f.description)

elif isinstance(f, capa.features.insn.Offset):
assert isinstance(f.value, int)
Expand Down
3 changes: 1 addition & 2 deletions scripts/import-to-ida.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
"""

import logging
import binascii
from pathlib import Path

import ida_nalt
Expand Down Expand Up @@ -85,7 +84,7 @@ def main():
#
# see: https://github.com/idapython/bin/issues/11
a = meta.sample.md5.lower()
b = binascii.hexlify(ida_nalt.retrieve_input_file_md5()).decode("ascii").lower()
b = bytes.hex(ida_nalt.retrieve_input_file_md5()).lower()
if not a.startswith(b):
logger.error("sample mismatch")
return -2
Expand Down
9 changes: 4 additions & 5 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.


import binascii
import contextlib
import collections
from pathlib import Path
Expand Down Expand Up @@ -942,17 +941,17 @@ def parametrize(params, values, **kwargs):
# insn/string, direct memory reference
("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True),
# insn/bytes
("mimikatz", "function=0x401517", capa.features.common.Bytes(binascii.unhexlify("CA3B0E000000F8AF47")), True),
("mimikatz", "function=0x404414", capa.features.common.Bytes(binascii.unhexlify("0180000040EA4700")), True),
("mimikatz", "function=0x401517", capa.features.common.Bytes(bytes.fromhex("CA3B0E000000F8AF47")), True),
("mimikatz", "function=0x404414", capa.features.common.Bytes(bytes.fromhex("0180000040EA4700")), True),
# don't extract byte features for obvious strings
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), False),
("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), False),
("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), False),
("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False),
# push offset aAcsAcr1220 ; "ACS..." -> where ACS == 41 00 43 00 == valid pointer to middle of instruction
("mimikatz", "function=0x401000", capa.features.common.Bytes(binascii.unhexlify("FDFF59F647")), False),
("mimikatz", "function=0x401000", capa.features.common.Bytes(bytes.fromhex("FDFF59F647")), False),
# IDA features included byte sequences read from invalid memory, fixed in #409
("mimikatz", "function=0x44570F", capa.features.common.Bytes(binascii.unhexlify("FF" * 256)), False),
("mimikatz", "function=0x44570F", capa.features.common.Bytes(bytes.fromhex("FF" * 256)), False),
# insn/bytes, pointer to string bytes
("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), False),
# insn/characteristic(nzxor)
Expand Down
3 changes: 1 addition & 2 deletions tests/test_binexport_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import binascii
from typing import cast

import pytest
Expand Down Expand Up @@ -302,7 +301,7 @@
(
"d1e650.ghidra.be2",
"function=0x1165a4",
capa.features.common.Bytes(binascii.unhexlify("E405B89370BA6B419CD7925275BF6FCC1E8360CC")),
capa.features.common.Bytes(bytes.fromhex("E405B89370BA6B419CD7925275BF6FCC1E8360CC")),
True,
),
# # don't extract byte features for obvious strings
Expand Down
3 changes: 1 addition & 2 deletions tests/test_ida_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@
import sys
import inspect
import logging
import binascii
import traceback
from pathlib import Path

Expand All @@ -86,7 +85,7 @@ def check_input_file(wanted):
except UnicodeDecodeError:
# in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
# rather than the hex digest
found = binascii.hexlify(idautils.GetInputFileMD5()[:15]).decode("ascii").lower()
found = bytes.hex(idautils.GetInputFileMD5()[:15]).lower()

if not wanted.startswith(found):
raise RuntimeError(f"please run the tests against sample with MD5: `{wanted}`")
Expand Down

0 comments on commit a8e8935

Please sign in to comment.