Skip to content

Commit

Permalink
fix extractions and improve language_strings_missed contents
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-tz committed Nov 10, 2023
1 parent 9405cb8 commit 9c8e9e6
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 5 deletions.
35 changes: 32 additions & 3 deletions floss/language/go/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ def find_stack_strings_with_regex(
if not binary_string:
continue

if binary_string.endswith(b"\x00"):
binary_string = binary_string[:-1]

addr = m.start()
# need to subtract opcode bytes offset
off_regex = len(m.group(0)) - len(binary_string)
Expand Down Expand Up @@ -98,6 +101,9 @@ def find_i386_stackstrings(section_data, offset, min_length):
def get_stackstrings(pe: pefile.PE, min_length: int) -> Iterable[StaticString]:
"""
Find stackstrings in the given PE file.
TODO(mr-tz): algorithms need improvements / rethinking of approach
https://github.com/mandiant/flare-floss/issues/828
"""

for section in pe.sections:
Expand Down Expand Up @@ -269,7 +275,9 @@ def get_string_blob_strings(pe: pefile.PE, min_length) -> Iterable[StaticString]
with floss.utils.timing("find struct string candidates"):
struct_strings = list(sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address))
if not struct_strings:
logger.warning("Failed to find struct string candidates: Is this a Go binary?")
logger.warning(
"Failed to find struct string candidates: Is this a Go binary? If so, the Go version may be unsupported."
)
return

with floss.utils.timing("find string blob"):
Expand Down Expand Up @@ -354,12 +362,14 @@ def get_string_blob_strings(pe: pefile.PE, min_length) -> Iterable[StaticString]
last_buf = string_blob_buf[last_pointer_offset:]
for size in range(len(last_buf), 0, -1):
try:
s = last_buf[:size].decode("utf-8")
_ = last_buf[:size].decode("utf-8")
except UnicodeDecodeError:
continue
else:
try:
string = StaticString.from_utf8(last_buf[:size], last_pointer, min_length)
string = StaticString.from_utf8(
last_buf[:size], pe.get_offset_from_rva(last_pointer - image_base), min_length
)
yield string
except ValueError:
pass
Expand All @@ -382,6 +392,25 @@ def extract_go_strings(sample, min_length) -> List[StaticString]:
return go_strings


def get_static_strings_from_blob_range(sample: pathlib.Path, static_strings: List[StaticString]) -> List[StaticString]:
    """
    Return the static strings whose file offset lies inside the Go string blob.

    The sample is parsed as a PE file, struct string candidates are collected,
    and the string blob boundaries are derived from them. The boundaries are
    converted to file offsets (image base subtracted, then RVA -> offset) so
    they can be compared against `StaticString.offset`.

    Args:
        sample: path to the PE file to analyze.
        static_strings: previously extracted static strings to filter.

    Returns:
        the strings with `blob start <= offset < blob end`; an empty list when
        no struct string candidates exist or the blob range cannot be found.
    """
    raw_bytes = pathlib.Path(sample).read_bytes()
    pe = pefile.PE(data=raw_bytes, fast_load=True)

    # de-duplicate candidates and order them by address
    candidates = sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address)
    if not candidates:
        return []

    try:
        blob_start_va, blob_end_va = find_string_blob_range(pe, candidates)
    except ValueError:
        return []

    # the blob range is address based, static strings carry file offsets
    image_base = pe.OPTIONAL_HEADER.ImageBase
    blob_start_offset = pe.get_offset_from_rva(blob_start_va - image_base)
    blob_end_offset = pe.get_offset_from_rva(blob_end_va - image_base)

    return [s for s in static_strings if blob_start_offset <= s.offset < blob_end_offset]


def main(argv=None):
parser = argparse.ArgumentParser(description="Get Go strings")
parser.add_argument("path", help="file or path to analyze")
Expand Down
24 changes: 24 additions & 0 deletions floss/language/rust/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,20 @@ def extract_rust_strings(sample: pathlib.Path, min_length: int) -> List[StaticSt
return rust_strings


def get_static_strings_from_rdata(sample: pathlib.Path, static_strings: List[StaticString]) -> List[StaticString]:
    """
    Return the static strings whose file offset lies inside the .rdata section.

    The sample is parsed as a PE file and the section returned by
    `get_rdata_section` is used to compute the raw (on-disk) byte range
    `[PointerToRawData, PointerToRawData + SizeOfRawData)`.

    Args:
        sample: path to the PE file to analyze.
        static_strings: previously extracted static strings to filter.

    Returns:
        the strings with `start <= offset < end` of the section's raw data;
        an empty list when `get_rdata_section` raises ValueError.
    """
    pe = pefile.PE(data=pathlib.Path(sample).read_bytes(), fast_load=True)

    try:
        rdata_section = get_rdata_section(pe)
    except ValueError:
        return []

    # raw file range of the section, comparable with StaticString.offset
    start_rdata = rdata_section.PointerToRawData
    end_rdata = start_rdata + rdata_section.SizeOfRawData

    return [s for s in static_strings if start_rdata <= s.offset < end_rdata]


def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticString]:
image_base = pe.OPTIONAL_HEADER.ImageBase

Expand All @@ -145,6 +159,11 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
# select only UTF-8 strings and adjust offset
static_strings = filter_and_transform_utf8_strings(fixed_strings, start_rdata)

# TODO(mr-tz) - handle miss in rust-hello64.exe
# .rdata:00000001400C1270 0A aPanickedAfterP db 0Ah ; DATA XREF: .rdata:00000001400C12B8↓o
# .rdata:00000001400C1271 70 61 6E 69 63 6B 65 64… db 'panicked after panic::always_abort(), aborting.',0Ah,0
# .rdata:00000001400C12A2 00 00 00 00 00 00 align 8

struct_string_addrs = map(lambda c: c.address, get_struct_string_candidates(pe))

if pe.FILE_HEADER.Machine == pefile.MACHINE_TYPE["IMAGE_FILE_MACHINE_I386"]:
Expand All @@ -157,6 +176,11 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
xrefs_lea = find_lea_xrefs(pe)
xrefs = itertools.chain(struct_string_addrs, xrefs_lea)

# TODO(mr-tz) - handle movdqa rust-hello64.exe
# .text:0000000140026046 66 0F 6F 05 02 71 09 00 movdqa xmm0, cs:xmmword_1400BD150
# .text:000000014002604E 66 0F 6F 0D 0A 71 09 00 movdqa xmm1, cs:xmmword_1400BD160
# .text:0000000140026056 66 0F 6F 15 12 71 09 00 movdqa xmm2, cs:xmmword_1400BD170

else:
logger.error("unsupported architecture: %s", pe.FILE_HEADER.Machine)
return []
Expand Down
13 changes: 11 additions & 2 deletions floss/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,10 @@ def main(argv=None) -> int:
else:
lang_id = identify_language(sample, static_strings)

# TODO(mr-tz): verify user-selected language makes sense and at least warn user
# include language version in results, if available
# https://github.com/mandiant/flare-floss/issues/900

if lang_id == Language.GO:
if analysis.enable_tight_strings or analysis.enable_stack_strings or analysis.enable_decoded_strings:
logger.warning(
Expand Down Expand Up @@ -621,8 +625,11 @@ def main(argv=None) -> int:
results.strings.language_strings = floss.language.go.extract.extract_go_strings(sample, args.min_length)
results.metadata.runtime.language_strings = get_runtime_diff(interim)

# missed strings only include non-identified strings within the searched range;
# currently this focuses only on strings located in the string blob range
string_blob_strings = floss.language.go.extract.get_static_strings_from_blob_range(sample, static_strings)
results.strings.language_strings_missed = floss.language.utils.get_missed_strings(
static_strings, results.strings.language_strings, args.min_length
string_blob_strings, results.strings.language_strings, args.min_length
)

elif lang_id == Language.RUST:
Expand All @@ -634,8 +641,10 @@ def main(argv=None) -> int:
)
results.metadata.runtime.language_strings = get_runtime_diff(interim)

# currently Rust strings are only extracted from the .rdata section
rdata_strings = floss.language.rust.extract.get_static_strings_from_rdata(sample, static_strings)
results.strings.language_strings_missed = floss.language.utils.get_missed_strings(
static_strings, results.strings.language_strings, args.min_length
rdata_strings, results.strings.language_strings, args.min_length
)
if (
results.analysis.enable_decoded_strings
Expand Down
4 changes: 4 additions & 0 deletions floss/results.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.

import re
import json
import datetime
from enum import Enum
Expand Down Expand Up @@ -139,6 +140,9 @@ def from_utf8(cls, buf, addr, min_length):
except UnicodeDecodeError:
raise ValueError("not utf-8")

if not re.sub(r"[\r\n\t]", "", decoded_string).isprintable():
raise ValueError("not printable")

if len(decoded_string) < min_length:
raise ValueError("too short")
return cls(string=decoded_string, offset=addr, encoding=StringEncoding.UTF8)
Expand Down

0 comments on commit 9c8e9e6

Please sign in to comment.