Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix extractions and improve language_strings_missed contents #901

Merged
merged 3 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 32 additions & 3 deletions floss/language/go/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ def find_stack_strings_with_regex(
if not binary_string:
continue

if binary_string.endswith(b"\x00"):
binary_string = binary_string[:-1]

addr = m.start()
# need to subtract opcode bytes offset
off_regex = len(m.group(0)) - len(binary_string)
Expand Down Expand Up @@ -98,6 +101,9 @@ def find_i386_stackstrings(section_data, offset, min_length):
def get_stackstrings(pe: pefile.PE, min_length: int) -> Iterable[StaticString]:
"""
Find stackstrings in the given PE file.

TODO(mr-tz): algorithms need improvements / rethinking of approach
https://github.com/mandiant/flare-floss/issues/828
"""

for section in pe.sections:
Expand Down Expand Up @@ -269,7 +275,9 @@ def get_string_blob_strings(pe: pefile.PE, min_length) -> Iterable[StaticString]
with floss.utils.timing("find struct string candidates"):
struct_strings = list(sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address))
if not struct_strings:
logger.warning("Failed to find struct string candidates: Is this a Go binary?")
logger.warning(
"Failed to find struct string candidates: Is this a Go binary? If so, the Go version may be unsupported."
)
return

with floss.utils.timing("find string blob"):
Expand Down Expand Up @@ -354,12 +362,14 @@ def get_string_blob_strings(pe: pefile.PE, min_length) -> Iterable[StaticString]
last_buf = string_blob_buf[last_pointer_offset:]
for size in range(len(last_buf), 0, -1):
try:
s = last_buf[:size].decode("utf-8")
_ = last_buf[:size].decode("utf-8")
except UnicodeDecodeError:
continue
else:
try:
string = StaticString.from_utf8(last_buf[:size], last_pointer, min_length)
string = StaticString.from_utf8(
last_buf[:size], pe.get_offset_from_rva(last_pointer - image_base), min_length
)
yield string
except ValueError:
pass
Expand All @@ -382,6 +392,25 @@ def extract_go_strings(sample, min_length) -> List[StaticString]:
return go_strings


def get_static_strings_from_blob_range(sample: pathlib.Path, static_strings: List[StaticString]) -> List[StaticString]:
    """
    Filter static_strings down to those whose file offset falls within the
    Go string blob range of the given PE file.

    Returns an empty list when no struct string candidates are found
    (likely not a Go binary) or when the string blob range cannot be
    determined.
    """
    pe = pefile.PE(data=pathlib.Path(sample).read_bytes(), fast_load=True)

    # sorted() already returns a list, so wrapping it in list() is redundant
    struct_strings = sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address)
    if not struct_strings:
        return []

    try:
        string_blob_start, string_blob_end = find_string_blob_range(pe, struct_strings)
    except ValueError:
        # no string blob could be located
        return []

    # the blob range is in virtual addresses; convert to file offsets so it
    # can be compared against StaticString.offset
    image_base = pe.OPTIONAL_HEADER.ImageBase
    string_blob_start = pe.get_offset_from_rva(string_blob_start - image_base)
    string_blob_end = pe.get_offset_from_rva(string_blob_end - image_base)

    return [s for s in static_strings if string_blob_start <= s.offset < string_blob_end]


def main(argv=None):
parser = argparse.ArgumentParser(description="Get Go strings")
parser.add_argument("path", help="file or path to analyze")
Expand Down
24 changes: 24 additions & 0 deletions floss/language/rust/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,20 @@ def extract_rust_strings(sample: pathlib.Path, min_length: int) -> List[StaticSt
return rust_strings


def get_static_strings_from_rdata(sample: pathlib.Path, static_strings: List[StaticString]) -> List[StaticString]:
    """
    Filter static_strings down to those whose file offset falls within the
    .rdata section of the given PE file, which is where Rust string data is
    extracted from.

    Returns an empty list when the PE file has no .rdata section.
    """
    pe = pefile.PE(data=pathlib.Path(sample).read_bytes(), fast_load=True)

    try:
        rdata_section = get_rdata_section(pe)
    except ValueError:
        # no .rdata section present
        return []

    # section bounds as file offsets, comparable against StaticString.offset
    start_rdata = rdata_section.PointerToRawData
    end_rdata = start_rdata + rdata_section.SizeOfRawData

    return [s for s in static_strings if start_rdata <= s.offset < end_rdata]


def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticString]:
image_base = pe.OPTIONAL_HEADER.ImageBase

Expand All @@ -145,6 +159,11 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
# select only UTF-8 strings and adjust offset
static_strings = filter_and_transform_utf8_strings(fixed_strings, start_rdata)

# TODO(mr-tz) - handle miss in rust-hello64.exe
# .rdata:00000001400C1270 0A aPanickedAfterP db 0Ah ; DATA XREF: .rdata:00000001400C12B8↓o
# .rdata:00000001400C1271 70 61 6E 69 63 6B 65 64… db 'panicked after panic::always_abort(), aborting.',0Ah,0
# .rdata:00000001400C12A2 00 00 00 00 00 00 align 8

struct_string_addrs = map(lambda c: c.address, get_struct_string_candidates(pe))

if pe.FILE_HEADER.Machine == pefile.MACHINE_TYPE["IMAGE_FILE_MACHINE_I386"]:
Expand All @@ -157,6 +176,11 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
xrefs_lea = find_lea_xrefs(pe)
xrefs = itertools.chain(struct_string_addrs, xrefs_lea)

# TODO(mr-tz) - handle movdqa rust-hello64.exe
# .text:0000000140026046 66 0F 6F 05 02 71 09 00 movdqa xmm0, cs:xmmword_1400BD150
# .text:000000014002604E 66 0F 6F 0D 0A 71 09 00 movdqa xmm1, cs:xmmword_1400BD160
# .text:0000000140026056 66 0F 6F 15 12 71 09 00 movdqa xmm2, cs:xmmword_1400BD170

else:
logger.error("unsupported architecture: %s", pe.FILE_HEADER.Machine)
return []
Expand Down
15 changes: 13 additions & 2 deletions floss/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,10 @@ def main(argv=None) -> int:
else:
lang_id = identify_language(sample, static_strings)

# TODO(mr-tz): verify user-selected language makes sense and at least warn user
# include language version in results, if available
# https://github.com/mandiant/flare-floss/issues/900

if lang_id == Language.GO:
if analysis.enable_tight_strings or analysis.enable_stack_strings or analysis.enable_decoded_strings:
logger.warning(
Expand Down Expand Up @@ -621,8 +625,13 @@ def main(argv=None) -> int:
results.strings.language_strings = floss.language.go.extract.extract_go_strings(sample, args.min_length)
results.metadata.runtime.language_strings = get_runtime_diff(interim)

# missed strings only includes non-identified strings in searched range
# here currently only focus on strings in string blob range
string_blob_strings = floss.language.go.extract.get_static_strings_from_blob_range(
sample, static_strings
)
results.strings.language_strings_missed = floss.language.utils.get_missed_strings(
static_strings, results.strings.language_strings, args.min_length
string_blob_strings, results.strings.language_strings, args.min_length
)

elif lang_id == Language.RUST:
Expand All @@ -634,8 +643,10 @@ def main(argv=None) -> int:
)
results.metadata.runtime.language_strings = get_runtime_diff(interim)

# currently Rust strings are only extracted from the .rdata section
rdata_strings = floss.language.rust.extract.get_static_strings_from_rdata(sample, static_strings)
results.strings.language_strings_missed = floss.language.utils.get_missed_strings(
static_strings, results.strings.language_strings, args.min_length
rdata_strings, results.strings.language_strings, args.min_length
)
if (
results.analysis.enable_decoded_strings
Expand Down
30 changes: 16 additions & 14 deletions floss/render/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,14 @@ def strtime(seconds):

def render_language_strings(language, language_strings, language_strings_missed, console, verbose, disable_headers):
strings = sorted(language_strings + language_strings_missed, key=lambda s: s.offset)
render_heading(f"FLOSS {language.upper()} STRINGS", len(strings), console, verbose, disable_headers)
render_heading(f"FLOSS {language.upper()} STRINGS ({len(strings)})", console, verbose, disable_headers)
offset_len = len(f"{strings[-1].offset}")
for s in strings:
if verbose == Verbosity.DEFAULT:
console.print(sanitize(s.string, is_ascii_only=False), markup=False)
else:
colored_string = string_style(sanitize(s.string, is_ascii_only=False))
console.print(f"0x{s.offset:>0{offset_len}x} {colored_string}")
console.print("\n")


def render_static_substrings(strings, encoding, offset_len, console, verbose, disable_headers):
Expand All @@ -166,11 +165,10 @@ def render_static_substrings(strings, encoding, offset_len, console, verbose, di
else:
colored_string = string_style(sanitize(s.string))
console.print(f"0x{s.offset:>0{offset_len}x} {colored_string}")
console.print("\n")


def render_staticstrings(strings, console, verbose, disable_headers):
render_heading("FLOSS STATIC STRINGS", len(strings), console, verbose, disable_headers)
render_heading(f"FLOSS STATIC STRINGS ({len(strings)})", console, verbose, disable_headers)

ascii_strings = list(filter(lambda s: s.encoding == StringEncoding.ASCII, strings))
unicode_strings = list(filter(lambda s: s.encoding == StringEncoding.UTF16LE, strings))
Expand All @@ -184,6 +182,7 @@ def render_staticstrings(strings, console, verbose, disable_headers):
offset_len = max(ascii_offset_len, unicode_offset_len)

render_static_substrings(ascii_strings, "ASCII", offset_len, console, verbose, disable_headers)
console.print("\n")
render_static_substrings(unicode_strings, "UTF-16LE", offset_len, console, verbose, disable_headers)


Expand Down Expand Up @@ -249,13 +248,13 @@ def render_decoded_strings(decoded_strings: List[DecodedString], console, verbos
console.print("\n")


def render_heading(heading, n, console, verbose, disable_headers):
def render_heading(heading, console, verbose, disable_headers):
"""
example::

===========================
FLOSS TIGHT STRINGS (0)
===========================
─────────────────────────
FLOSS TIGHT STRINGS (0)
─────────────────────────
"""
if disable_headers:
return
Expand Down Expand Up @@ -314,6 +313,10 @@ def render(results: floss.results.ResultDocument, verbose, disable_headers, colo
render_meta(results, console, verbose)
console.print("\n")

if results.analysis.enable_static_strings:
render_staticstrings(results.strings.static_strings, console, verbose, disable_headers)
console.print("\n")

if results.metadata.language in (
floss.language.identify.Language.GO.value,
floss.language.identify.Language.RUST.value,
Expand All @@ -326,23 +329,22 @@ def render(results: floss.results.ResultDocument, verbose, disable_headers, colo
verbose,
disable_headers,
)

elif results.analysis.enable_static_strings:
render_staticstrings(results.strings.static_strings, console, verbose, disable_headers)
console.print("\n")

if results.analysis.enable_stack_strings:
render_heading("FLOSS STACK STRINGS", len(results.strings.stack_strings), console, verbose, disable_headers)
render_heading(f"FLOSS STACK STRINGS ({len(results.strings.stack_strings)})", console, verbose, disable_headers)
render_stackstrings(results.strings.stack_strings, console, verbose, disable_headers)
console.print("\n")

if results.analysis.enable_tight_strings:
render_heading("FLOSS TIGHT STRINGS", len(results.strings.tight_strings), console, verbose, disable_headers)
render_heading(f"FLOSS TIGHT STRINGS ({len(results.strings.tight_strings)})", console, verbose, disable_headers)
render_stackstrings(results.strings.tight_strings, console, verbose, disable_headers)
console.print("\n")

if results.analysis.enable_decoded_strings:
render_heading("FLOSS DECODED STRINGS", len(results.strings.decoded_strings), console, verbose, disable_headers)
render_heading(
f"FLOSS DECODED STRINGS ({len(results.strings.decoded_strings)})", console, verbose, disable_headers
)
render_decoded_strings(results.strings.decoded_strings, console, verbose, disable_headers)

console.file.seek(0)
Expand Down
4 changes: 4 additions & 0 deletions floss/results.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.

import re
import json
import datetime
from enum import Enum
Expand Down Expand Up @@ -139,6 +140,9 @@ def from_utf8(cls, buf, addr, min_length):
except UnicodeDecodeError:
raise ValueError("not utf-8")

if not re.sub(r"[\r\n\t]", "", decoded_string).isprintable():
raise ValueError("not printable")
Comment on lines +143 to +144
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we've had something similar before and removed it, during my testing this was helpful though


if len(decoded_string) < min_length:
raise ValueError("too short")
return cls(string=decoded_string, offset=addr, encoding=StringEncoding.UTF8)
Expand Down