Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix extractions and improve language_strings_missed contents #901

Merged
merged 3 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 32 additions & 3 deletions floss/language/go/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ def find_stack_strings_with_regex(
if not binary_string:
continue

if binary_string.endswith(b"\x00"):
binary_string = binary_string[:-1]

addr = m.start()
# need to subtract opcode bytes offset
off_regex = len(m.group(0)) - len(binary_string)
Expand Down Expand Up @@ -98,6 +101,9 @@ def find_i386_stackstrings(section_data, offset, min_length):
def get_stackstrings(pe: pefile.PE, min_length: int) -> Iterable[StaticString]:
"""
Find stackstrings in the given PE file.

TODO(mr-tz): algorithms need improvements / rethinking of approach
https://github.com/mandiant/flare-floss/issues/828
"""

for section in pe.sections:
Expand Down Expand Up @@ -269,7 +275,9 @@ def get_string_blob_strings(pe: pefile.PE, min_length) -> Iterable[StaticString]
with floss.utils.timing("find struct string candidates"):
struct_strings = list(sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address))
if not struct_strings:
logger.warning("Failed to find struct string candidates: Is this a Go binary?")
logger.warning(
"Failed to find struct string candidates: Is this a Go binary? If so, the Go version may be unsupported."
)
return

with floss.utils.timing("find string blob"):
Expand Down Expand Up @@ -354,12 +362,14 @@ def get_string_blob_strings(pe: pefile.PE, min_length) -> Iterable[StaticString]
last_buf = string_blob_buf[last_pointer_offset:]
for size in range(len(last_buf), 0, -1):
try:
s = last_buf[:size].decode("utf-8")
_ = last_buf[:size].decode("utf-8")
except UnicodeDecodeError:
continue
else:
try:
string = StaticString.from_utf8(last_buf[:size], last_pointer, min_length)
string = StaticString.from_utf8(
last_buf[:size], pe.get_offset_from_rva(last_pointer - image_base), min_length
)
yield string
except ValueError:
pass
Expand All @@ -382,6 +392,25 @@ def extract_go_strings(sample, min_length) -> List[StaticString]:
return go_strings


def get_static_strings_from_blob_range(sample: pathlib.Path, static_strings: List[StaticString]) -> List[StaticString]:
    """
    Filter static_strings down to those whose file offset falls within the
    Go string blob range of the given PE file.

    Returns an empty list when no struct string candidates are found
    (likely not a Go binary) or when the string blob range cannot be
    determined.
    """
    pe = pefile.PE(data=pathlib.Path(sample).read_bytes(), fast_load=True)

    # sorted() already returns a list, so wrapping it in list() is redundant
    struct_strings = sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address)
    if not struct_strings:
        return []

    try:
        string_blob_start, string_blob_end = find_string_blob_range(pe, struct_strings)
    except ValueError:
        # no string blob could be located
        return []

    # the blob range is in virtual addresses; convert to file offsets so it
    # can be compared against StaticString.offset
    image_base = pe.OPTIONAL_HEADER.ImageBase
    string_blob_start = pe.get_offset_from_rva(string_blob_start - image_base)
    string_blob_end = pe.get_offset_from_rva(string_blob_end - image_base)

    return [s for s in static_strings if string_blob_start <= s.offset < string_blob_end]


def main(argv=None):
parser = argparse.ArgumentParser(description="Get Go strings")
parser.add_argument("path", help="file or path to analyze")
Expand Down
24 changes: 24 additions & 0 deletions floss/language/rust/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,20 @@ def extract_rust_strings(sample: pathlib.Path, min_length: int) -> List[StaticSt
return rust_strings


def get_static_strings_from_rdata(sample: pathlib.Path, static_strings: List[StaticString]) -> List[StaticString]:
    """
    Filter static_strings down to those whose file offset falls within the
    .rdata section of the given PE file, which is where Rust string data is
    extracted from.

    Returns an empty list when the PE file has no .rdata section.
    """
    pe = pefile.PE(data=pathlib.Path(sample).read_bytes(), fast_load=True)

    try:
        rdata_section = get_rdata_section(pe)
    except ValueError:
        # no .rdata section present
        return []

    # section bounds as file offsets, comparable against StaticString.offset
    start_rdata = rdata_section.PointerToRawData
    end_rdata = start_rdata + rdata_section.SizeOfRawData

    return [s for s in static_strings if start_rdata <= s.offset < end_rdata]


def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticString]:
image_base = pe.OPTIONAL_HEADER.ImageBase

Expand All @@ -145,6 +159,11 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
# select only UTF-8 strings and adjust offset
static_strings = filter_and_transform_utf8_strings(fixed_strings, start_rdata)

# TODO(mr-tz) - handle miss in rust-hello64.exe
# .rdata:00000001400C1270 0A aPanickedAfterP db 0Ah ; DATA XREF: .rdata:00000001400C12B8↓o
# .rdata:00000001400C1271 70 61 6E 69 63 6B 65 64… db 'panicked after panic::always_abort(), aborting.',0Ah,0
# .rdata:00000001400C12A2 00 00 00 00 00 00 align 8

struct_string_addrs = map(lambda c: c.address, get_struct_string_candidates(pe))

if pe.FILE_HEADER.Machine == pefile.MACHINE_TYPE["IMAGE_FILE_MACHINE_I386"]:
Expand All @@ -157,6 +176,11 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
xrefs_lea = find_lea_xrefs(pe)
xrefs = itertools.chain(struct_string_addrs, xrefs_lea)

# TODO(mr-tz) - handle movdqa rust-hello64.exe
# .text:0000000140026046 66 0F 6F 05 02 71 09 00 movdqa xmm0, cs:xmmword_1400BD150
# .text:000000014002604E 66 0F 6F 0D 0A 71 09 00 movdqa xmm1, cs:xmmword_1400BD160
# .text:0000000140026056 66 0F 6F 15 12 71 09 00 movdqa xmm2, cs:xmmword_1400BD170

else:
logger.error("unsupported architecture: %s", pe.FILE_HEADER.Machine)
return []
Expand Down
15 changes: 13 additions & 2 deletions floss/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,10 @@ def main(argv=None) -> int:
else:
lang_id = identify_language(sample, static_strings)

# TODO(mr-tz): verify user-selected language makes sense and at least warn user
# include language version in results, if available
# https://github.com/mandiant/flare-floss/issues/900

if lang_id == Language.GO:
if analysis.enable_tight_strings or analysis.enable_stack_strings or analysis.enable_decoded_strings:
logger.warning(
Expand Down Expand Up @@ -621,8 +625,13 @@ def main(argv=None) -> int:
results.strings.language_strings = floss.language.go.extract.extract_go_strings(sample, args.min_length)
results.metadata.runtime.language_strings = get_runtime_diff(interim)

# missed strings only includes non-identified strings in searched range
# here currently only focus on strings in string blob range
string_blob_strings = floss.language.go.extract.get_static_strings_from_blob_range(
sample, static_strings
)
results.strings.language_strings_missed = floss.language.utils.get_missed_strings(
static_strings, results.strings.language_strings, args.min_length
string_blob_strings, results.strings.language_strings, args.min_length
)

elif lang_id == Language.RUST:
Expand All @@ -634,8 +643,10 @@ def main(argv=None) -> int:
)
results.metadata.runtime.language_strings = get_runtime_diff(interim)

# currently Rust strings are only extracted from the .rdata section
rdata_strings = floss.language.rust.extract.get_static_strings_from_rdata(sample, static_strings)
results.strings.language_strings_missed = floss.language.utils.get_missed_strings(
static_strings, results.strings.language_strings, args.min_length
rdata_strings, results.strings.language_strings, args.min_length
)
if (
results.analysis.enable_decoded_strings
Expand Down
30 changes: 16 additions & 14 deletions floss/render/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,14 @@ def strtime(seconds):

def render_language_strings(language, language_strings, language_strings_missed, console, verbose, disable_headers):
strings = sorted(language_strings + language_strings_missed, key=lambda s: s.offset)
render_heading(f"FLOSS {language.upper()} STRINGS", len(strings), console, verbose, disable_headers)
render_heading(f"FLOSS {language.upper()} STRINGS ({len(strings)})", console, verbose, disable_headers)
offset_len = len(f"{strings[-1].offset}")
for s in strings:
if verbose == Verbosity.DEFAULT:
console.print(sanitize(s.string, is_ascii_only=False), markup=False)
else:
colored_string = string_style(sanitize(s.string, is_ascii_only=False))
console.print(f"0x{s.offset:>0{offset_len}x} {colored_string}")
console.print("\n")


def render_static_substrings(strings, encoding, offset_len, console, verbose, disable_headers):
Expand All @@ -166,11 +165,10 @@ def render_static_substrings(strings, encoding, offset_len, console, verbose, di
else:
colored_string = string_style(sanitize(s.string))
console.print(f"0x{s.offset:>0{offset_len}x} {colored_string}")
console.print("\n")


def render_staticstrings(strings, console, verbose, disable_headers):
render_heading("FLOSS STATIC STRINGS", len(strings), console, verbose, disable_headers)
render_heading(f"FLOSS STATIC STRINGS ({len(strings)})", console, verbose, disable_headers)

ascii_strings = list(filter(lambda s: s.encoding == StringEncoding.ASCII, strings))
unicode_strings = list(filter(lambda s: s.encoding == StringEncoding.UTF16LE, strings))
Expand All @@ -184,6 +182,7 @@ def render_staticstrings(strings, console, verbose, disable_headers):
offset_len = max(ascii_offset_len, unicode_offset_len)

render_static_substrings(ascii_strings, "ASCII", offset_len, console, verbose, disable_headers)
console.print("\n")
render_static_substrings(unicode_strings, "UTF-16LE", offset_len, console, verbose, disable_headers)


Expand Down Expand Up @@ -249,13 +248,13 @@ def render_decoded_strings(decoded_strings: List[DecodedString], console, verbos
console.print("\n")


def render_heading(heading, n, console, verbose, disable_headers):
def render_heading(heading, console, verbose, disable_headers):
"""
example::

===========================
FLOSS TIGHT STRINGS (0)
===========================
─────────────────────────
FLOSS TIGHT STRINGS (0)
─────────────────────────
"""
if disable_headers:
return
Expand Down Expand Up @@ -314,6 +313,10 @@ def render(results: floss.results.ResultDocument, verbose, disable_headers, colo
render_meta(results, console, verbose)
console.print("\n")

if results.analysis.enable_static_strings:
render_staticstrings(results.strings.static_strings, console, verbose, disable_headers)
console.print("\n")

if results.metadata.language in (
floss.language.identify.Language.GO.value,
floss.language.identify.Language.RUST.value,
Expand All @@ -326,23 +329,22 @@ def render(results: floss.results.ResultDocument, verbose, disable_headers, colo
verbose,
disable_headers,
)

elif results.analysis.enable_static_strings:
render_staticstrings(results.strings.static_strings, console, verbose, disable_headers)
console.print("\n")

if results.analysis.enable_stack_strings:
render_heading("FLOSS STACK STRINGS", len(results.strings.stack_strings), console, verbose, disable_headers)
render_heading(f"FLOSS STACK STRINGS ({len(results.strings.stack_strings)})", console, verbose, disable_headers)
render_stackstrings(results.strings.stack_strings, console, verbose, disable_headers)
console.print("\n")

if results.analysis.enable_tight_strings:
render_heading("FLOSS TIGHT STRINGS", len(results.strings.tight_strings), console, verbose, disable_headers)
render_heading(f"FLOSS TIGHT STRINGS ({len(results.strings.tight_strings)})", console, verbose, disable_headers)
render_stackstrings(results.strings.tight_strings, console, verbose, disable_headers)
console.print("\n")

if results.analysis.enable_decoded_strings:
render_heading("FLOSS DECODED STRINGS", len(results.strings.decoded_strings), console, verbose, disable_headers)
render_heading(
f"FLOSS DECODED STRINGS ({len(results.strings.decoded_strings)})", console, verbose, disable_headers
)
render_decoded_strings(results.strings.decoded_strings, console, verbose, disable_headers)

console.file.seek(0)
Expand Down
4 changes: 4 additions & 0 deletions floss/results.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.

import re
import json
import datetime
from enum import Enum
Expand Down Expand Up @@ -139,6 +140,9 @@ def from_utf8(cls, buf, addr, min_length):
except UnicodeDecodeError:
raise ValueError("not utf-8")

if not re.sub(r"[\r\n\t]", "", decoded_string).isprintable():
raise ValueError("not printable")
Comment on lines +143 to +144
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we've had something similar before and removed it, during my testing this was helpful though


if len(decoded_string) < min_length:
raise ValueError("too short")
return cls(string=decoded_string, offset=addr, encoding=StringEncoding.UTF8)
Expand Down