Skip to content

Commit

Permalink
Merge branch 'master' into utf8_decoder
Browse files Browse the repository at this point in the history
  • Loading branch information
Arker123 authored Nov 9, 2023
2 parents 18e6080 + 9405cb8 commit 6d8e314
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 26 deletions.
2 changes: 1 addition & 1 deletion doc/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
## Installation
You can install FLOSS in a few different ways.
First, if you simply want to use FLOSS to extract strings, just download
the [standalone binaries](https://github.com/mandiant/flare-floss/releases).
the [standalone binaries](https://github.com/mandiant/flare-floss/releases/latest).
However, if you want to use FLOSS as a Python library,
you can install the package directly from GitHub using `pip`.
Finally, if you'd like to contribute patches or features to FLOSS,
Expand Down
4 changes: 3 additions & 1 deletion floss/language/go/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,9 @@ def get_string_blob_strings(pe: pefile.PE, min_length) -> Iterable[StaticString]
try:
string_blob_start, string_blob_end = find_string_blob_range(pe, struct_strings)
except ValueError:
logger.warning("Failed to find string blob range: Go version may be unsupported.")
logger.warning(
"Failed to find string blob range: Is this a Go binary? If so, the Go version may be unsupported."
)
return

with floss.utils.timing("collect string blob strings"):
Expand Down
52 changes: 46 additions & 6 deletions floss/language/rust/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pathlib
import argparse
import itertools
from typing import List, Tuple, Iterable
from typing import List, Tuple, Iterable, Optional

import pefile
import binary2strings as b2s
Expand Down Expand Up @@ -32,6 +32,41 @@ def get_rdata_section(pe: pefile.PE) -> pefile.SectionStructure:
raise ValueError("no .rdata section found")


def fix_b2s_wide_strings(
    strings: List[Tuple[str, str, Tuple[int, int], bool]], min_length: int, buffer: bytes
) -> List[Tuple[str, str, Tuple[int, int], bool]]:
    """
    Work around b2s reporting a wide string where a UTF-8 string was expected.

    Wide-string entries are never copied to the result. When a wide string's
    UTF-16LE encoding starts with a NUL byte, the string is re-extracted from
    `buffer` one byte past the reported start. The next non-wide entry is then
    replaced by that re-extracted string if its text is contained in it;
    otherwise the entry is kept unchanged.

    Args:
        strings: tuples indexed as (text, type, (start, end), flag); only the
            text, the type and the start offset are read here.
        min_length: re-extracted strings shorter than this are discarded.
        buffer: the bytes the offsets in `strings` refer to.

    Returns:
        A new list holding the non-wide entries, with fixed-up entries
        substituted where applicable.
    """
    # TODO(mr-tz): b2s may parse wide strings where there really should be utf-8 strings
    #  handle special cases here until fixed
    #  https://github.com/mandiant/flare-floss/issues/867
    fixed_strings: List[Tuple[str, str, Tuple[int, int], bool]] = []
    last_fixup: Optional[Tuple[str, str, Tuple[int, int], bool]] = None

    for string in strings:
        s = string[0]
        string_type = string[1]
        start = string[2][0]

        if string_type == "WIDE_STRING":
            sd = s.encode("utf-16le", "ignore")
            # utf-8 strings will not start with \x00
            # compare a slice, not sd[0], so an empty encoding cannot raise IndexError
            if sd[:1] == b"\x00":
                # skip the leading NUL byte and extract again from there
                offset = start + 1
                new_string = b2s.extract_string(buffer[offset:])
                # the extracted span is relative to the slice; rebase it onto `buffer`
                last_fixup = (
                    new_string[0],
                    new_string[1],
                    (new_string[2][0] + offset, new_string[2][1] + offset),
                    new_string[3],
                )
                if len(last_fixup[0]) < min_length:
                    last_fixup = None
        else:
            if last_fixup is not None and s in last_fixup[0]:
                # this entry is a fragment of the re-extracted string: emit the full one
                fixed_strings.append(last_fixup)
            else:
                fixed_strings.append(string)
            # a fixup only ever applies to the entry directly following it
            last_fixup = None

    return fixed_strings


def filter_and_transform_utf8_strings(
strings: List[Tuple[str, int, int]],
start_rdata: int,
Expand All @@ -49,7 +84,7 @@ def filter_and_transform_utf8_strings(
return transformed_strings


def split_strings(static_strings: List[StaticString], address: int) -> None:
def split_strings(static_strings: List[StaticString], address: int, min_length: int) -> None:
"""
if address is in between start and end of a string in ref data then split the string
this modifies the elements of the static strings list directly
Expand All @@ -60,8 +95,12 @@ def split_strings(static_strings: List[StaticString], address: int) -> None:
rust_string = string.string[0 : address - string.offset]
rest = string.string[address - string.offset :]

static_strings.append(StaticString(string=rust_string, offset=string.offset, encoding=StringEncoding.UTF8))
static_strings.append(StaticString(string=rest, offset=address, encoding=StringEncoding.UTF8))
if len(rust_string) >= min_length:
static_strings.append(
StaticString(string=rust_string, offset=string.offset, encoding=StringEncoding.UTF8)
)
if len(rest) >= min_length:
static_strings.append(StaticString(string=rest, offset=address, encoding=StringEncoding.UTF8))

# remove string from static_strings
for static_string in static_strings:
Expand Down Expand Up @@ -100,12 +139,13 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
end_rdata = start_rdata + rdata_section.SizeOfRawData
virtual_address = rdata_section.VirtualAddress
pointer_to_raw_data = rdata_section.PointerToRawData
buffer_rdata = rdata_section.get_data()

# extract utf-8 strings
strings = extract_utf8_strings(pe, min_length)

# select only UTF-8 strings and adjust offset
static_strings = filter_and_transform_utf8_strings(strings, start_rdata)
static_strings = filter_and_transform_utf8_strings(fixed_strings, start_rdata)

struct_string_addrs = map(lambda c: c.address, get_struct_string_candidates(pe))

Expand All @@ -130,7 +170,7 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
if not (start_rdata <= address < end_rdata):
continue

split_strings(static_strings, address)
split_strings(static_strings, address, min_length)

return static_strings

Expand Down
30 changes: 20 additions & 10 deletions floss/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,28 +546,38 @@ def main(argv=None) -> int:

static_runtime = get_runtime_diff(interim)

lang_id = identify_language(sample, static_strings)

# set language configurations
if (lang_id == Language.GO and args.language == "") or args.language == Language.GO.value:
lang_id: Language
if args.language == Language.GO.value:
lang_id = Language.GO
elif args.language == Language.RUST.value:
lang_id = Language.RUST
elif args.language == Language.DOTNET.value:
lang_id = Language.DOTNET
elif args.language == "none":
lang_id = Language.UNKNOWN
else:
lang_id = identify_language(sample, static_strings)

if lang_id == Language.GO:
if analysis.enable_tight_strings or analysis.enable_stack_strings or analysis.enable_decoded_strings:
logger.warning(
"FLOSS handles Go static strings, but string deobfuscation may be inaccurate and take a long time"
)
results.metadata.language = Language.GO.value

elif (lang_id == Language.RUST and args.language == "") or args.language == Language.RUST.value:
elif lang_id == Language.RUST:
if analysis.enable_tight_strings or analysis.enable_stack_strings or analysis.enable_decoded_strings:
logger.warning(
"FLOSS handles Rust static strings, but string deobfuscation may be inaccurate and take a long time"
)
results.metadata.language = Language.RUST.value

elif (lang_id == Language.DOTNET and args.language == "") or args.language == Language.DOTNET.value:
logger.warning(".NET language-specific string extraction is not supported")
logger.warning(" will NOT deobfuscate any .NET strings")
elif lang_id == Language.DOTNET:
logger.warning(".NET language-specific string extraction is not supported yet")
logger.warning("Furthermore, FLOSS does NOT attempt to deobfuscate any strings from .NET binaries")

# let's enable .NET strings after we can deobfuscate them
# enable .NET strings once we can extract them
# results.metadata.language = Language.DOTNET.value

# TODO for pure .NET binaries our deobfuscation algorithms do nothing, but for mixed-mode assemblies they may
Expand Down Expand Up @@ -604,7 +614,7 @@ def main(argv=None) -> int:
if not lang_id:
logger.info("extracting static strings")
else:
if (lang_id == Language.GO and args.language == "") or args.language == Language.GO.value:
if lang_id == Language.GO:
logger.info("extracting language-specific Go strings")

interim = time()
Expand All @@ -615,7 +625,7 @@ def main(argv=None) -> int:
static_strings, results.strings.language_strings, args.min_length
)

elif (lang_id == Language.RUST and args.language == "") or args.language == Language.RUST.value:
elif lang_id == Language.RUST:
logger.info("extracting language-specific Rust strings")

interim = time()
Expand Down
12 changes: 6 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,22 @@
install_requires=requirements,
extras_require={
"dev": [
"pre-commit==3.4.0",
"pre-commit==3.5.0",
"pyyaml==6.0.1",
"pytest==7.4.1",
"pytest==7.4.3",
"pytest-sugar==0.9.4",
"pytest-instafail==0.5.0",
"pytest-cov==4.1.0",
"pycodestyle==2.11.0",
"black==23.9.1",
"pycodestyle==2.11.1",
"black==23.10.1",
"isort==5.11.4",
"mypy==1.5.1",
"mypy==1.6.1",
# type stubs for mypy
"types-PyYAML==6.0.10",
"types-tabulate==0.9.0.3",
],
"build": [
"pyinstaller==5.13.2",
"pyinstaller==6.1.0",
],
},
zip_safe=False,
Expand Down
3 changes: 1 addition & 2 deletions tests/test_language_extract_rust.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,11 @@ def rust_strings64():
# .rdata:00000001400BD040 30 D0 0B 40 01 00 pieces ___str_ <offset aHelloWorld, 0Eh>
# .rdata:00000001400BD040 00 00 00 00 ; "Hello, world!\n"
pytest.param("Hello, world!", 0xBB030, StringEncoding.UTF8, "rust_strings64"),
# TODO enable, see issue #867
# .rdata:00000001400BD050 69 6E 76 61 6C 69 aInvalidArgs db 'invalid args',0
# .rdata:00000001400BD05D 00 00 00 align 20h
# .rdata:00000001400BD060 50 D0 0B 40 01 00 stru_1400BD060 ___str_ <offset aInvalidArgs, 0Ch>
# .rdata:00000001400BD060 00 00 00 00 ; "invalid args"
# pytest.param("invalid args", 0xBB050, StringEncoding.UTF8, "rust_strings64"),
pytest.param("invalid args", 0xBB050, StringEncoding.UTF8, "rust_strings64"),
],
)
def test_data_string_offset(request, string, offset, encoding, rust_strings):
Expand Down

0 comments on commit 6d8e314

Please sign in to comment.