Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract Rust specific strings from binaries #791 #836

Merged
merged 44 commits into from
Aug 23, 2023
Merged
Show file tree
Hide file tree
Changes from 42 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
a75661e
Initial implementation of Rust specific strings
Arker123 Jul 17, 2023
e7c7595
New algorithm
Arker123 Jul 28, 2023
8a394bb
code style
Arker123 Jul 28, 2023
e9ca68e
Implemented separation of references from .text segment
Arker123 Aug 5, 2023
45978ea
Added rust coverage script
Arker123 Aug 5, 2023
4cbffaf
Introduced shared functions into language/utils.py
Arker123 Aug 5, 2023
f128d19
Refractored Go and Rust extraction files
Arker123 Aug 5, 2023
80dce99
Removed unused functions
Arker123 Aug 5, 2023
13c8920
Modularized code into separate functions
Arker123 Aug 5, 2023
27958fb
Merge remote-tracking branch 'origin/master' into rust-strings
Arker123 Aug 5, 2023
e074722
Refractored comments and type hints
Arker123 Aug 5, 2023
dbf7ad1
Tweaks
Arker123 Aug 5, 2023
bbd3d53
Update coverage Script
Arker123 Aug 11, 2023
4839543
Tweaks
Arker123 Aug 11, 2023
3ebd075
Minor fixes
Arker123 Aug 11, 2023
226486e
code style
Arker123 Aug 11, 2023
c46410e
Apply suggestions from code review
Arker123 Aug 14, 2023
8fabe4b
Tweaks
Arker123 Aug 14, 2023
8bd3711
Minor fixes
Arker123 Aug 14, 2023
74f3a91
code style
Arker123 Aug 14, 2023
2d5bf95
Update coverage script
Arker123 Aug 15, 2023
76d5f84
Update coverage script
Arker123 Aug 17, 2023
b02fc6a
Tweaks
Arker123 Aug 17, 2023
39e814c
Apply suggestions from code review
Arker123 Aug 19, 2023
02288d7
Tweaks
Arker123 Aug 19, 2023
797e5e3
Minor fixes
Arker123 Aug 19, 2023
657d497
Design Tweaks
Arker123 Aug 21, 2023
73afe8b
Refractored Design
Arker123 Aug 21, 2023
267862e
Improved Design
Arker123 Aug 22, 2023
9fe75c7
Further Improvised Design
Arker123 Aug 22, 2023
a67f9f2
Tweaks
Arker123 Aug 22, 2023
07a7558
Design Tweaks
Arker123 Aug 22, 2023
5a6fdb6
Updated Design Structure
Arker123 Aug 22, 2023
1650f8b
Cleanup
Arker123 Aug 22, 2023
6cdccb3
Rust updates (#7)
mr-tz Aug 22, 2023
62405fe
Added push and mov xrefs for i386 arch and test updates
Arker123 Aug 22, 2023
c98450d
Tweaks
Arker123 Aug 23, 2023
57fc902
Update floss/language/go/coverage.py
Arker123 Aug 23, 2023
ef27592
Add push and mov for amd64
Arker123 Aug 23, 2023
1909255
Merge branch 'rust-strings' of https://github.com/Arker123/flare-flos…
Arker123 Aug 23, 2023
6011ea7
Update Comments
Arker123 Aug 23, 2023
890ba55
Tweaks
Arker123 Aug 23, 2023
df20ec1
Comment Tweaks
Arker123 Aug 23, 2023
2fdb823
Tweaks
Arker123 Aug 23, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/mypy/mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,6 @@ ignore_missing_imports = True
ignore_missing_imports = True

[mypy-bs4.*]
ignore_missing_imports = True
ignore_missing_imports = True
[mypy-binary2strings.*]
ignore_missing_imports = True
227 changes: 6 additions & 221 deletions floss/language/go/coverage.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
import sys
import hashlib
import logging
import pathlib
import argparse
from typing import List

import pefile
import tabulate

from floss.utils import get_static_strings
from floss.results import StaticString, StringEncoding
from floss.render.sanitize import sanitize
from floss.language.utils import get_extract_stats
from floss.language.go.extract import extract_go_strings

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -59,224 +57,11 @@ def main():

go_strings = extract_go_strings(path, args.min_length)

get_extract_stats(pe, static_strings, go_strings, args.min_length)


def get_extract_stats(
    pe, all_ss_strings: List[StaticString], go_strings, min_len, min_blob_len: int = 2800
) -> float:
    """
    Print a coverage table for the `.rdata` section and return the percentage of
    static string characters that are covered by extracted Go strings.

    Args:
        pe: parsed PE object; only `get_section_by_rva()` and the returned
            section's `Name` are used.
        all_ss_strings: static strings to measure coverage against.
        go_strings: language-specific strings; must be re-iterable.
        min_len: once less than this many characters remain of a static string,
            matching against it stops and a "missing" row is recorded.
        min_blob_len: static strings whose length is less than or equal to this
            value are ignored. The default of 2800 was chosen based on
            experimentation on different samples of go binaries that include
            versions 1.20, 1.18, 1.16, 1.12 and architectures amd64 and i386.
            See: https://github.com/mandiant/flare-floss/issues/807#issuecomment-1636087673

    Returns:
        100 * (matched go string chars / considered static string chars), or 0.0
        when no static string was considered.
    """

    def section_name(rva) -> str:
        # name of the section containing `rva`, or "" when none does
        sec = pe.get_section_by_rva(rva)
        return sec.Name.decode("utf-8").split("\x00")[0] if sec else ""

    def preview(text: str) -> str:
        # shorten long strings for display, then sanitize for the terminal
        if len(text) >= 50:
            text = text[:36] + "...." + text[-10:]
        return sanitize(text)

    # resolve the section of each go string once instead of once per static string
    rdata_go_strings = [gs for gs in go_strings if section_name(gs.offset) == ".rdata"]

    len_all_ss = 0
    len_gostr = 0

    gs_found = list()
    results = list()
    # these are ascii, extract these utf-8 to get fewer chunks (ascii may split on two-byte characters, for example)
    for s in all_ss_strings:
        secname = section_name(s.offset)
        if secname != ".rdata":
            continue

        if len(s.string) <= min_blob_len:
            continue

        orig_len = len(s.string)
        len_all_ss += orig_len

        s_id = hashlib.sha256(s.string.encode("utf-8")).hexdigest()[:3].upper()
        s_range = (s.offset, s.offset + orig_len)

        found = False
        for gs in rdata_go_strings:
            # both strings are known to live in .rdata here, so no section comparison is needed;
            # the window's upper bound deliberately keeps using the untrimmed length
            if not (gs.string and gs.string in s.string and s.offset <= gs.offset <= s.offset + orig_len):
                continue

            found = True
            len_gostr += len(gs.string)

            # remove found string data; the `in` check above guarantees idx >= 0
            idx = s.string.find(gs.string)
            if idx == 0:
                new_offset = s.offset + len(gs.string)
            else:
                new_offset = s.offset

            replaced_s = s.string.replace(gs.string, "", 1)
            replaced_len = len(replaced_s)
            s_trimmed = StaticString(
                string=replaced_s,
                offset=new_offset,
                encoding=s.encoding,
            )

            type_ = "substring"
            if idx == 0 and s.offset == gs.offset:
                type_ = "exactsubstr"

            # the row records the string as it was *before* this match was removed
            results.append((secname, s_id, s_range, True, type_, s, replaced_len, gs))

            s = s_trimmed

            gs_found.append(gs)

            if replaced_len < min_len:
                results.append((secname, s_id, s_range, False, "missing", s, orig_len - replaced_len, gs))
                break

        if not found:
            null = StaticString(string="", offset=0, encoding=StringEncoding.UTF8)
            results.append((secname, s_id, s_range, False, "", s, 0, null))

    rows = list()
    for gs in rdata_go_strings:
        if gs in gs_found:
            continue

        rows.append(
            (
                ".rdata",
                "",
                "",
                f"{gs.offset:8x}",
                "",
                "unmatched go string",
                "",
                "",
                f"{len(gs.string) if gs.string else ''}",
                f"{preview(gs.string)}",
                f"{hex(gs.offset) if gs.offset else ''}",
            )
        )

    for secname, s_id, s_range, found, msg, s, len_after, gs in results:
        len_info = f"{len(s.string):3d}"
        if found:
            len_info = f"{len(s.string):3d} > {len_after:3d} ({(len(s.string) - len_after) * -1:2d})"

        rows.append(
            (
                f"{secname}",
                f"<{s_id}>",
                f"{s_range[0]:x} - {s_range[1]:x}",
                f"{s.offset:8x}",
                f"{found}",
                f"{msg}",
                len_info,
                f"{preview(s.string)}",
                f"{len(gs.string) if gs.string else ''}",
                f"{preview(gs.string)}",
                f"{hex(gs.offset) if gs.offset else ''}",
            )
        )

    # sort on the formatted (space padded) offset column
    rows = sorted(rows, key=lambda t: t[3])

    print(
        tabulate.tabulate(
            rows,
            headers=[
                "section",
                "id",
                "range",
                "offset",
                "found",
                "msg",
                "slen",
                "string",
                "gslen",
                "gostring",
                "gsoff",
            ],
            tablefmt="psql",
        )
    )

    # avoid ZeroDivisionError when no static string passed the filters above
    percentage = 100 * (len_gostr / len_all_ss) if len_all_ss else 0.0

    print(".rdata only")
    print("len all string chars:", len_all_ss)
    print("len gostring chars  :", len_gostr)
    print(f"Percentage of string chars extracted: {round(percentage)}%")
    print()

    return percentage


def get_missed_strings(
    all_ss_strings: List[StaticString], go_strings: List[StaticString], min_len: int
) -> List[StaticString]:
    """
    Return the static strings that no Go string matched.

    A Go string matches when it is non-empty, occurs in the (progressively
    trimmed) static string, and its offset lies within the window that starts
    at the static string's current offset and spans its original length.
    Each match removes the first occurrence from the static string; matching
    stops early once fewer than `min_len` characters remain.
    """
    missed: List[StaticString] = []

    for candidate in all_ss_strings:
        remaining = candidate
        # the window size is fixed by the untrimmed length
        initial_len = len(candidate.string)
        matched = False

        for go_str in go_strings:
            needle = go_str.string
            if not needle or needle not in remaining.string:
                continue
            if not (remaining.offset <= go_str.offset <= remaining.offset + initial_len):
                continue

            matched = True

            # cut out the first occurrence of the needle
            head, _, tail = remaining.string.partition(needle)
            leftover = head + tail

            # only a match at the very front moves the start offset forward
            next_offset = remaining.offset if head else remaining.offset + len(needle)

            remaining = StaticString(
                string=leftover,
                offset=next_offset,
                encoding=remaining.encoding,
            )

            if len(leftover) < min_len:
                break

        if not matched:
            missed.append(remaining)

    return missed
# The value 2800 was chosen based on experimentation on different samples
# of go binaries that include versions 1.20, 1.18, 1.16, 1.12 and
# architectures amd64 and i386.
# See: https://github.com/mandiant/flare-floss/issues/807#issuecomment-1636087673
get_extract_stats(pe, static_strings, go_strings, args.min_length, 2800)


if __name__ == "__main__":
Expand Down
Loading