Skip to content

Commit

Permalink
Scan: Add string reference collectors
Browse files Browse the repository at this point in the history
  • Loading branch information
praydog committed Dec 5, 2024
1 parent 61a04d4 commit faa6de4
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 0 deletions.
35 changes: 35 additions & 0 deletions include/utility/Scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,39 @@ namespace utility {
// Finds the function start given the middle, and then disassembles and stores all instructions until it hits the middle
// We can use this to "disassemble" backwards from the middle of an instruction
std::vector<Resolved> get_disassembly_behind(uintptr_t middle);

// One string referenced by a decoded instruction.
//
// `resolved` identifies the referencing instruction. The anonymous union holds the
// address of the referenced string: `ascii` is set by the narrow-string constructor and
// `unicode` by the wide-string constructor. Nothing in the struct records which member
// is active, so the consumer has to know which kind of reference it is holding.
struct StringReference {
    Resolved resolved{};
    union {
        const char* ascii{nullptr};
        const wchar_t* unicode;
    };

    // Builds a reference to a narrow (char) string.
    StringReference(const Resolved& resolved, const char* ascii)
        : resolved{resolved}
        , ascii{ascii}
    {
    }

    // Builds a reference to a wide (wchar_t) string.
    StringReference(const Resolved& resolved, const wchar_t* unicode)
        : resolved{resolved}
        , unicode{unicode}
    {
    }
};

// Filters applied by collect_ascii_string_references / collect_unicode_string_references.
//
// The with_* setters each store one value and return *this, so options can be built in a
// single chained expression, e.g. StringReferenceOptions{}.with_min_length(4).
struct StringReferenceOptions {
    // When false, the collectors step over instructions whose mnemonic starts with "CALL".
    bool follow_calls{false};
    // Shortest string, in characters (terminator excluded), that is reported.
    size_t min_length{1};
    // Longest string, in characters (terminator excluded), that is reported.
    size_t max_length{256};

    // Sets `follow_calls` and returns this object for chaining.
    StringReferenceOptions& with_follow_calls(bool follow_calls) {
        auto& self = *this;
        self.follow_calls = follow_calls;
        return self;
    }

    // Sets `min_length` and returns this object for chaining.
    StringReferenceOptions& with_min_length(size_t min_length) {
        auto& self = *this;
        self.min_length = min_length;
        return self;
    }

    // Sets `max_length` and returns this object for chaining.
    StringReferenceOptions& with_max_length(size_t max_length) {
        auto& self = *this;
        self.max_length = max_length;
        return self;
    }
};

// Decodes the code at `start` (both `start` and `max_size` are forwarded to exhaustive_decode)
// and returns one StringReference per instruction whose resolved displacement points at a
// readable, NUL-terminated run of printable `char`s. Empty strings are never reported; the
// run must be at least options.min_length and at most options.max_length characters long.
// CALL instructions are stepped over unless options.follow_calls is set.
std::vector<StringReference> collect_ascii_string_references(uintptr_t start, size_t max_size, const StringReferenceOptions& options = {});
// Wide-string counterpart of collect_ascii_string_references: the target is read as
// `wchar_t`s and lengths are counted in wchar_t units. Only characters in the range
// 0x20..0x7E are accepted, so strings containing any other character are not reported.
std::vector<StringReference> collect_unicode_string_references(uintptr_t start, size_t max_size, const StringReferenceOptions& options = {});
}
95 changes: 95 additions & 0 deletions src/Scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <ppl.h>
#include <intrin.h>

#include <cwctype>
#include <cstdint>
#include <unordered_set>
#include <deque>
Expand Down Expand Up @@ -2217,4 +2218,98 @@ namespace utility {

return out;
}

// Collects references to narrow (char) strings made by the code at `start`.
//
// `start` and `max_size` are forwarded to utility::exhaustive_decode, which invokes the
// callback below once per decoded instruction. For every instruction whose displacement
// resolves to a readable address, the bytes at that address are scanned; the address is
// reported when it holds a non-empty, NUL-terminated run of printable characters whose
// length is within [options.min_length, options.max_length].
//
// Unless options.follow_calls is set, instructions whose mnemonic starts with "CALL" are
// stepped over instead of being followed.
//
// Returns the matches in the order the decoder visited them.
std::vector<StringReference> collect_ascii_string_references(uintptr_t start, size_t max_size, const StringReferenceOptions& options) {
    std::vector<StringReference> out{};

    utility::exhaustive_decode(reinterpret_cast<uint8_t*>(start), max_size, [&](ExhaustionContext& ctx) -> ExhaustionResult {
        try {
            if (!options.follow_calls && std::string_view{ctx.instrux.Mnemonic}.starts_with("CALL")) {
                return ExhaustionResult::STEP_OVER;
            }

            const auto disp = utility::resolve_displacement(ctx.addr);

            if (!disp) {
                return ExhaustionResult::CONTINUE;
            }

            const auto* const str = reinterpret_cast<const char*>(*disp);

            // NOTE(review): only the first two bytes are probed here. Reads further into the
            // candidate string are not pre-validated and presumably rely on the catch(...)
            // below (i.e. a build that turns access violations into C++ exceptions) - confirm.
            if (IsBadReadPtr(str, 2)) {
                return ExhaustionResult::CONTINUE;
            }

            size_t len = 0;

            // std::isprint has undefined behaviour for negative arguments other than EOF, and
            // plain `char` may be signed, so every byte is widened through unsigned char first.
            // Arbitrary memory routinely contains bytes >= 0x80, so this is not a corner case.
            while (str[len] != '\0' && std::isprint(static_cast<unsigned char>(str[len]))) {
                // More than max_length printable characters: too long to be of interest.
                if (len >= options.max_length) {
                    return ExhaustionResult::CONTINUE;
                }

                ++len;
            }

            // Accept only non-empty runs that ended on a terminator rather than on a
            // non-printable byte.
            if (len != 0 && str[len] == '\0' && len >= options.min_length) {
                out.emplace_back(Resolved{ctx.addr, ctx.instrux}, str);
            }
        } catch(...) {
            // Deliberately best-effort: a failure while inspecting one candidate only skips
            // that candidate, and decoding carries on with the next instruction.
        }

        return ExhaustionResult::CONTINUE;
    });

    return out;
}

// Collects references to wide (wchar_t) strings made by the code at `start`.
//
// `start` and `max_size` are forwarded to utility::exhaustive_decode; the callback below
// runs once per decoded instruction. An instruction is reported when its displacement
// resolves to a readable address holding a non-empty, NUL-terminated run of wchar_t values
// that are all printable and inside 0x20..0x7E, with a length (in wchar_t units) within
// [options.min_length, options.max_length].
//
// Unless options.follow_calls is set, instructions whose mnemonic starts with "CALL" are
// stepped over.
std::vector<StringReference> collect_unicode_string_references(uintptr_t start, size_t max_size, const StringReferenceOptions& options) {
    std::vector<StringReference> out{};

    utility::exhaustive_decode(reinterpret_cast<uint8_t*>(start), max_size, [&](ExhaustionContext& ctx) -> ExhaustionResult {
        try {
            const bool is_call = std::string_view{ctx.instrux.Mnemonic}.starts_with("CALL");

            if (is_call && !options.follow_calls) {
                return ExhaustionResult::STEP_OVER;
            }

            const auto disp = utility::resolve_displacement(ctx.addr);

            if (!disp) {
                return ExhaustionResult::CONTINUE;
            }

            const auto* const text = reinterpret_cast<const wchar_t*>(*disp);

            // Probe just enough for two characters before touching the memory.
            if (IsBadReadPtr(text, sizeof(wchar_t) * 2)) {
                return ExhaustionResult::CONTINUE;
            }

            // A character qualifies when it lies in 0x20..0x7E (which already excludes the
            // terminator) and the C library also classifies it as printable.
            const auto is_plain = [](const wchar_t ch) {
                return ch >= 0x20 && ch <= 0x7E && std::iswprint(ch);
            };

            size_t count = 0;

            for (; is_plain(text[count]); ++count) {
                // More than max_length qualifying characters: give up on this candidate.
                if (count >= options.max_length) {
                    return ExhaustionResult::CONTINUE;
                }
            }

            // The run must be non-empty and must have stopped on a terminator, not on a
            // character outside the accepted range.
            const bool terminated = text[count] == L'\0';

            if (terminated && count != 0 && count >= options.min_length) {
                out.emplace_back(Resolved{ctx.addr, ctx.instrux}, text);
            }
        } catch(...) {
            // Best-effort: a failure on one candidate just skips it.
        }

        return ExhaustionResult::CONTINUE;
    });

    return out;
}
}
41 changes: 41 additions & 0 deletions test/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,22 @@ class RTTITest {
return "size_t RTTITest::foo()";
}

// Narrow string literal printed by some_function_that_has_strings().
// consteval makes this an immediate function: every call is evaluated at compile time.
static consteval const char* BAR_STRING() {
return "void RTTITest::some_function_that_has_strings() BAR";
}

// Second narrow string literal printed by some_function_that_has_strings().
// consteval makes this an immediate function: every call is evaluated at compile time.
static consteval const char* BAZ_STRING() {
return "void RTTITest::some_function_that_has_strings() BAZ";
}

// Wide string literal printed by some_function_that_has_strings().
// consteval makes this an immediate function: every call is evaluated at compile time.
static consteval const wchar_t* BAR_STRING_W() {
return L"void RTTITest::some_function_that_has_strings() BAR";
}

// Second wide string literal printed by some_function_that_has_strings().
// consteval makes this an immediate function: every call is evaluated at compile time.
static consteval const wchar_t* BAZ_STRING_W() {
return L"void RTTITest::some_function_that_has_strings() BAZ";
}

RTTITest() {
std::cout << "RTTITest::RTTITest()" << std::endl;
}
Expand All @@ -36,6 +52,19 @@ class RTTITest {
return 0;
}

// Fixture for the string reference collectors: main() passes this function's address as
// the scan start, so the body exists to reference two narrow and two wide string literals.
// Marked noinline, presumably so it keeps a standalone body at that address - confirm.
// Written as a function-try-block: the handlers below cover the whole body.
__declspec(noinline) static void some_function_that_has_strings() try {
// Narrow literals, printed with %s.
printf("%s\n", BAR_STRING());
printf("%s\n", BAZ_STRING());
// Wide literals, printed with %ls.
printf("%ls\n", BAR_STRING_W());
printf("%ls\n", BAZ_STRING_W());

// Always throws; the std::exception handler below catches it and prints e.what().
// NOTE(review): presumably here so the scanned function also contains exception-handling
// code and extra string literals - confirm the intent.
throw std::runtime_error("This is a test exception");
} catch(const std::exception& e) {
std::cout << "RTTITest::some_function_that_has_strings() threw exception: " << e.what() << std::endl;
} catch(...) {
// Not reachable from the throw above, which is a std::runtime_error.
std::cout << "RTTITest::some_function_that_has_strings() threw unknown exception" << std::endl;
}

private:
};

Expand Down Expand Up @@ -234,6 +263,18 @@ int main() try {

// Do a BS scan on unallocated memory to see if our exception handling works
utility::scan_relative_reference(0, 10000, 12345);

const auto strs = utility::collect_ascii_string_references((uintptr_t)&RTTITest::some_function_that_has_strings, 1000, utility::StringReferenceOptions{}.with_min_length(4));

for (const auto& str : strs) {
std::cout << "Found string reference: " << str.ascii << " @ " << std::hex << str.resolved.addr << std::endl;
}

const auto wstrs = utility::collect_unicode_string_references((uintptr_t)&RTTITest::some_function_that_has_strings, 1000, utility::StringReferenceOptions{}.with_min_length(4));

for (const auto& str : wstrs) {
std::wcout << L"Found WIDE string reference: " << str.unicode << L" @ " << std::hex << str.resolved.addr << std::endl;
}

KANANLIB_ASSERT(test_avx2_displacement_scan() == 0);

Expand Down

0 comments on commit faa6de4

Please sign in to comment.