diff --git a/samples/__init__.py b/samples/__init__.py new file mode 100644 index 0000000000..dec34c1c80 --- /dev/null +++ b/samples/__init__.py @@ -0,0 +1,50 @@ +from typing import Optional, Dict + +import hashlib +import os +import urllib.request + +from refinery.units.crypto.cipher.aes import aes + + +class SampleStore: + cache: Dict[str, bytes] + + def __init__(self): + self.cache = {} + + def download(self, sha256hash: str, key: Optional[str] = None): + def tobytearray(r): + if isinstance(r, bytearray): + return r + return bytearray(r) + key = key or 'REFINERYTESTDATA' + key = key.encode('latin1') + sha256hash = sha256hash.lower() + req = urllib.request.Request( + F'https://github.com/binref/refinery-test-data/blob/master/{sha256hash}.enc?raw=true') + try: + with urllib.request.urlopen(req) as response: + encoded_sample = tobytearray(response.read()) + except Exception: + api = os.environ['MALSHARE_API'] + req = urllib.request.Request( + F'https://malshare.com/api.php?api_key={api}&action=getfile&hash={sha256hash}') + with urllib.request.urlopen(req) as response: + result = tobytearray(response.read()) + else: + result = encoded_sample | aes(mode='CBC', key=key) | bytearray + if not result or hashlib.sha256(result).hexdigest().lower() != sha256hash: + raise ValueError('sample did not decode correctly') + self.cache[sha256hash] = result + return result + + def get(self, sha256hash: str, key: Optional[str] = None): + for cached, value in self.cache.items(): + if cached.casefold() == sha256hash.casefold(): + return value + else: + return self.download(sha256hash, key) + + def __getitem__(self, sha256hash: str): + return self.get(sha256hash) diff --git a/strip-tutorials.py b/strip-tutorials.py new file mode 100644 index 0000000000..dd3102fefd --- /dev/null +++ b/strip-tutorials.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Strips the Jupyter Notebooks in the Tutorial section of run count information. 
+""" +import json +import pathlib + +for path in pathlib.Path.cwd().glob('./tutorials/*.ipynb'): + with path.open('r') as fd: + notebook = json.load(fd) + for cell in notebook['cells']: + cell.pop('execution_count', None) + with path.open('w') as fd: + json.dump(notebook, fd, indent=1) diff --git a/test/__init__.py b/test/__init__.py index 469a79fd4d..f152d1c16d 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1,60 +1,13 @@ -from typing import Optional, Dict - -import hashlib import logging -import os import random import refinery import string import unittest -import urllib.request - -from refinery.units.crypto.cipher.aes import aes - -__all__ = ['refinery', 'TestBase', 'NameUnknownException'] +from samples import SampleStore -class SampleStore: - cache: Dict[str, bytes] - def __init__(self): - self.cache = {} - - def download(self, sha256hash: str, key: Optional[str] = None): - def tobytearray(r): - if isinstance(r, bytearray): - return r - return bytearray(r) - key = key or 'REFINERYTESTDATA' - key = key.encode('latin1') - sha256hash = sha256hash.lower() - req = urllib.request.Request( - F'https://github.com/binref/refinery-test-data/blob/master/{sha256hash}.enc?raw=true') - try: - with urllib.request.urlopen(req) as response: - encoded_sample = tobytearray(response.read()) - except Exception: - api = os.environ['MALSHARE_API'] - req = urllib.request.Request( - F'https://malshare.com/api.php?api_key={api}&action=getfile&hash={sha256hash}') - with urllib.request.urlopen(req) as response: - result = tobytearray(response.read()) - else: - result = encoded_sample | aes(mode='CBC', key=key) | bytearray - if not result or hashlib.sha256(result).hexdigest().lower() != sha256hash: - raise ValueError('sample did not decode correctly') - self.cache[sha256hash] = result - return result - - def get(self, sha256hash: str, key: Optional[str] = None): - for cached, value in self.cache.items(): - if cached.casefold() == sha256hash.casefold(): - return value - else: - 
return self.download(sha256hash, key) - - def __getitem__(self, sha256hash: str): - return self.get(sha256hash) +__all__ = ['refinery', 'TestBase', 'NameUnknownException'] class NameUnknownException(Exception): diff --git a/tutorials/boilerplate.py b/tutorials/boilerplate.py index 5e0bc9d235..8a612a19d0 100644 --- a/tutorials/boilerplate.py +++ b/tutorials/boilerplate.py @@ -13,17 +13,19 @@ import hashlib import logging import re +import requests import shlex import getpass - -os.environ['REFINERY_TERM_SIZE'] = '120' -os.environ['REFINERY_COLORLESS'] = '1' +if True: + os.environ['REFINERY_TERM_SIZE'] = '120' + os.environ['REFINERY_COLORLESS'] = '1' from refinery.lib.meta import SizeInt from refinery.lib.loader import load_pipeline from refinery.units import Executable, Unit -from test import SampleStore + +from samples import SampleStore logging.disable(logging.CRITICAL) Executable.Entry = '__DEMO__' @@ -203,6 +205,18 @@ def show(line: str): return Image(filename=line.strip()) +@register_cell_magic +def cat(line: str, cell=None): + cat, _, out = line.partition('>') + cat, _, eof = cat.partition('<<') + out = out.strip() + eof = eof.strip() + cell = cell or '' + cell, _, _ = cell.partition(eof) + cell = cell.strip() + store.cache[out] = cell.encode('utf8') + + def store_sample(hash: str, name: Optional[str] = None, key: Optional[str] = None): store.download(hash, key=key) if name is None: @@ -212,3 +226,14 @@ def store_sample(hash: str, name: Optional[str] = None, key: Optional[str] = Non def store_clear(): store.cache.clear() + + +@register_line_magic +def flare(line: str): + name, _, pattern = line.strip().partition(' ') + url = F'https://www.awarenetwork.org/home/outlaw/ctfs/flareon/{name}' + store_clear() + store.cache[name] = requests.get(url).content + emit(F'{name} | xt7z {pattern} [| dump {{path}} ]') + rm(name) + ls() diff --git a/tutorials/tbr-files.v0x01.netwalker.dropper.ipynb b/tutorials/tbr-files.v0x01.netwalker.dropper.ipynb index 
d5f7e8c55b..2afe2c16ee 100644 --- a/tutorials/tbr-files.v0x01.netwalker.dropper.ipynb +++ b/tutorials/tbr-files.v0x01.netwalker.dropper.ipynb @@ -1,881 +1,859 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The Refinery Files 0x01: NetWalker Dropper\n", - "\n", - "This is the first tutorial on how to use the [binary refinery][refinery] (or binref for short). It is a command-line toolkit inspired by [cyberchef][], where Unix-style [pipelines][pipeline] are used to combine various transformations. The intended use case is malware triage and analysis. We will be looking at the file with the following SHA-256 hash:\n", - "```\n", - "ccd495bae43f026e05f00ebc74f989d5657e010854ce4d8870e7b9371b0222b9\n", - "```\n", - "Spoiler Alert: It contains a NetWalker Ransomware sample. This is **malware**, do not execute it unless you know exactly what you are doing.\n", - "\n", - "[refinery]: https://github.com/binref/refinery/\n", - "[cyberchef]: https://github.com/gchq/CyberChef\n", - "[pipeline]: https://en.wikipedia.org/wiki/Pipeline_(Unix)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installation\n", - "\n", - "You can setup binary refinery like this in a temporary virtual environment:\n", - "```\n", - "$ python3 -m venv br\n", - "$ source ./br/bin/activate\n", - "(br) $ pip3 install -U git+git://github.com/binref/refinery.git\n", - " ... PIP MAKES WAR UPON THE FORCES OF DEPENDENCY HELL ...\n", - "(br) $ \n", - "```\n", - "This Jupyter notebook uses [dark magic](boilerplate.py) to simulate working in a directory with a single file named `nl.ps1`, which has the SHA-256 hash mentioned above. Running this Jupyter notebook locally will cache all files in memory, and none of the malware samples will actually be written to your hard drive. 
The reason this file is a Jupyter notebook is primarily so that it can be re-run, making sure that the output of the below refinery commands accurately reflects what you would see when using the most recent version of the toolkit." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from tutorials import boilerplate\n", - "boilerplate.store_sample('ccd495bae43f026e05f00ebc74f989d5657e010854ce4d8870e7b9371b0222b9', 'nl.ps1')" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "00.926 MB ccd495bae43f026e05f00ebc74f989d5657e010854ce4d8870e7b9371b0222b9 nl.ps1\n" - ] - } - ], - "source": [ - "%ls" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extracting The Payload\n", - "\n", - "Our guest today is a PowerShell sample. A brief look into the file reveals that it contains large buffers encoded as arrays of hexadecimal integers, likely byte values. Because we assume that these buffers contain some sort of payload, we'll go ahead and use the [carve][] unit to get them out. The main documentation of refinery units is in their `-h` or `--help` output on the command line. The [carve][] unit has a lot of options, but we will only use two:\n", - "```\n", - "carve -s intarray\n", - "```\n", - "The flag `-s` is a shorthand for `--single` which instructs the unit to carve only the largest buffer it can find. The only required argument is the word `intarray`, which denotes the format that we want to carve. The `intarray` format represents a pattern for arrays of integers. We will pipe the result of this operation to the [peek][] unit, which gives us a brief preview of what was extracted. 
We use the `-d` (aka `--decode`) switch for [peek][] because the result should be plaintext:\n", - "\n", - "[peek]: https://binref.github.io/#refinery.peek\n", - "[carve]: https://binref.github.io/#refinery.carve" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - "00.594 MB; 40.34% entropy; ASCII text, with very long lines, with no line terminators\n", - "---------------------------------------------------------------------------------------------------------------[utf8]---\n", - "0xfd,0xea,0x20,0xb0,0xb3,0xb0,0xb0,0xb0,0xb4,0xb0,0xb0,0xb0,0x4f,0x4f,0xb0,0xb0,0x08,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,\n", - "0xf0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,\n", - "0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0x70,0xb0,0xb0,0xb0,0xbe,0xaf,0x0a,0xbe,0xb0,0x04,0xb9,0x7d,\n", - "0x91,0x08,0xb1,0xfc,0x7d,0x91,0xe4,0xd8,0xd9,0xc3,0x90,0xc0,0xc2,0xdf,0xd7,0xc2,0xd1,0xdd,0x90,0xd3,0xd1,0xde,0xde,0xdf,\n", - "0xc4,0x90,0xd2,0xd5,0x90,0xc2,0xc5,0xde,0x90,0xd9,0xde,0x90,0xf4,0xff,0xe3,0x90,0xdd,0xdf,0xd4,0xd5,0x9e,0xbd,0xbd,0xba,\n", - "0x94,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,\n", - "0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,\n", - "0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,\n", - "0xe0,0xf5,0xb0,0xb0,0xd4,0x36,0xb6,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0x40,0xb0,0x92,0x90,\n", - "0xbb,0xb2,0xbe,0xa0,0xb0,0xc6,0xb1,0xb0,0xb0,0xe6,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xe0,0x9b,0xb1,0xb0,0xb0,0xa0,0xb0,0xb0,\n", 
- "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit nl.ps1 | carve -s intarray | peek -dd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Alright, this looks exactly like the buffer we are interested in. Let us now decode this. The unit to turn textual representations of integers to bytes is called [pack][]. I will no longer use the `-d` switch for [peek][] because I don't expect the result to be printable any more:\n", - "\n", - "[pack]: https://binref.github.io/#refinery.pack\n", - "[peek]: https://binref.github.io/#refinery.peek" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - "00.119 MB; 80.32% entropy; data\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: FD EA 20 B0 B3 B0 B0 B0 B4 B0 B0 B0 4F 4F B0 B0 08 B0 B0 B0 B0 B0 B0 B0 F0 B0 B0 B0 ............OO..............\n", - "0001C: B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", - "00038: B0 B0 B0 B0 70 B0 B0 B0 BE AF 0A BE B0 04 B9 7D 91 08 B1 FC 7D 91 E4 D8 D9 C3 90 C0 ....p..........}....}.......\n", - "00054: C2 DF D7 C2 D1 DD 90 D3 D1 DE DE DF C4 90 D2 D5 90 C2 C5 DE 90 D9 DE 90 F4 FF E3 90 ............................\n", - "00070: DD DF D4 D5 9E BD BD BA 94 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", - "0008C: B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", - "000A8: B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 E0 F5 B0 B0 
............................\n", - "000C4: D4 36 B6 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 40 B0 92 90 BB B2 BE A0 B0 C6 B1 B0 .6..............@...........\n", - "000E0: B0 E6 B0 B0 B0 B0 B0 B0 E0 9B B1 B0 B0 A0 B0 B0 B0 B0 B0 30 B1 B0 B0 B0 B0 A0 B0 B0 ...................0........\n", - "000FC: B0 B2 B0 B0 B6 B0 B0 B0 B0 B0 B0 B0 B5 B0 B0 B0 B0 B0 B0 B0 B0 90 B2 B0 B0 B4 B0 B0 ............................\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit nl.ps1 | carve -s intarray | pack | peek" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The trained malware analyst easily spots the repeated byte `0xB0` and suspects a single-byte XOR encryption. To XOR the entire extracted buffer with `0xB0`, we use, well, the [xor][] unit:\n", - "\n", - "[peek]: https://binref.github.io/#refinery.peek\n", - "[xor]: https://binref.github.io/#refinery.xor" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - "00.119 MB; 80.32% entropy; PE32+ executable (DLL) (GUI) x86-64, for MS Windows\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", - "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "00038: 00 00 00 00 C0 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", - "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 
rogram.cannot.be.run.in.DOS.\n", - "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", - "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 ........................PE..\n", - "000C4: 64 86 06 00 00 00 00 00 00 00 00 00 00 00 00 00 F0 00 22 20 0B 02 0E 10 00 76 01 00 d.................\"......v..\n", - "000E0: 00 56 00 00 00 00 00 00 50 2B 01 00 00 10 00 00 00 00 00 80 01 00 00 00 00 10 00 00 .V......P+..................\n", - "000FC: 00 02 00 00 06 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 00 20 02 00 00 04 00 00 ............................\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit nl.ps1 | carve -s intarray | pack | xor 0xB0 | peek" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Alright, this looks like the payload, and we can now [dump][] it to disk:\n", - "\n", - "[dump]: https://binref.github.io/#refinery.dump" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "%emit nl.ps1 | carve -s intarray | pack | xor 0xB0 | dump payload.dll" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "00.926 MB ccd495bae43f026e05f00ebc74f989d5657e010854ce4d8870e7b9371b0222b9 nl.ps1\n", - "00.119 MB 419ab9eaa1c64eed1d6d005ebc0c30bdc4e949ea7ee2cfee5dd34e6b3915bc02 payload.dll\n" - ] - } - ], - "source": [ - "%ls" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extracting The Other Payload\n", - "\n", - "Looking at the extracted executable, it's only 119kB in size. 
The hexadecimal encoding in the PowerShell script constitutes a blowup by 5: The byte `0` becomes `0x00` plus a comma character. However, even considering that, we only get 594kB even though the loader script is 926kB in size. What of the other 331kB? Scrolling through the file, it becomes obvious that there is another buffer in there.\n", - "\n", - "In this section, we will use the framing syntax to extract both buffers. All refinery units can, in principle, produce multiple outputs for one given input. By default, multiple outputs are separated by line break characters. For example, we can use [carve][] with the `printable` format option to extract all printable strings from the payload. I use the options `--min` and `--max` to only return strings of length at least `20` and at most `100`:\n", - "\n", - "[carve]: https://binref.github.io/#refinery.carve" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "!This program cannot be run in DOS mode.\n", - "$\n", - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/\n", - " !\"#$%&'()*+,-./0123\n", - "expand 32-byte kexpand 16-byte k\n", - "Launcher.SystemSettings\n" - ] - } - ], - "source": [ - "%emit payload.dll | carve --min=20 --max=100 printable" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The [carve][] unit has carved 5 printable substrings and they were separated by line breaks in the output. Since we have used the `-s` switch to carve the largest payload, the [carve][] unit only had a single output in our previous example. However, it can easily be used to extract the two longest matching patterns by specifying the arguments `--longest` and `--take=2`, or `-lt2` for short. We are certainly not interested in having those buffers be printed to the command line, and we do not want them separated by line breaks for any other reason either.
We would like to do additional processing on each one of them individually. Let's find out how to do that.\n", - "\n", - "By adding the symbol `[` as the last argument to a refinery unit, you instruct all subsequent refinery units to work on each of the outputs individually and in sequence. Such a stream of multiple items is called a **frame**, and the items themselves are referred to as **chunks**. Internally, this simply means that when a unit receives the `[` argument, the output is serialized in a refinery-specific format so that subsequent units can understand it as a stream of multiple outputs rather than just a single blob. The last unit that performs processing inside the frame should receive the symbol `]` as its last argument: This instructs the unit to concatenate all chunks. When chunks are merged at the end of a frame, no line breaks or other separators are inserted. See also the [module documentation for the frame module][frame].\n", - "\n", - "The following example reads our sample, then [carve][]s the two largest integer array buffers from it, converts this to binary, and then [peek][]s the results:\n", - "\n", - "[carve]: https://binref.github.io/#refinery.carve\n", - "[peek]: https://binref.github.io/#refinery.peek\n", - "[frame]: https://binref.github.io/lib/frame.html" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - "60.928 kB; 83.07% entropy; data\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: 0A 1D D7 47 44 47 47 47 43 47 47 47 B8 B8 47 47 FF 47 47 47 47 47 47 47 07 47 47 47 ...GDGGGCGGG..GG.GGGGGGG.GGG\n", - "0001C: 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 
GGGGGGGGGGGGGGGGGGGGGGGGGGGG\n", - "00038: 47 47 47 47 FF 47 47 47 49 58 FD 49 47 F3 4E 8A 66 FF 46 0B 8A 66 13 2F 2E 34 67 37 GGGG.GGGIX.IG.N.f.F..f./.4g7\n", - "00054: 35 28 20 35 26 2A 67 24 26 29 29 28 33 67 25 22 67 35 32 29 67 2E 29 67 03 08 14 67 5(.5&*g$&))(3g%\"g52)g.)g...g\n", - "00070: 2A 28 23 22 69 4A 4A 4D 63 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 *(#\"iJJMcGGGGGGGGGGGGGGGGGGG\n", - "0008C: 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 GGGGGGGGGGGGGGGGGGGGGGGGGGGG\n", - "000A8: 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 17 02 47 47 0B 46 42 47 47 47 47 47 GGGGGGGGGGGGGGGG..GG.FBGGGGG\n", - "000C4: 47 47 47 47 47 47 47 47 A7 47 45 66 4C 46 49 57 47 81 47 47 47 63 47 47 47 47 47 47 GGGGGGGG.GEfLFIWG.GGGcGGGGGG\n", - "000E0: 87 EC 47 47 47 57 47 47 47 A7 47 47 47 47 47 57 47 57 47 47 47 45 47 47 41 47 47 47 ..GGGWGGG.GGGGGWGWGGGEGGAGGG\n", - "000FC: 47 47 47 47 42 47 47 47 47 47 47 47 47 77 46 47 47 43 47 47 47 47 47 47 45 47 07 43 GGGGBGGGGGGGGwFGGCGGGGGGEG.C\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00.119 MB; 80.32% entropy; data\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: FD EA 20 B0 B3 B0 B0 B0 B4 B0 B0 B0 4F 4F B0 B0 08 B0 B0 B0 B0 B0 B0 B0 F0 B0 B0 B0 ............OO..............\n", - "0001C: B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", - "00038: B0 B0 B0 B0 70 B0 B0 B0 BE AF 0A BE B0 04 B9 7D 91 08 B1 FC 7D 91 E4 D8 D9 C3 90 C0 ....p..........}....}.......\n", - "00054: C2 DF D7 C2 D1 DD 90 D3 D1 DE DE DF C4 90 D2 D5 90 C2 C5 DE 90 D9 DE 90 F4 FF E3 90 ............................\n", - "00070: DD DF D4 D5 9E BD BD BA 94 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", - "0008C: B0 B0 B0 
B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", - "000A8: B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 E0 F5 B0 B0 ............................\n", - "000C4: D4 36 B6 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 40 B0 92 90 BB B2 BE A0 B0 C6 B1 B0 .6..............@...........\n", - "000E0: B0 E6 B0 B0 B0 B0 B0 B0 E0 9B B1 B0 B0 A0 B0 B0 B0 B0 B0 30 B1 B0 B0 B0 B0 A0 B0 B0 ...................0........\n", - "000FC: B0 B2 B0 B0 B6 B0 B0 B0 B0 B0 B0 B0 B5 B0 B0 B0 B0 B0 B0 B0 B0 90 B2 B0 B0 B4 B0 B0 ............................\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit nl.ps1 | carve -lt2 intarray [| pack | peek ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We have a bit of a problem here: It looks like different keys were used to encrypt the two payloads. The first buffer was encrypted using the byte `0x47` while the second one (the one we already saw before) was encrypted using `0xB0`. There are several ways to do this in transit, but we will get to that later. 
For now, let's just [dump][] the two buffers to disk and deal with them individually:\n", - "\n", - "[dump]: https://binref.github.io/#refinery.dump" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "%emit nl.ps1 | carve -lt2 intarray [| pack | dump encrypted-0x47.bin encrypted-0xB0.bin ]" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "00.926 MB ccd495bae43f026e05f00ebc74f989d5657e010854ce4d8870e7b9371b0222b9 nl.ps1\n", - "00.119 MB 419ab9eaa1c64eed1d6d005ebc0c30bdc4e949ea7ee2cfee5dd34e6b3915bc02 payload.dll\n", - "60.928 kB 120101d5f020c8810074fc65aa2b75c237b3535d16a220e52af108dba9f40f85 encrypted-0x47.bin\n", - "00.119 MB 285709f0c66b0d33154bcad6d8e43860dde7bcc63945fc53aeca1cb76d71b18d encrypted-0xB0.bin\n" - ] - } - ], - "source": [ - "%ls" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "%emit encrypted-0x47.bin | xor 0x47 | dump payload1.dll" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "%emit encrypted-0xB0.bin | xor 0xB0 | dump payload2.dll" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A quick sanity check to make sure we used the right keys:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - " entropy = 83.07%\n", - " magic = PE32 executable (DLL) (GUI) Intel 80386, for MS Windows\n", - " size = 60.928 kB\n", - "------------------------------------------------------------------------------------------------------------------------\n", - " entropy = 80.32%\n", - " magic = PE32+ executable (DLL) 
(GUI) x86-64, for MS Windows\n", - " size = 00.119 MB\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit payload1.dll payload2.dll [| peek -ml0 ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The [emit][] unit emits one chunk for each file that it reads from disk, in this case it will read the two DLL files and produce two chunks. We used `--lines=0` aka `-l0` options of [peek][] to get only a brief summary of their metadata to check that they decrypted to valid PE files. Only a few hours of reverse engineering later, you will be able to confirm your suspicion that `payload1.dll` is the 32bit variant of `payload2.dll`. The loader will deploy one or the other depending on the system architecture.\n", - "\n", - "[emit]: https://binref.github.io/#refinery.emit\n", - "[peek]: https://binref.github.io/#refinery.peek" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extracting Both Payloads\n", - "\n", - "We will now show how to decrypt the two payloads in transit, i.e. without temporarily writing the encrypted buffers to disk. This is a great opportunity to illustrate a powerful feature of refinery.
As a spoiler, here's a way to decrypt the two buffers without dumping them to disk:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - "60.928 kB; 83.07% entropy; PE32 executable (DLL) (GUI) Intel 80386, for MS Windows\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", - "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "00038: 00 00 00 00 B8 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", - "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", - "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", - "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 4C 01 05 00 00 00 00 00 ................PE..L.......\n", - "000C4: 00 00 00 00 00 00 00 00 E0 00 02 21 0B 01 0E 10 00 C6 00 00 00 24 00 00 00 00 00 00 ...........!.........$......\n", - "000E0: C0 AB 00 00 00 10 00 00 00 E0 00 00 00 00 00 10 00 10 00 00 00 02 00 00 06 00 00 00 ............................\n", - "000FC: 00 00 00 00 05 00 00 00 00 00 00 00 00 30 01 00 00 04 00 00 00 00 00 00 02 00 40 04 .............0............@.\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00.119 MB; 
80.32% entropy; PE32+ executable (DLL) (GUI) x86-64, for MS Windows\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", - "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "00038: 00 00 00 00 C0 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", - "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", - "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", - "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 ........................PE..\n", - "000C4: 64 86 06 00 00 00 00 00 00 00 00 00 00 00 00 00 F0 00 22 20 0B 02 0E 10 00 76 01 00 d.................\"......v..\n", - "000E0: 00 56 00 00 00 00 00 00 50 2B 01 00 00 10 00 00 00 00 00 80 01 00 00 00 00 10 00 00 .V......P+..................\n", - "000FC: 00 02 00 00 06 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 00 20 02 00 00 04 00 00 ............................\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit nl.ps1 | carve -lt2 intarray [| pack | xor copy:3 | peek ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As you can see, we are passing the argument `copy:3` to the [xor][] unit. 
It does work, but we'll have to dig a little deeper to understand what is going on and why it works.\n", - "\n", - "[xor]: https://binref.github.io/#refinery.xor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Multibin Arguments\n", - "\n", - "In previous examples, we have called [xor][] with the arguments `0x47` and `0xB0`, which refinery interpreted as an integer representing a byte value with which to xor every byte in the input stream. However, you could also write any of the following:\n", - "\n", - "- `xor h:dff4f503bb` - xor with the hexadecimal encoded byte sequence `DFF4F503BB`\n", - "- `xor s:terrordome` - xor with the utf8-encoded string `terrordome`\n", - "- `xor 7,3,12,120,8` - xor with the given sequence of values, i.e. `07030C7808` in hexadecimal\n", - "\n", - "These are all examples of so-called **multibin** arguments. A multibin argument starts with a number of **handlers**. A **handler** is a short identifier separated from the rest of the expression by a colon. In the above examples, `h` (for **hex**) and `s` (for **string**) are handlers. Most handlers will process the remaining expression as a multibin again, but both `h` and `s` are **final** handlers, which means that the remaining expression will not be parsed any further. This gives you two very certain ways to pass data to a refinery unit in case you are uncertain about potential multibin parsing:\n", - "```\n", - "(br) $ emit h:\n", - "(br) $ emit s:h:\n", - "h:\n", - "(br) $ emit h:s:\n", - "usage: emit [-h] [-L] [-Q] [-0] [-v] [data [data ...]]\n", - "emit: error: argument data: invalid multibin value: 'h:s:'\n", - "```\n", - "The first emits the empty hexadecimal string (which is empty), the second emits the utf8-string `h:`, and the third example tries to emit the hexadecimal string `s:`, which is nonsense, because neither `s` nor `:` are hexadecimal characters. We get a well-deserved error.
When no handlers are given, a multibin value is evaluated based on its default handler:\n", - "\n", - "- Most units use the standard default handler: It first attempts to interpret the given argument as a file name and will use the contents of that file if it exists. If that fails, it will encode the string to a byte sequence using UTF8.\n", - "- Arithmetic and bitwise block operations (like [xor][], [sub][], [add][], [shr][], [shl][], [rotr][], [rotl][], [neg][]) will attempt to interpret the given argument as a Python expression representing an integer or a sequence of integers. Only when this fails do they revert to the standard default handler.\n", - "- The regular expression units [rex][], [resub][], and [resplit][] do not try to open any files, and they also provide a few additional handlers.\n", - "\n", - "The module documentation of the [argformats][] module contains all handlers and documents their purpose.\n", - "\n", - "[argformats]: https://binref.github.io/lib/argformats.html\n", - "\n", - "[emit]: https://binref.github.io/#refinery.emit\n", - "[rotl]: https://binref.github.io/#refinery.rotl\n", - "[rotr]: https://binref.github.io/#refinery.rotr\n", - "[rex]: https://binref.github.io/#refinery.rex\n", - "[shl]: https://binref.github.io/#refinery.shl\n", - "[shr]: https://binref.github.io/#refinery.shr\n", - "[sub]: https://binref.github.io/#refinery.sub\n", - "[add]: https://binref.github.io/#refinery.add\n", - "[xor]: https://binref.github.io/#refinery.xor\n", - "[neg]: https://binref.github.io/#refinery.neg\n", - "[drp]: https://binref.github.io/#refinery.drp\n", - "[resplit]: https://binref.github.io/#refinery.resplit\n", - "[resub]: https://binref.github.io/#refinery.resub" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The Copy Handler\n", - "\n", - "In our example, we used the `copy` handler for the argument to [xor][]. This handler is final, just like `s` and `h`. 
It also has the short version `c`, so you could just as well write the following to decrypt both buffers:\n", - "\n", - "[xor]: https://binref.github.io/#refinery.xor" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - "60.928 kB; 83.07% entropy; PE32 executable (DLL) (GUI) Intel 80386, for MS Windows\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", - "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "00038: 00 00 00 00 B8 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", - "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", - "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", - "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 4C 01 05 00 00 00 00 00 ................PE..L.......\n", - "000C4: 00 00 00 00 00 00 00 00 E0 00 02 21 0B 01 0E 10 00 C6 00 00 00 24 00 00 00 00 00 00 ...........!.........$......\n", - "000E0: C0 AB 00 00 00 10 00 00 00 E0 00 00 00 00 00 10 00 10 00 00 00 02 00 00 06 00 00 00 ............................\n", - "000FC: 00 00 00 00 05 00 00 00 00 00 00 00 00 30 01 00 00 04 00 00 00 00 00 00 02 00 40 04 .............0............@.\n", - 
"------------------------------------------------------------------------------------------------------------------------\n", - "00.119 MB; 80.32% entropy; PE32+ executable (DLL) (GUI) x86-64, for MS Windows\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", - "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "00038: 00 00 00 00 C0 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", - "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", - "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", - "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 ........................PE..\n", - "000C4: 64 86 06 00 00 00 00 00 00 00 00 00 00 00 00 00 F0 00 22 20 0B 02 0E 10 00 76 01 00 d.................\"......v..\n", - "000E0: 00 56 00 00 00 00 00 00 50 2B 01 00 00 10 00 00 00 00 00 80 01 00 00 00 00 10 00 00 .V......P+..................\n", - "000FC: 00 02 00 00 06 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 00 20 02 00 00 04 00 00 ............................\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit nl.ps1 | carve -lt2 intarray [| pack | xor c:3 | peek ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `copy` handler is final. 
It parses the remaining expression as a Python index expression with support for slice notation; the value of the argument will be the corresponding slice copied from the input to the currently operating unit. In our example, we simply want the **fourth byte of the input** (i.e. the one at index `3`) to be used as the XOR key. Just to demonstrate, we could equally well have copied bytes 5, 6, and 7 (all of which decrypt to zero bytes):" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - "60.928 kB; 83.07% entropy; PE32 executable (DLL) (GUI) Intel 80386, for MS Windows\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", - "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "00038: 00 00 00 00 B8 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", - "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", - "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", - "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 4C 01 05 00 00 00 00 00 ................PE..L.......\n", - "000C4: 00 00 00 00 00 00 00 00 E0 00 02 21 0B 01 0E 10 00 C6 00 00 00 24 00 00 00 00 00 00 ...........!.........$......\n", - "000E0: C0 AB 00 00 00 10 00 
00 00 E0 00 00 00 00 00 10 00 10 00 00 00 02 00 00 06 00 00 00 ............................\n", - "000FC: 00 00 00 00 05 00 00 00 00 00 00 00 00 30 01 00 00 04 00 00 00 00 00 00 02 00 40 04 .............0............@.\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00.119 MB; 80.32% entropy; PE32+ executable (DLL) (GUI) x86-64, for MS Windows\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", - "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "00038: 00 00 00 00 C0 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", - "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", - "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", - "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 ........................PE..\n", - "000C4: 64 86 06 00 00 00 00 00 00 00 00 00 00 00 00 00 F0 00 22 20 0B 02 0E 10 00 76 01 00 d.................\"......v..\n", - "000E0: 00 56 00 00 00 00 00 00 50 2B 01 00 00 10 00 00 00 00 00 80 01 00 00 00 00 10 00 00 .V......P+..................\n", - "000FC: 00 02 00 00 06 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 00 20 02 00 00 04 00 00 ............................\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit 
nl.ps1 | carve -lt2 intarray [| pack | xor c:5:8 | peek ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, `5:8` is the Python slice starting at index `5` and stopping before reaching `8`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Unit Handlers\n", - "\n", - "Every available refinery unit can also be used as a handler. Using `copy:3` as the decryption key does work very well, but we can more succinctly express the heuristic that we used. The [drp][] unit finds and detects frequently repeating patterns in its input data. Hence, if you suspect a single byte XOR to have been used on a buffer that contains a lot of zero bytes (like a PE file), the following will work:\n", - "\n", - "[drp]: https://binref.github.io/#refinery.drp" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - "60.928 kB; 83.07% entropy; PE32 executable (DLL) (GUI) Intel 80386, for MS Windows\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", - "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "00038: 00 00 00 00 B8 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", - "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", - "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", - "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 4C 01 05 00 00 00 00 00 ................PE..L.......\n", - "000C4: 00 00 00 00 00 00 00 00 E0 00 02 21 0B 01 0E 10 00 C6 00 00 00 24 00 00 00 00 00 00 ...........!.........$......\n", - "000E0: C0 AB 00 00 00 10 00 00 00 E0 00 00 00 00 00 10 00 10 00 00 00 02 00 00 06 00 00 00 ............................\n", - "000FC: 00 00 00 00 05 00 00 00 00 00 00 00 00 30 01 00 00 04 00 00 00 00 00 00 02 00 40 04 .............0............@.\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00.119 MB; 80.32% entropy; PE32+ executable (DLL) (GUI) x86-64, for MS Windows\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", - "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "00038: 00 00 00 00 C0 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", - "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", - "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", - "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", - "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 ........................PE..\n", - "000C4: 64 86 06 00 00 00 00 00 00 00 00 00 00 00 00 00 F0 00 22 20 0B 02 0E 10 00 76 01 00 d.................\"......v..\n", - "000E0: 00 56 00 00 00 00 00 00 50 2B 01 00 00 10 00 00 00 
00 00 80 01 00 00 00 00 10 00 00 .V......P+..................\n", - "000FC: 00 02 00 00 06 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 00 20 02 00 00 04 00 00 ............................\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit nl.ps1 | carve -lt2 intarray [| pack | xor drp:c::100 | peek ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The argument to [xor][] now first copies the first 100 bytes from the input using `c::100`. These bytes are passed to the [drp][] unit, which will extract the most frequent repeating byte pattern from it. In our example, the patterns are just single bytes, but this method can also work for longer XOR keys.\n", - "\n", - "[xor]: https://binref.github.io/#refinery.xor\n", - "[drp]: https://binref.github.io/#refinery.drp" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extracting The Configuration\n", - "\n", - "The Netwalker configuration is stored as an RC4 encrypted buffer in a resource called `31337`, which is usually the only PE resource of the file. 
The buffer starts with a 32bit integer specifying the key length, followed by the key, followed by the encrypted data:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - "05.434 kB; 99.57% entropy; data\n", - "------------------------------------------------------------------------------------------------------------------------\n", - "00000: 05 00 00 00 73 23 44 6F 38 8D 3E 4C 31 50 31 BE 51 16 7B 33 81 7A 34 2F 77 50 44 6F ....s#Do8.>L1P1.Q.{3.z4/wPDo\n", - "0001C: 8B DB 55 0A 1D BC F4 5D 23 C6 E1 26 D4 FB FF FD 0D E1 34 4F 08 F5 2C A1 2D C4 7C 04 ..U....]#..&......4O..,.-.|.\n", - "00038: D4 BC 70 BB 47 CA 6C 2D E5 3A 45 B6 92 52 74 85 58 69 52 CB 9E 70 C2 26 32 0D 5A 0C ..p.G.l-.:E..Rt.XiR..p.&2.Z.\n", - "00054: 0A D6 65 1F 8E 87 90 77 5E 4A C8 AA EA 56 FD A4 94 FF BB 9F 16 83 4B A7 16 33 00 9E ..e....w^J...V........K..3..\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit nl.ps1 | carve -ds intarray | xor c:3 | perc | peek -l4" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The decrypted configuration is in JSON format. 
The following is how we can extract the Netwalker configuration from this dropper without ever writing a single intermediate result to disk:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - "08.867 kB; 55.91% entropy; ASCII text, with very long lines\n", - "---------------------------------------------------------------------------------------------------------------[utf8]---\n", - "{\n", - " \"mpk\": \"kzo1XdPfYBYrIPNqwr7YxsVS2rzbhlHusvwLlbNVowc=\",\n", - " \"mode\": 0,\n", - " \"spsz\": 4,\n", - " \"thr\": 1500,\n", - " \"namesz\": 8,\n", - " \"idsz\": 6,\n", - " \"pers\": false,\n", - " \"onion1\": \"pb36hu4spl6cyjdfhing7h3pw6dhpk32ifemawkujj4gp33ejzdq3did.onion\",\n", - " \"onion2\": \"rnfdsgm6wb6j6su5txkekw4u4y47kp2eatvu7d6xhyn5cs4lt4pdrqqd.onion\",\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit nl.ps1 [| carve -ds intarray | xor c:3 | perc | put k le:x::4 | rc4 x::k ]| ppjson | peek -d" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A lot of this already makes sense to us, but a few new things are happening, too. Firstly, we have used the `-d` (short for `--decode`) flag of [carve][]. For most patterns, there is an obvious decoding algorithm, and [carve][] can apply this decoding automatically. In the case of the `intarray` format, the [pack][] unit is invoked. After decrypting the payload, we use the [perc][] unit to extract all PE resources. 
We can use the `--list` option to get a list of all PE resources in the buffer:\n", - "\n", - "[perc]: https://binref.github.io/#refinery.perc\n", - "[pack]: https://binref.github.io/#refinery.pack\n", - "[carve]: https://binref.github.io/#refinery.carve" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1337/31337/0\n" - ] - } - ], - "source": [ - "%emit nl.ps1 | carve -ds intarray | xor c:3 | perc -l" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Since there is only one, we can simply continue processing the complete output of [perc][]. In general, [perc][] can be given a wildcard expression to select only the resources you are interested in, and [perc][] will then extract each of those as one output chunk. The next unit is where it gets interesting. We run the [put][] unit with the parameters `k` and `le:x::4`, and then we process the result using the [rc4][] unit with the argument `x::k`. You may have already guessed it, `k` is a variable containing the length of the RC4 key.\n", - "\n", - "Chunks in a refinery frame can carry a dictionary of metadata, also referred to as **meta variables**. As usual, it is recommended to also read [the official documentation about meta variables][meta]. There are a few units that can generate meta variables, and [put][] is likely the most straightforward way to do so. The [put][] unit takes as its first argument the name of the variable and as its second argument some multibin expression to store in that variable. In this case, we store `le:x::4`, which cuts out the first 4 bytes and decodes them to an integer using little-endian encoding (that's what the `le` handler does). From this point on, the variable `k` is available in the frame and can be used as part of multibin expressions. 
The [peek][] unit displays the contents of all meta variables that are present on a chunk; in this case, these are our variable `k` and the variables that [perc][] attached:\n", - "\n", - "[put]: https://binref.github.io/#refinery.put\n", - "[rc4]: https://binref.github.io/#refinery.rc4\n", - "[perc]: https://binref.github.io/#refinery.perc\n", - "[peek]: https://binref.github.io/#refinery.peek\n", - "\n", - "[meta]: https://binref.github.io/lib/meta.html" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - " k = 5\n", - " lcid = Neutral Locale Language\n", - " offset = 0x1B858\n", - " path = 1337/31337/0\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit nl.ps1 | carve -ds intarray | xor c:3 | perc [| put k le:x::4 | peek -l0 ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here you can see that the [perc][] unit has also attached metadata to the chunk, including the path of the resource that it extracted. \n", - "\n", - "[perc]: https://binref.github.io/#refinery.perc" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Conclusion\n", - "\n", - "Congratulations, you made it! This tutorial has introduced **framing syntax**, **multibin handlers**, and **meta variables**, and these are all the core concepts of the binary refinery toolkit. In combination, they can perform a fairly broad range of data transformations. Future tutorials will focus on extending the binary refinery with custom units and using refinery units within Python code. Stay tuned!" 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.7 ('venv')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "4bb4b02bf57a2c25456a741474d02d2de926aec3c451f22b312ec34f66909bb4" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The Refinery Files 0x01: NetWalker Dropper\n", + "\n", + "This is the first tutorial on how to use the [binary refinery][refinery] (or binref for short). It is a command-line toolkit inspired by [cyberchef][], where Unix-style [pipelines][pipeline] are used to combine various transformations. The intended use case is malware triage and analysis. We will be looking at the file with the following SHA-256 hash:\n", + "```\n", + "ccd495bae43f026e05f00ebc74f989d5657e010854ce4d8870e7b9371b0222b9\n", + "```\n", + "Spoiler Alert: It contains a NetWalker Ransomware sample. This is **malware**, do not execute it unless you know exactly what you are doing.\n", + "\n", + "[refinery]: https://github.com/binref/refinery/\n", + "[cyberchef]: https://github.com/gchq/CyberChef\n", + "[pipeline]: https://en.wikipedia.org/wiki/Pipeline_(Unix)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation\n", + "\n", + "You can set up binary refinery like this in a temporary virtual environment:\n", + "```\n", + "$ python3 -m venv br\n", + "$ source ./br/bin/activate\n", + "(br) $ pip3 install -U git+https://github.com/binref/refinery.git\n", + " ... 
PIP MAKES WAR UPON THE FORCES OF DEPENDENCY HELL ...\n", + "(br) $ \n", + "```\n", + "This Jupyter notebook uses [dark magic](boilerplate.py) to simulate working in a directory with a single file named `nl.ps1`, which has the SHA-256 hash mentioned above. Running this Jupyter notebook locally will cache all files in memory, and none of the malware samples will actually be written to your hard drive. The reason this file is a Jupyter notebook is primarily so that it can be re-run, making sure that the output of the below refinery commands accurately reflects what you would see when using the most recent version of the toolkit." + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [], + "source": [ + "from tutorials import boilerplate\n", + "boilerplate.store_sample('ccd495bae43f026e05f00ebc74f989d5657e010854ce4d8870e7b9371b0222b9', 'nl.ps1')" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "00.926 MB ccd495bae43f026e05f00ebc74f989d5657e010854ce4d8870e7b9371b0222b9 nl.ps1\n" + ] + } + ], + "source": [ + "%ls" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extracting The Payload\n", + "\n", + "Our guest today is a PowerShell sample. A brief look into the file reveals that it contains large buffers encoded as arrays of hexadecimal integers, likely byte values. Because we assume that these buffers contain some sort of payload, we'll go ahead and use the [carve][] unit to get them out. The main documentation of refinery units is in their `-h` or `--help` output on the command line. The [carve][] unit has a lot of options, but we will only use two:\n", + "```\n", + "carve -s intarray\n", + "```\n", + "The flag `-s` is a shorthand for `--single` which instructs the unit to carve only the largest buffer it can find. The only required argument is the word `intarray`, which denotes the format that we want to carve. 
The `intarray` format represents a pattern for arrays of integers. We will pipe the result of this operation to the [peek][] unit, which gives us a brief preview of what was extracted. We use the `-d` (aka `--decode`) switch for [peek][] because the result should be plaintext:\n", + "\n", + "[peek]: https://binref.github.io/#refinery.peek\n", + "[carve]: https://binref.github.io/#refinery.carve" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + "00.594 MB; 40.34% entropy; ASCII text, with very long lines, with no line terminators\n", + "---------------------------------------------------------------------------------------------------------------[utf8]---\n", + "0xfd,0xea,0x20,0xb0,0xb3,0xb0,0xb0,0xb0,0xb4,0xb0,0xb0,0xb0,0x4f,0x4f,0xb0,0xb0,0x08,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,\n", + "0xf0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,\n", + "0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0x70,0xb0,0xb0,0xb0,0xbe,0xaf,0x0a,0xbe,0xb0,0x04,0xb9,0x7d,\n", + "0x91,0x08,0xb1,0xfc,0x7d,0x91,0xe4,0xd8,0xd9,0xc3,0x90,0xc0,0xc2,0xdf,0xd7,0xc2,0xd1,0xdd,0x90,0xd3,0xd1,0xde,0xde,0xdf,\n", + "0xc4,0x90,0xd2,0xd5,0x90,0xc2,0xc5,0xde,0x90,0xd9,0xde,0x90,0xf4,0xff,0xe3,0x90,0xdd,0xdf,0xd4,0xd5,0x9e,0xbd,0xbd,0xba,\n", + "0x94,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,\n", + "0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,\n", + "0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,\n", + 
"0xe0,0xf5,0xb0,0xb0,0xd4,0x36,0xb6,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0x40,0xb0,0x92,0x90,\n", + "0xbb,0xb2,0xbe,0xa0,0xb0,0xc6,0xb1,0xb0,0xb0,0xe6,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xe0,0x9b,0xb1,0xb0,0xb0,0xa0,0xb0,0xb0,\n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit nl.ps1 | carve -s intarray | peek -dd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alright, this looks exactly like the buffer we are interested in. Let us now decode this. The unit to turn textual representations of integers to bytes is called [pack][]. I will no longer use the `-d` switch for [peek][] because I don't expect the result to be printable any more:\n", + "\n", + "[pack]: https://binref.github.io/#refinery.pack\n", + "[peek]: https://binref.github.io/#refinery.peek" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + "00.119 MB; 80.32% entropy; data\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: FD EA 20 B0 B3 B0 B0 B0 B4 B0 B0 B0 4F 4F B0 B0 08 B0 B0 B0 B0 B0 B0 B0 F0 B0 B0 B0 ............OO..............\n", + "0001C: B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", + "00038: B0 B0 B0 B0 70 B0 B0 B0 BE AF 0A BE B0 04 B9 7D 91 08 B1 FC 7D 91 E4 D8 D9 C3 90 C0 ....p..........}....}.......\n", + "00054: C2 DF D7 C2 D1 DD 90 D3 D1 DE DE DF C4 90 D2 D5 90 C2 C5 DE 90 D9 DE 90 F4 FF E3 90 ............................\n", + "00070: DD DF D4 D5 9E BD BD BA 94 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", + 
"0008C: B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", + "000A8: B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 E0 F5 B0 B0 ............................\n", + "000C4: D4 36 B6 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 40 B0 92 90 BB B2 BE A0 B0 C6 B1 B0 .6..............@...........\n", + "000E0: B0 E6 B0 B0 B0 B0 B0 B0 E0 9B B1 B0 B0 A0 B0 B0 B0 B0 B0 30 B1 B0 B0 B0 B0 A0 B0 B0 ...................0........\n", + "000FC: B0 B2 B0 B0 B6 B0 B0 B0 B0 B0 B0 B0 B5 B0 B0 B0 B0 B0 B0 B0 B0 90 B2 B0 B0 B4 B0 B0 ............................\n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit nl.ps1 | carve -s intarray | pack | peek" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The trained malware analyst easily spots the repeated byte `0xB0` and suspects a single-byte XOR encryption. 
To XOR the entire extracted buffer with `0xB0`, we use, well, the [xor][] unit:\n", + "\n", + "[peek]: https://binref.github.io/#refinery.peek\n", + "[xor]: https://binref.github.io/#refinery.xor" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + "00.119 MB; 80.32% entropy; PE32+ executable (DLL) (GUI) x86-64, for MS Windows\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", + "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "00038: 00 00 00 00 C0 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", + "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", + "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", + "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 ........................PE..\n", + "000C4: 64 86 06 00 00 00 00 00 00 00 00 00 00 00 00 00 F0 00 22 20 0B 02 0E 10 00 76 01 00 d.................\"......v..\n", + "000E0: 00 56 00 00 00 00 00 00 50 2B 01 00 00 10 00 00 00 00 00 80 01 00 00 00 00 10 00 00 .V......P+..................\n", + "000FC: 00 02 00 00 06 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 00 20 02 00 00 04 00 00 ............................\n", + 
"------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit nl.ps1 | carve -s intarray | pack | xor 0xB0 | peek" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alright, this looks like the payload, and we can now [dump][] it to disk:\n", + "\n", + "[dump]: https://binref.github.io/#refinery.dump" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [], + "source": [ + "%emit nl.ps1 | carve -s intarray | pack | xor 0xB0 | dump payload.dll" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "00.926 MB ccd495bae43f026e05f00ebc74f989d5657e010854ce4d8870e7b9371b0222b9 nl.ps1\n", + "00.119 MB 419ab9eaa1c64eed1d6d005ebc0c30bdc4e949ea7ee2cfee5dd34e6b3915bc02 payload.dll\n" + ] + } + ], + "source": [ + "%ls" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extracting The Other Payload\n", + "\n", + "Looking at the extracted executable, it's only 119kB in size. The hexadecimal encoding in the PowerShell script constitutes a blowup by 5: The byte `0` becomes `0x00` plus a comma character. However, even considering that, we only get 594kB even though the loader script is 926kB in size. What of the other 331kB? Scrolling through the file it becomes obvious that there is another buffer in there.\n", + "\n", + "In this section, we will use the framing syntax to extract both buffers. All refinery units can, in principle, produce multiple outputs for one given input. By default, multiple outputs are separated by line break characters. For example, we can use [carve][] with the `printable` format option to extract all printable strings from the payload. 
I use the options `--min` and `--max` to only return strings of length at least `20` and at most `100`:\n", + "\n", + "[carve]: https://binref.github.io/#refinery.carve" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "!This program cannot be run in DOS mode.\n", + "$\n", + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/\n", + " !\"#$%&'()*+,-./0123\n", + "expand 32-byte kexpand 16-byte k\n", + "Launcher.SystemSettings\n" + ] + } + ], + "source": [ + "%emit payload.dll | carve --min=20 --max=100 printable" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The [carve][] unit has carved 5 printable substrings and they were separated by line breaks in the output. Since we have used the `-s` switch to carve the largest payload, the [carve][] unit only had a single output in our previous example. However, it can easily be used to extract the two longest matching patterns by specifying the arguments `--longest` and `--take=2`, or `-lt2` for short. We are certainly not interested in having those buffers be printed to the command line, and we do not want them separated by line breaks for any other reason either. We would like to do additional processing on each one of them individually. Let's find out how to do that.\n", + "\n", + "By adding the symbol `[` as the last argument to a refinery unit, you instruct all subsequent refinery units to work on each of the outputs individually and in sequence. Such a stream of multiple items is called a **frame**, and the items themselves are referred to as **chunks**. Internally, this simply means that when a unit receives the `[` argument, the output is serialized in a refinery-specific format so that subsequent units can understand it as a stream of multiple outputs rather than just a single blob. 
The last unit that performs processing inside the frame should receive the symbol `]` as its last argument: This instructs the unit to concatenate all chunks. When chunks are merged at the end of a frame, no line breaks or other separators are inserted. See also the [module documentation for the frame module][frame].\n", + "\n", + "The following example reads our sample, then [carve][]s the two largest integer array buffers from it, converts this to binary, and then [peek][]s the results:\n", + "\n", + "[carve]: https://binref.github.io/#refinery.carve\n", + "[peek]: https://binref.github.io/#refinery.peek\n", + "[frame]: https://binref.github.io/lib/frame.html" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + "60.928 kB; 83.07% entropy; data\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: 0A 1D D7 47 44 47 47 47 43 47 47 47 B8 B8 47 47 FF 47 47 47 47 47 47 47 07 47 47 47 ...GDGGGCGGG..GG.GGGGGGG.GGG\n", + "0001C: 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 GGGGGGGGGGGGGGGGGGGGGGGGGGGG\n", + "00038: 47 47 47 47 FF 47 47 47 49 58 FD 49 47 F3 4E 8A 66 FF 46 0B 8A 66 13 2F 2E 34 67 37 GGGG.GGGIX.IG.N.f.F..f./.4g7\n", + "00054: 35 28 20 35 26 2A 67 24 26 29 29 28 33 67 25 22 67 35 32 29 67 2E 29 67 03 08 14 67 5(.5&*g$&))(3g%\"g52)g.)g...g\n", + "00070: 2A 28 23 22 69 4A 4A 4D 63 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 *(#\"iJJMcGGGGGGGGGGGGGGGGGGG\n", + "0008C: 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 GGGGGGGGGGGGGGGGGGGGGGGGGGGG\n", + "000A8: 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 47 17 02 47 47 0B 46 42 47 47 47 47 47 GGGGGGGGGGGGGGGG..GG.FBGGGGG\n", + "000C4: 
47 47 47 47 47 47 47 47 A7 47 45 66 4C 46 49 57 47 81 47 47 47 63 47 47 47 47 47 47 GGGGGGGG.GEfLFIWG.GGGcGGGGGG\n", + "000E0: 87 EC 47 47 47 57 47 47 47 A7 47 47 47 47 47 57 47 57 47 47 47 45 47 47 41 47 47 47 ..GGGWGGG.GGGGGWGWGGGEGGAGGG\n", + "000FC: 47 47 47 47 42 47 47 47 47 47 47 47 47 77 46 47 47 43 47 47 47 47 47 47 45 47 07 43 GGGGBGGGGGGGGwFGGCGGGGGGEG.C\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00.119 MB; 80.32% entropy; data\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: FD EA 20 B0 B3 B0 B0 B0 B4 B0 B0 B0 4F 4F B0 B0 08 B0 B0 B0 B0 B0 B0 B0 F0 B0 B0 B0 ............OO..............\n", + "0001C: B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", + "00038: B0 B0 B0 B0 70 B0 B0 B0 BE AF 0A BE B0 04 B9 7D 91 08 B1 FC 7D 91 E4 D8 D9 C3 90 C0 ....p..........}....}.......\n", + "00054: C2 DF D7 C2 D1 DD 90 D3 D1 DE DE DF C4 90 D2 D5 90 C2 C5 DE 90 D9 DE 90 F4 FF E3 90 ............................\n", + "00070: DD DF D4 D5 9E BD BD BA 94 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", + "0008C: B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 ............................\n", + "000A8: B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 E0 F5 B0 B0 ............................\n", + "000C4: D4 36 B6 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 B0 40 B0 92 90 BB B2 BE A0 B0 C6 B1 B0 .6..............@...........\n", + "000E0: B0 E6 B0 B0 B0 B0 B0 B0 E0 9B B1 B0 B0 A0 B0 B0 B0 B0 B0 30 B1 B0 B0 B0 B0 A0 B0 B0 ...................0........\n", + "000FC: B0 B2 B0 B0 B6 B0 B0 B0 B0 B0 B0 B0 B5 B0 B0 B0 B0 B0 B0 B0 B0 90 B2 B0 B0 B4 B0 B0 ............................\n", + 
"------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit nl.ps1 | carve -lt2 intarray [| pack | peek ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have a bit of a problem here: It looks like different keys were used to encrypt the two payloads. The first buffer was encrypted using the byte `0x47` while the second one (the one we already saw before) was encrypted using `0xB0`. There are several ways to do this in transit, but we will get to that later. For now, let's just [dump][] the two buffers to disk and deal with them individually:\n", + "\n", + "[dump]: https://binref.github.io/#refinery.dump" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [], + "source": [ + "%emit nl.ps1 | carve -lt2 intarray [| pack | dump encrypted-0x47.bin encrypted-0xB0.bin ]" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "00.926 MB ccd495bae43f026e05f00ebc74f989d5657e010854ce4d8870e7b9371b0222b9 nl.ps1\n", + "00.119 MB 419ab9eaa1c64eed1d6d005ebc0c30bdc4e949ea7ee2cfee5dd34e6b3915bc02 payload.dll\n", + "60.928 kB 120101d5f020c8810074fc65aa2b75c237b3535d16a220e52af108dba9f40f85 encrypted-0x47.bin\n", + "00.119 MB 285709f0c66b0d33154bcad6d8e43860dde7bcc63945fc53aeca1cb76d71b18d encrypted-0xB0.bin\n" + ] + } + ], + "source": [ + "%ls" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [], + "source": [ + "%emit encrypted-0x47.bin | xor 0x47 | dump payload1.dll" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [], + "source": [ + "%emit encrypted-0xB0.bin | xor 0xB0 | dump payload2.dll" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A quick sanity check to make sure we used the right keys:" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + " entropy = 83.07%\n", + " magic = PE32 executable (DLL) (GUI) Intel 80386, for MS Windows\n", + " size = 60.928 kB\n", + "------------------------------------------------------------------------------------------------------------------------\n", + " entropy = 80.32%\n", + " magic = PE32+ executable (DLL) (GUI) x86-64, for MS Windows\n", + " size = 00.119 MB\n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit payload1.dll payload2.dll [| peek -ml0 ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The [emit][] unit emits one chunk for each file that it reads from disk, in this case it will read the two DLL files and produce two chunks. We used `--lines=0` aka `-l0` options of [peek][] to get only a brief summary of their metadata to check that they decrypted to valid PE files. Only a few hours of reverse engineering later, you will be able to confirm your suspicion that `payload1.dll` is the 32bit variant of `payload2.dll`. The loader will deploy one or the other depending on the system architecture.\n", + "\n", + "[emit]: https://binref.github.io/#refinery.emit\n", + "[peek]: https://binref.github.io/#refinery.peek" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extracting Both Payloads\n", + "\n", + "We will now show how to decrypt the two payloads in transit, i.e. without temporarily writing the encrypted buffers to disk. This is a great opportunity to illustrate a powerful feature of refinery. 
As a spoiler, here's a way to decrypt the two buffers without dumping them to disk:" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + "60.928 kB; 83.07% entropy; PE32 executable (DLL) (GUI) Intel 80386, for MS Windows\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", + "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "00038: 00 00 00 00 B8 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", + "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", + "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", + "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 4C 01 05 00 00 00 00 00 ................PE..L.......\n", + "000C4: 00 00 00 00 00 00 00 00 E0 00 02 21 0B 01 0E 10 00 C6 00 00 00 24 00 00 00 00 00 00 ...........!.........$......\n", + "000E0: C0 AB 00 00 00 10 00 00 00 E0 00 00 00 00 00 10 00 10 00 00 00 02 00 00 06 00 00 00 ............................\n", + "000FC: 00 00 00 00 05 00 00 00 00 00 00 00 00 30 01 00 00 04 00 00 00 00 00 00 02 00 40 04 .............0............@.\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00.119 MB; 80.32% entropy; PE32+ 
executable (DLL) (GUI) x86-64, for MS Windows\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", + "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "00038: 00 00 00 00 C0 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", + "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", + "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", + "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 ........................PE..\n", + "000C4: 64 86 06 00 00 00 00 00 00 00 00 00 00 00 00 00 F0 00 22 20 0B 02 0E 10 00 76 01 00 d.................\"......v..\n", + "000E0: 00 56 00 00 00 00 00 00 50 2B 01 00 00 10 00 00 00 00 00 80 01 00 00 00 00 10 00 00 .V......P+..................\n", + "000FC: 00 02 00 00 06 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 00 20 02 00 00 04 00 00 ............................\n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit nl.ps1 | carve -lt2 intarray [| pack | xor copy:3 | peek ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, we are passing the argument `copy:3` to the [xor][] unit. 
It does work, but we'll have to dig a little deeper to understand what is going on and why it works.\n", + "\n", + "[xor]: https://binref.github.io/#refinery.xor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multibin Arguments\n", + "\n", + "In previous examples, we have called [xor][] with the arguments `0x47` and `0xB0`, which refinery interpreted as an integer representing a byte value with which to xor every byte in the input stream. However, you could also write any of the following:\n", + "\n", + "- `xor h:dff4f503bb` - xor with the hexadecimal encoded byte sequence `DFF4F503BB`\n", + "- `xor s:terrordome` - xor with the utf8-encoded string `terrordome`\n", + "- `xor 7,3,12,120,8` - xor with the given sequence of values, i.e. `07030C7808` in hexadecimal\n", + "\n", + "These are all examples of so-called **multibin** arguments. A multibin argument starts with a number of **handlers**. A **handler** is a short identifier separated from the rest of the expression by a colon. In the above examples, `h` (for **hex**) and `s` (for **string**) are handlers. Most handlers will process the remaining expression as a multibin again, but both `h` and `s` are **final** handlers, which means that the remaining expression will not be parsed any further. This gives you two very certain ways to pass data to a refinery unit in case you are uncertain about potential multibin parsing:\n", + "```\n", + "(br) $ emit h:\n", + "(br) $ emit s:h:\n", + "h:\n", + "(br) $ emit h:s:\n", + "usage: emit [-h] [-L] [-Q] [-0] [-v] [data [data ...]]\n", + "emit: error: argument data: invalid multibin value: 'h:s:'\n", + "```\n", + "The first emits the empty hexadecimal string (which is empty), the second emits the utf8-string `h:`, and the third example tries to emit the hexadecimal string `s:`, which is nonsense, because neither `s` nor `:` are hexadecimal characters. We get a well-deserved error. 
When no handlers are given, a multibin value is evaluated based on its default handler:\n", + "\n", + "- Most units use the standard default handler: It first attempts to interpret the given argument as a file name and will use the contents of that file if it exists. If that fails, it will encode the string to a byte sequence using UTF8.\n", + "- Arithmetic and bitwise block operations (like [xor][], [sub][], [add][], [shr][], [shl][], [rotr][], [rotl][], [neg][]) will attempt to interpret the given argument as a Python expression representing an integer or a sequence of integers. Only when this fails do they revert to the standard default handler.\n", + "- The regular expression units [rex][], [resub][], and [resplit][] do not try to open any files, and they also provide a few additional handlers.\n", + "\n", + "The module documentation of the [argformats][] module contains all handlers and documents their purpose.\n", + "\n", + "[argformats]: https://binref.github.io/lib/argformats.html\n", + "\n", + "[emit]: https://binref.github.io/#refinery.emit\n", + "[rotl]: https://binref.github.io/#refinery.rotl\n", + "[rotr]: https://binref.github.io/#refinery.rotr\n", + "[rex]: https://binref.github.io/#refinery.rex\n", + "[shl]: https://binref.github.io/#refinery.shl\n", + "[shr]: https://binref.github.io/#refinery.shr\n", + "[sub]: https://binref.github.io/#refinery.sub\n", + "[add]: https://binref.github.io/#refinery.add\n", + "[xor]: https://binref.github.io/#refinery.xor\n", + "[neg]: https://binref.github.io/#refinery.neg\n", + "[drp]: https://binref.github.io/#refinery.drp\n", + "[resplit]: https://binref.github.io/#refinery.resplit\n", + "[resub]: https://binref.github.io/#refinery.resub" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The Copy Handler\n", + "\n", + "In our example, we used the `copy` handler for the argument to [xor][]. This handler is final, just like `s` and `h`. 
It also has the short version `c`, so you could just as well write the following to decrypt both buffers:\n", + "\n", + "[xor]: https://binref.github.io/#refinery.xor" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + "60.928 kB; 83.07% entropy; PE32 executable (DLL) (GUI) Intel 80386, for MS Windows\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", + "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "00038: 00 00 00 00 B8 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", + "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", + "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", + "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 4C 01 05 00 00 00 00 00 ................PE..L.......\n", + "000C4: 00 00 00 00 00 00 00 00 E0 00 02 21 0B 01 0E 10 00 C6 00 00 00 24 00 00 00 00 00 00 ...........!.........$......\n", + "000E0: C0 AB 00 00 00 10 00 00 00 E0 00 00 00 00 00 10 00 10 00 00 00 02 00 00 06 00 00 00 ............................\n", + "000FC: 00 00 00 00 05 00 00 00 00 00 00 00 00 30 01 00 00 04 00 00 00 00 00 00 02 00 40 04 .............0............@.\n", + 
"------------------------------------------------------------------------------------------------------------------------\n", + "00.119 MB; 80.32% entropy; PE32+ executable (DLL) (GUI) x86-64, for MS Windows\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", + "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "00038: 00 00 00 00 C0 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", + "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", + "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", + "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 ........................PE..\n", + "000C4: 64 86 06 00 00 00 00 00 00 00 00 00 00 00 00 00 F0 00 22 20 0B 02 0E 10 00 76 01 00 d.................\"......v..\n", + "000E0: 00 56 00 00 00 00 00 00 50 2B 01 00 00 10 00 00 00 00 00 80 01 00 00 00 00 10 00 00 .V......P+..................\n", + "000FC: 00 02 00 00 06 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 00 20 02 00 00 04 00 00 ............................\n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit nl.ps1 | carve -lt2 intarray [| pack | xor c:3 | peek ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `copy` handler is final. 
It parses the remaining expression as a Python index expression with support for slice notation; the value of the argument will be the corresponding slice copied from the input to the currently operating unit. In our example, we simply want to use the **fourth byte of the input** (i.e. the one at index `3`) to be used as the XOR key. Just to demonstrate, we could equally well have copied bytes 5, 6, and 7 (all of which decrypt to zero bytes):" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + "60.928 kB; 83.07% entropy; PE32 executable (DLL) (GUI) Intel 80386, for MS Windows\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", + "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "00038: 00 00 00 00 B8 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", + "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", + "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", + "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 4C 01 05 00 00 00 00 00 ................PE..L.......\n", + "000C4: 00 00 00 00 00 00 00 00 E0 00 02 21 0B 01 0E 10 00 C6 00 00 00 24 00 00 00 00 00 00 ...........!.........$......\n", + "000E0: C0 AB 00 00 00 10 00 00 00 E0 00 00 00 00 00 10 
00 10 00 00 00 02 00 00 06 00 00 00 ............................\n", + "000FC: 00 00 00 00 05 00 00 00 00 00 00 00 00 30 01 00 00 04 00 00 00 00 00 00 02 00 40 04 .............0............@.\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00.119 MB; 80.32% entropy; PE32+ executable (DLL) (GUI) x86-64, for MS Windows\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", + "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "00038: 00 00 00 00 C0 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", + "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", + "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", + "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 ........................PE..\n", + "000C4: 64 86 06 00 00 00 00 00 00 00 00 00 00 00 00 00 F0 00 22 20 0B 02 0E 10 00 76 01 00 d.................\"......v..\n", + "000E0: 00 56 00 00 00 00 00 00 50 2B 01 00 00 10 00 00 00 00 00 80 01 00 00 00 00 10 00 00 .V......P+..................\n", + "000FC: 00 02 00 00 06 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 00 20 02 00 00 04 00 00 ............................\n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit nl.ps1 | carve -lt2 
intarray [| pack | xor c:5:8 | peek ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, `5:8` is the Python slice starting at index `5` and stopping before reaching `8`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Unit Handlers\n", + "\n", + "Every available refinery unit can also be used as a handler. Using `copy:3` as the decryption key does work very well, but we can more succinctly express the heuristic that we used. The [drp][] unit finds and detects frequently repeating patterns in its input data. Hence, if you suspect a single byte XOR to have been used on a buffer that contains a lot of zero bytes (like a PE file), the following will work:\n", + "\n", + "[drp]: https://binref.github.io/#refinery.drp" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + "60.928 kB; 83.07% entropy; PE32 executable (DLL) (GUI) Intel 80386, for MS Windows\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", + "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "00038: 00 00 00 00 B8 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", + "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", + "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", + "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
............................\n", + "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 4C 01 05 00 00 00 00 00 ................PE..L.......\n", + "000C4: 00 00 00 00 00 00 00 00 E0 00 02 21 0B 01 0E 10 00 C6 00 00 00 24 00 00 00 00 00 00 ...........!.........$......\n", + "000E0: C0 AB 00 00 00 10 00 00 00 E0 00 00 00 00 00 10 00 10 00 00 00 02 00 00 06 00 00 00 ............................\n", + "000FC: 00 00 00 00 05 00 00 00 00 00 00 00 00 30 01 00 00 04 00 00 00 00 00 00 02 00 40 04 .............0............@.\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00.119 MB; 80.32% entropy; PE32+ executable (DLL) (GUI) x86-64, for MS Windows\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: 4D 5A 90 00 03 00 00 00 04 00 00 00 FF FF 00 00 B8 00 00 00 00 00 00 00 40 00 00 00 MZ......................@...\n", + "0001C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "00038: 00 00 00 00 C0 00 00 00 0E 1F BA 0E 00 B4 09 CD 21 B8 01 4C CD 21 54 68 69 73 20 70 ................!..L.!This.p\n", + "00054: 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 rogram.cannot.be.run.in.DOS.\n", + "00070: 6D 6F 64 65 2E 0D 0D 0A 24 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mode....$...................\n", + "0008C: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ............................\n", + "000A8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 45 00 00 ........................PE..\n", + "000C4: 64 86 06 00 00 00 00 00 00 00 00 00 00 00 00 00 F0 00 22 20 0B 02 0E 10 00 76 01 00 d.................\"......v..\n", + "000E0: 00 56 00 00 00 00 00 00 50 2B 01 00 00 10 00 00 00 00 00 80 01 00 00 00 00 10 00 00 
.V......P+..................\n", + "000FC: 00 02 00 00 06 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 00 20 02 00 00 04 00 00 ............................\n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit nl.ps1 | carve -lt2 intarray [| pack | xor drp:c::100 | peek ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The argument to [xor][] now first copies the first 100 bytes from the input using `c::100`. These bytes are passed to the [drp][] unit, which will extract the most frequent repeating byte pattern from it. In our example, the patterns are just single bytes, but this method can also work for longer XOR keys.\n", + "\n", + "[xor]: https://binref.github.io/#refinery.xor\n", + "[drp]: https://binref.github.io/#refinery.drp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extracting The Configuration\n", + "\n", + "The Netwalker configuration is stored as an RC4 encrypted buffer in a resource called `31337`, which is usually the only PE resource of the file. 
The buffer starts with a 32bit integer specifying the key length, followed by the key, followed by the encrypted data:" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + "05.434 kB; 99.57% entropy; data\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "00000: 05 00 00 00 73 23 44 6F 38 8D 3E 4C 31 50 31 BE 51 16 7B 33 81 7A 34 2F 77 50 44 6F ....s#Do8.>L1P1.Q.{3.z4/wPDo\n", + "0001C: 8B DB 55 0A 1D BC F4 5D 23 C6 E1 26 D4 FB FF FD 0D E1 34 4F 08 F5 2C A1 2D C4 7C 04 ..U....]#..&......4O..,.-.|.\n", + "00038: D4 BC 70 BB 47 CA 6C 2D E5 3A 45 B6 92 52 74 85 58 69 52 CB 9E 70 C2 26 32 0D 5A 0C ..p.G.l-.:E..Rt.XiR..p.&2.Z.\n", + "00054: 0A D6 65 1F 8E 87 90 77 5E 4A C8 AA EA 56 FD A4 94 FF BB 9F 16 83 4B A7 16 33 00 9E ..e....w^J...V........K..3..\n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit nl.ps1 | carve -ds intarray | xor c:3 | perc | peek -l4" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The decrypted configuration is in JSON format. 
The following is how we can extract the Netwalker configuration from this dropper without ever writing a single intermediate result to disk:" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + "08.867 kB; 55.91% entropy; ASCII text, with very long lines\n", + "---------------------------------------------------------------------------------------------------------------[utf8]---\n", + "{\n", + " \"mpk\": \"kzo1XdPfYBYrIPNqwr7YxsVS2rzbhlHusvwLlbNVowc=\",\n", + " \"mode\": 0,\n", + " \"spsz\": 4,\n", + " \"thr\": 1500,\n", + " \"namesz\": 8,\n", + " \"idsz\": 6,\n", + " \"pers\": false,\n", + " \"onion1\": \"pb36hu4spl6cyjdfhing7h3pw6dhpk32ifemawkujj4gp33ejzdq3did.onion\",\n", + " \"onion2\": \"rnfdsgm6wb6j6su5txkekw4u4y47kp2eatvu7d6xhyn5cs4lt4pdrqqd.onion\",\n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit nl.ps1 [| carve -ds intarray | xor c:3 | perc | put k le:x::4 | rc4 x::k ]| ppjson | peek -d" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A lot of this already makes sense to us, but a few new things are happening, too. Firstly, we have used the `-d` (short for `--decode`) flag of [carve][]. For most patterns, there is an obvious decoding algorithm, and [carve][] can apply this decoding automatically. In the case of the `intarray` format, the [pack][] unit is invoked. After decrypting the payload, we use the [perc][] unit to extract all PE resources. 
We can use the `--list` option to get a list of all PE resources in the buffer:\n", + "\n", + "[perc]: https://binref.github.io/#refinery.perc\n", + "[pack]: https://binref.github.io/#refinery.pack\n", + "[carve]: https://binref.github.io/#refinery.carve" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1337/31337/0\n" + ] + } + ], + "source": [ + "%emit nl.ps1 | carve -ds intarray | xor c:3 | perc -l" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since there is only one, we can simply continue processing the complete output of [perc][]. In general, [perc][] can be given a wildcard expression to select only the resources you are interested in, and [perc][] will then extract each of those as one output chunk. The next unit is where it gets interesting. We run the [put][] unit with the parameters `k` and `le:x::4`, and then we process the result using the [rc4][] unit with the argument `x::k`. You may have already guessed it, `k` is a variable containing the length of the RC4 key.\n", + "\n", + "Chunks in a refinery frame can carry a dictionary of metadata, also referred to as **meta variables**. As usual, it is recommended to also read [the official documentation about meta variables][meta]. There are a few units that can generate meta variables, and [put][] is likely the most straightforward way to do so. The [put][] unit takes as its first argument the name of the variable and as its second argument some multibin expression to store in that variable. In this case, we store `le:x::4`, which cuts out the first 4 bytes and decodes them to an integer using little-endian encoding (that's what the `le` handler does). From this point on, the variable `k` is available in the frame and can be used as part of multibin expressions. 
The [peek][] unit displays the contents of all meta variables that are present on a chunk; in this case there are two variables:\n", + "\n", + "[put]: https://binref.github.io/#refinery.put\n", + "[rc4]: https://binref.github.io/#refinery.rc4\n", + "[perc]: https://binref.github.io/#refinery.perc\n", + "[peek]: https://binref.github.io/#refinery.peek\n", + "\n", + "[meta]: https://binref.github.io/lib/meta.html" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + " k = 5\n", + " lcid = Neutral Locale Language\n", + " offset = 0x1B858\n", + " path = 1337/31337/0\n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit nl.ps1 | carve -ds intarray | xor c:3 | perc [| put k le:x::4 | peek -l0 ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here you can see that the [perc][] unit has also attached a piece of metadata to the chunk, namely the path of the resource that it extracted. \n", + "\n", + "[perc]: https://binref.github.io/#refinery.perc" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "Congratulations, you made it! This tutorial has introduced **framing syntax**, **multibin handlers**, and **meta variables**, and these are all the core concepts of binary refinery toolkit. In combination, they can perform a fairly broad range of data transformations. Future tutorials will focus on extending the binary refinery with custom units and using refinery units within Python code. Stay tuned!" 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.7 ('venv')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "4bb4b02bf57a2c25456a741474d02d2de926aec3c451f22b312ec34f66909bb4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/tutorials/tbr-files.v0x02.amadey.loader.ipynb b/tutorials/tbr-files.v0x02.amadey.loader.ipynb index dc993ac458..e012f447c8 100644 --- a/tutorials/tbr-files.v0x02.amadey.loader.ipynb +++ b/tutorials/tbr-files.v0x02.amadey.loader.ipynb @@ -1,268 +1,262 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The Refinery Files 0x02: Amadey Loader\n", - "\n", - "This is a short tutorial about decrypting the strings in the following Amadey Loader sample:\n", - "```\n", - "6e01f9d1997186d06274a508bc0a511aa6fb50e430b77efca593c00d3fc62cba\n", - "```\n", - "As always, remember that this is **malware**, do not execute it unless you know exactly what you are doing. 
For instructions about how to set up [refinery], see the main page and documentation.\n", - "\n", - "[refinery]: https://github.com/binref/refinery/" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from tutorials import boilerplate\n", - "boilerplate.store_sample(\n", - " name='a.bin',\n", - " hash='6e01f9d1997186d06274a508bc0a511aa6fb50e430b77efca593c00d3fc62cba'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "00.223 MB 6e01f9d1997186d06274a508bc0a511aa6fb50e430b77efca593c00d3fc62cba a.bin\n" - ] - } - ], - "source": [ - "%ls" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## String Decryption\n", - "\n", - "After some reverse engineering, you discover that the strings in this binary can be identified as the ones that are in uppercase hex format. After hex-decoding them, they are decrypted by sequentially subtracting the bytes of a key string. This is the key string:\n", - "```\n", - "6768875d0dd576a718d85aa1d71d25c1\n", - "```\n", - "... but oddly enough, the malware's decryption routine adds `1` to every index when accessing this buffer. This means that it will never access the very first byte of the key, but it will also use the zero byte that terminates it. 
A quick way to extract the C2 servers from this sample would therefore be the following:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "happyday9risce[.]com\n", - "xksldjf9sksdjfks[.]com\n", - "dhisa8f9ah02hopasiaf[.]com\n" - ] - } - ], - "source": [ - "%emit a.bin | carve -dn10 hex [| sub q:768875d0dd576a718d85aa1d71d25c1%00 | xtp domain | defang | sep ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we have first used [carve] using the `hex` pattern to extract strings that are likely encrypted. We used the `-n` switch to select a minimum length of 10 characters, which corresponds to 5 decoded bytes. We also used the `-d` (or `--decode`) flag to immediately hex-decode the matched strings. We then open a [frame] so that we can work on each of these strings individually. Each string is then decrypted by subtracting the bytes of a key using the [sub] unit; this key is generated from the following [multibin] expression:\n", - "```\n", - "q:768875d0dd576a718d85aa1d71d25c1%00\n", - "```\n", - "This represents the result of URL-decoding the string, which is the decryption key with the first letter removed and a zero byte appended; these two modifications account for the off-by-one access used in Amadey's code. After decrypting the string, we run [xtp] on it, which is short for \"extract pattern\": The unit is designed to extract indicators. There are a number of patterns available by name, in this case we are interested in domains. Run `xtp -h` to obtain a list of all currently available indicator patterns. Finally, we use [defang] because these domains will end up in a Jupiter notebook and we want to prevent that some web component somewhere becomes overeager and turns them into a clickable link. 
Because we would like to have the outputs separated by new lines in the terminal, we finally pipe the results to [sep] before closing the frame.\n", - "\n", - "[carve]: https://binref.github.io/#refinery.carve\n", - "[defang]: https://binref.github.io/#refinery.defang\n", - "[frame]: https://binref.github.io/lib/frame.html\n", - "[multibin]: https://binref.github.io/lib/argformats.html\n", - "[sub]: https://binref.github.io/#refinery.sub\n", - "[sep]: https://binref.github.io/#refinery.sep\n", - "[xtp]: https://binref.github.io/#refinery.xtp" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The pipeline is fine but just for the sake of keeping this tutorial going we can try to:\n", - "1. First extract potential keys based on a wild guess of the format,\n", - "2. then try to decrypt all the strings with all the key candidates and keep only the good stuff." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extracting Key Candidates\n", - "\n", - "After a little more reverse engineering, you notice that there are actually three strings in the binary that look likey key material:\n", - "\n", - "1. `6768875d0dd576a718d85aa1d71d25c1`\n", - "2. `396554bad854c42cee4903aadccae3d4`\n", - "3. `f12bb04fdd6d0132403f4b3bd4d4814b`\n", - "\n", - "All of them are 32 characters long and use lowercase hex characters. Notably, the encrypted strings all used **uppercase** hex characters. There is reason to believe that the other two sequences are used as decryption keys for other kinds of data, but that's something you can figure out later. For now, we would like to devise a refinery pipeline that has a chance to work on other samples where the string encryption key might be different. We will, however, assume that the format of the key will always be lowercase hex and 32 characters in length. This is clearly not a safe assumption, but I'll need you to get all the way off of my back about that so that the tutorial can happen. 
First, let's get all the potential keys out of our sample:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1. 6768875d0dd576a718d85aa1d71d25c1\n", - "2. 396554bad854c42cee4903aadccae3d4\n", - "3. f12bb04fdd6d0132403f4b3bd4d4814b\n" - ] - } - ], - "source": [ - "%emit a.bin | rex [a-f0-9]{32} [| cfmt {index+1}. {} ]]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, we use [rex] (the regular expression unit) to search for all 32-letter lowercase hex strings. For each of those, we invoke the [cfmt] unit so that I can talk about it in this tutorial. Its command line arguments are interpreted as a Python format string expression which can use any [meta] variables and the symbol `{}`, the latter representing the body of the current chunk. In this case, we use the magic [meta] variable `index` which represents the index of the current chunk in the frame. Since the chunk index is `0`-based, we have to add `1` in order to have the output list start at `1`. Finally, notice that we did not use the [sep] unit, but the output was separated by newlines anyway: This happened because we used `]]` rather than `]` to close the frame; using more closing brackets than necessary will insert newlines between all output chunks. This is an intentional piece of syntactic sugar which has been added for convenience.\n", - "\n", - "Now we have a list of all prospect keys, but what we'd really want is to have three chunks, each of which contains the contents of `a.bin`, but each also carrying a different prospect key as a piece of [meta]data. 
We can achieve this like so:\n", - "\n", - "[cfmt]: https://binref.github.io/#refinery.cfmt\n", - "[rex]: https://binref.github.io/#refinery.rex\n", - "[sep]: https://binref.github.io/#refinery.sep\n", - "[meta]: https://binref.github.io/lib/meta.html" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------------------------------------------------------\n", - " entropy = 80.76%\n", - " key = 6768875d0dd576a718d85aa1d71d25c1\n", - " magic = PE32 executable (GUI) Intel 80386, for MS Windows\n", - " offset = 0x2FBBC\n", - " size = 00.223 MB\n", - "------------------------------------------------------------------------------------------------------------------------\n", - " entropy = 80.76%\n", - " key = 396554bad854c42cee4903aadccae3d4\n", - " magic = PE32 executable (GUI) Intel 80386, for MS Windows\n", - " offset = 0x2FBE0\n", - " size = 00.223 MB\n", - "------------------------------------------------------------------------------------------------------------------------\n", - " entropy = 80.76%\n", - " key = f12bb04fdd6d0132403f4b3bd4d4814b\n", - " magic = PE32 executable (GUI) Intel 80386, for MS Windows\n", - " offset = 0x2FC0C\n", - " size = 00.223 MB\n", - "------------------------------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "%emit a.bin | rex [a-f0-9]{32} [| swap key | emit a.bin | peek -ml0 ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "What we did here is to first [emit] the contents of `a.bin`. As before, we then extract all key candidates from it, opening a new [frame] to process each of these strings individually. However, we will want to work on the contents of `a.bin` again, not the key string itself. 
To get to a point where we can do that, we first use [swap] to move the contents of the current chunk into the [meta]-variable `key`. After invoking the [swap] unit, the frame contains three empty chunks, each of which has one of the key candidates attached to it as a variable. Then we simply run [emit] again which gives us three chunks, each of which contains the contents of `a.bin`.\n", - "\n", - "[emit]: https://binref.github.io/#refinery.emit\n", - "[swap]: https://binref.github.io/#refinery.swap\n", - "[frame]: https://binref.github.io/lib/frame.html\n", - "[meta]: https://binref.github.io/lib/meta.html" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Now All Together\n", - "\n", - "The final step is literally just sticking the two pipelines together:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "happyday9risce[.]com\n", - "xksldjf9sksdjfks[.]com\n", - "dhisa8f9ah02hopasiaf[.]com\n" - ] - } - ], - "source": [ - "%emit a.bin [| rex [a-f0-9]{32} | swap key | emit a.bin | carve -dn10 hex | sub cca[h:00]:snip[1:]:var:key | xtp | defang ]]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Since the key is now stored as a variable, we need to do a little more work to account for the off-by-one glitch in Amadey's code. First, the expression `var:key` contains the contents of the `key` variable of the current chunk. Next, `snip[1:]:var:key` skips the first byte and finally, `cca[h:00]:take[1:]:var:key` appends a zero byte. 
This [multibin] expression uses both [snip] and [cca] as unit-based handlers.\n", - "\n", - "[cca]: https://binref.github.io/#refinery.cca\n", - "[multibin]: https://binref.github.io/lib/argformats.html\n", - "[snip]: https://binref.github.io/#refinery.snip" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "4bb4b02bf57a2c25456a741474d02d2de926aec3c451f22b312ec34f66909bb4" - }, - "kernelspec": { - "display_name": "Python 3.9.7 ('venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The Refinery Files 0x02: Amadey Loader\n", + "\n", + "This is a short tutorial about decrypting the strings in the following Amadey Loader sample:\n", + "```\n", + "6e01f9d1997186d06274a508bc0a511aa6fb50e430b77efca593c00d3fc62cba\n", + "```\n", + "As always, remember that this is **malware**, do not execute it unless you know exactly what you are doing. 
For instructions about how to set up [refinery], see the main page and documentation.\n", + "\n", + "[refinery]: https://github.com/binref/refinery/" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [], + "source": [ + "from tutorials import boilerplate\n", + "boilerplate.store_sample(\n", + " name='a.bin',\n", + " hash='6e01f9d1997186d06274a508bc0a511aa6fb50e430b77efca593c00d3fc62cba'\n", + ")" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "00.223 MB 6e01f9d1997186d06274a508bc0a511aa6fb50e430b77efca593c00d3fc62cba a.bin\n" + ] + } + ], + "source": [ + "%ls" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## String Decryption\n", + "\n", + "After some reverse engineering, you discover that the strings in this binary can be identified as the ones that are in uppercase hex format. After hex-decoding them, they are decrypted by sequentially subtracting the bytes of a key string. This is the key string:\n", + "```\n", + "6768875d0dd576a718d85aa1d71d25c1\n", + "```\n", + "... but oddly enough, the malware's decryption routine adds `1` to every index when accessing this buffer. This means that it will never access the very first byte of the key, but it will also use the zero byte that terminates it. A quick way to extract the C2 servers from this sample would therefore be the following:" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "happyday9risce[.]com\n", + "xksldjf9sksdjfks[.]com\n", + "dhisa8f9ah02hopasiaf[.]com\n" + ] + } + ], + "source": [ + "%emit a.bin | carve -dn10 hex [| sub q:768875d0dd576a718d85aa1d71d25c1%00 | xtp domain | defang | sep ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we have first used [carve] using the `hex` pattern to extract strings that are likely encrypted. 
We used the `-n` switch to select a minimum length of 10 characters, which corresponds to 5 decoded bytes. We also used the `-d` (or `--decode`) flag to immediately hex-decode the matched strings. We then open a [frame] so that we can work on each of these strings individually. Each string is then decrypted by subtracting the bytes of a key using the [sub] unit; this key is generated from the following [multibin] expression:\n", + "```\n", + "q:768875d0dd576a718d85aa1d71d25c1%00\n", + "```\n", + "This represents the result of URL-decoding the string, which is the decryption key with the first letter removed and a zero byte appended; these two modifications account for the off-by-one access used in Amadey's code. After decrypting the string, we run [xtp] on it, which is short for \"extract pattern\": The unit is designed to extract indicators. There are a number of patterns available by name, in this case we are interested in domains. Run `xtp -h` to obtain a list of all currently available indicator patterns. Finally, we use [defang] because these domains will end up in a Jupyter notebook and we want to prevent some web component somewhere from becoming overeager and turning them into a clickable link. Because we would like to have the outputs separated by new lines in the terminal, we finally pipe the results to [sep] before closing the frame.\n", + "\n", + "[carve]: https://binref.github.io/#refinery.carve\n", + "[defang]: https://binref.github.io/#refinery.defang\n", + "[frame]: https://binref.github.io/lib/frame.html\n", + "[multibin]: https://binref.github.io/lib/argformats.html\n", + "[sub]: https://binref.github.io/#refinery.sub\n", + "[sep]: https://binref.github.io/#refinery.sep\n", + "[xtp]: https://binref.github.io/#refinery.xtp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The pipeline is fine but just for the sake of keeping this tutorial going we can try to:\n", + "1. 
First extract potential keys based on a wild guess of the format,\n", + "2. then try to decrypt all the strings with all the key candidates and keep only the good stuff." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extracting Key Candidates\n", + "\n", + "After a little more reverse engineering, you notice that there are actually three strings in the binary that look like key material:\n", + "\n", + "1. `6768875d0dd576a718d85aa1d71d25c1`\n", + "2. `396554bad854c42cee4903aadccae3d4`\n", + "3. `f12bb04fdd6d0132403f4b3bd4d4814b`\n", + "\n", + "All of them are 32 characters long and use lowercase hex characters. Notably, the encrypted strings all used **uppercase** hex characters. There is reason to believe that the other two sequences are used as decryption keys for other kinds of data, but that's something you can figure out later. For now, we would like to devise a refinery pipeline that has a chance to work on other samples where the string encryption key might be different. We will, however, assume that the format of the key will always be lowercase hex and 32 characters in length. This is clearly not a safe assumption, but I'll need you to get all the way off of my back about that so that the tutorial can happen. First, let's get all the potential keys out of our sample:" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1. 6768875d0dd576a718d85aa1d71d25c1\n", + "2. 396554bad854c42cee4903aadccae3d4\n", + "3. f12bb04fdd6d0132403f4b3bd4d4814b\n" + ] + } + ], + "source": [ + "%emit a.bin | rex [a-f0-9]{32} [| cfmt {index+1}. {} ]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we use [rex] (the regular expression unit) to search for all 32-letter lowercase hex strings. For each of those, we invoke the [cfmt] unit so that I can talk about it in this tutorial. 
Its command line arguments are interpreted as a Python format string expression which can use any [meta] variables and the symbol `{}`, the latter representing the body of the current chunk. In this case, we use the magic [meta] variable `index` which represents the index of the current chunk in the frame. Since the chunk index is `0`-based, we have to add `1` in order to have the output list start at `1`. Finally, notice that we did not use the [sep] unit, but the output was separated by newlines anyway: This happened because we used `]]` rather than `]` to close the frame; using more closing brackets than necessary will insert newlines between all output chunks. This is an intentional piece of syntactic sugar which has been added for convenience.\n", + "\n", + "Now we have a list of all prospect keys, but what we'd really want is to have three chunks, each of which contains the contents of `a.bin`, but each also carrying a different prospect key as a piece of [meta]data. We can achieve this like so:\n", + "\n", + "[cfmt]: https://binref.github.io/#refinery.cfmt\n", + "[rex]: https://binref.github.io/#refinery.rex\n", + "[sep]: https://binref.github.io/#refinery.sep\n", + "[meta]: https://binref.github.io/lib/meta.html" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------------------------------------------------------\n", + " entropy = 80.76%\n", + " key = 6768875d0dd576a718d85aa1d71d25c1\n", + " magic = PE32 executable (GUI) Intel 80386, for MS Windows\n", + " offset = 0x2FBBC\n", + " size = 00.223 MB\n", + "------------------------------------------------------------------------------------------------------------------------\n", + " entropy = 80.76%\n", + " key = 396554bad854c42cee4903aadccae3d4\n", + " magic = PE32 executable (GUI) Intel 80386, for MS Windows\n", + " offset = 0x2FBE0\n", + " size = 00.223 
MB\n", + "------------------------------------------------------------------------------------------------------------------------\n", + " entropy = 80.76%\n", + " key = f12bb04fdd6d0132403f4b3bd4d4814b\n", + " magic = PE32 executable (GUI) Intel 80386, for MS Windows\n", + " offset = 0x2FC0C\n", + " size = 00.223 MB\n", + "------------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "%emit a.bin | rex [a-f0-9]{32} [| swap key | emit a.bin | peek -ml0 ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "What we did here is to first [emit] the contents of `a.bin`. As before, we then extract all key candidates from it, opening a new [frame] to process each of these strings individually. However, we will want to work on the contents of `a.bin` again, not the key string itself. To get to a point where we can do that, we first use [swap] to move the contents of the current chunk into the [meta]-variable `key`. After invoking the [swap] unit, the frame contains three empty chunks, each of which has one of the key candidates attached to it as a variable. 
Then we simply run [emit] again which gives us three chunks, each of which contains the contents of `a.bin`.\n", + "\n", + "[emit]: https://binref.github.io/#refinery.emit\n", + "[swap]: https://binref.github.io/#refinery.swap\n", + "[frame]: https://binref.github.io/lib/frame.html\n", + "[meta]: https://binref.github.io/lib/meta.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Now All Together\n", + "\n", + "The final step is literally just sticking the two pipelines together:" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "happyday9risce[.]com\n", + "xksldjf9sksdjfks[.]com\n", + "dhisa8f9ah02hopasiaf[.]com\n" + ] + } + ], + "source": [ + "%emit a.bin [| rex [a-f0-9]{32} | swap key | emit a.bin | carve -dn10 hex | sub cca[h:00]:snip[1:]:var:key | xtp | defang ]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since the key is now stored as a variable, we need to do a little more work to account for the off-by-one glitch in Amadey's code. First, the expression `var:key` contains the contents of the `key` variable of the current chunk. Next, `snip[1:]:var:key` skips the first byte and finally, `cca[h:00]:snip[1:]:var:key` appends a zero byte. 
This [multibin] expression uses both [snip] and [cca] as unit-based handlers.\n", + "\n", + "[cca]: https://binref.github.io/#refinery.cca\n", + "[multibin]: https://binref.github.io/lib/argformats.html\n", + "[snip]: https://binref.github.io/#refinery.snip" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "4bb4b02bf57a2c25456a741474d02d2de926aec3c451f22b312ec34f66909bb4" + }, + "kernelspec": { + "display_name": "Python 3.9.7 ('venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/tutorials/tbr-files.v0x03.seduploader.ipynb b/tutorials/tbr-files.v0x03.seduploader.ipynb index d6a88b18d5..000df4ffd1 100644 --- a/tutorials/tbr-files.v0x03.seduploader.ipynb +++ b/tutorials/tbr-files.v0x03.seduploader.ipynb @@ -1,556 +1,548 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The Refinery Files 0x03: SedUpLoader C2s\n", - "\n", - "This is a tutorial about extracting the C2 domains from [SedUpLoader] samples. We will be working with the following one:\n", - "```\n", - "2396c9dac2184405f7d1f127bec88e56391e4315d4d2e5b951c795fdc1982d59\n", - "```\n", - "As always, remember that this is **malware**, do not execute it unless you know exactly what you are doing. 
For instructions about how to set up [refinery], see the main page and documentation.\n", - "\n", - "[refinery]: https://github.com/binref/refinery/\n", - "[SedUpLoader]: https://malpedia.caad.fkie.fraunhofer.de/details/win.seduploader" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from tutorials import boilerplate\n", - "boilerplate.store_sample(\n", - " name='a.bin',\n", - " hash='2396c9dac2184405f7d1f127bec88e56391e4315d4d2e5b951c795fdc1982d59'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "42.496 kB 2396c9dac2184405f7d1f127bec88e56391e4315d4d2e5b951c795fdc1982d59 a.bin\n" - ] - } - ], - "source": [ - "%ls" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## String Decryption\n", - "\n", - "After some reverse engineering, you discover that the function at `0x403FBA` implements the string decryption, which is an XOR with the following 13-byte sequence, stored at the virtual address `0x408b78`:\n", - "```\n", - "5f19362c533e6f1a0c6a202e34\n", - "```\n", - "Most calls to the string decryption function decrypt a constant string. Let us first decrypt those constant strings. The string decryption functions receives its two arguments (the encrypted string buffer and its length) on the stack, and the opcodes for such a call look similar to this:\n", - "```\n", - "00404a8f 6a XX PUSH X\n", - "00404a91 68 YY YY YY YY PUSH Y\n", - "00404a96 ...\n", - "00404a98 e8 1d f5 ff ff CALL STRING_DECRYPT\n", - "```\n", - "where `X` is the length and `Y` is the string address. We will first try to find all these call sequences. 
First, we use [emit] to output the contents of the malware sample and then use the [rex] unit to search it for the opcode sequence that pushes a nonzero byte and a 32-bit integer address to the stack:\n", - "```\n", - "rex \"\\x6A([^\\0])\\x68(.{4})\" {1}{2}\n", - "```\n", - "The second argument to [rex] is the format string `{1}{2}`, which means to simply concatenate the first and second match groups - in this case, this will be the single byte encoding the string length and the four bytes encoding its address. We then use the [struct] unit to parse the integers from the opcode sequence; the struct format `{n:B}{a:L}` contains two format fields: `{n:B}` to read the one-byte string length value into the variable `n`, and `{a:L}` to read the 4-byte string address value into the variable `a`. Finally, we use [cfmt] to pretty-print the output.\n", - "\n", - "[emit]: https://binref.github.io/#refinery.emit\n", - "[rex]: https://binref.github.io/#refinery.rex\n", - "[struct]: https://binref.github.io/#refinery.struct\n", - "[cfmt]: https://binref.github.io/#refinery.cfmt" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "address=0x1FC0EAEE, length=7\n", - "address=0xA48D6762, length=43\n", - "address=0x000000A9, length=1\n", - "address=0x00408150, length=14\n", - "address=0x00000093, length=1\n", - "address=0x00000244, length=1\n", - "address=0x00408160, length=9\n", - "address=0x00000094, length=1\n", - "address=0x0040816C, length=8\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "address=0x00000097, length=1\n", - "address=0x00000095, length=1\n", - "address=0x00408144, length=12\n", - "address=0x000001F2, length=9\n", - "address=0x00000239, length=1\n", - "address=0x0000017E, length=4\n", - "address=0x0000017F, length=4\n", - "address=0x000001A4, length=5\n", - "address=0x000001A2, length=5\n", - "address=0x00408AEC, length=1\n", - 
"address=0x00408AF0, length=1\n", - "address=0x00408AEC, length=1\n", - "address=0x01010101, length=255\n", - "address=0x40000000, length=2\n", - "address=0x00408B88, length=12\n", - "address=0x19F78C90, length=92\n", - "address=0x5BC1D14F, length=94\n", - "address=0xC930EA1E, length=93\n", - "address=0x0D89AD05, length=75\n", - "address=0x00408BA4, length=43\n", - "address=0x00408D84, length=4\n", - "address=0x00408D6C, length=5\n", - "address=0x00408D74, length=6\n", - "address=0x00408D7C, length=6\n", - "address=0x00408BE2, length=1\n", - "address=0x00408BE4, length=12\n", - "address=0x00408D8C, length=14\n", - "address=0x00408D84, length=4\n", - "address=0x00408E04, length=12\n", - "address=0x00408E10, length=2\n", - "address=0x00408DA0, length=67\n", - "address=0x00408DE4, length=6\n", - "address=0x00408BF0, length=44\n", - "address=0x00408DEC, length=14\n", - "address=0x00408DFC, length=6\n", - "address=0x00408D88, length=3\n", - "address=0x00408D9C, length=2\n", - "address=0x00408BF0, length=44\n", - "address=0x00408BF0, length=44\n", - "address=0x00408D68, length=2\n", - "address=0x00408E14, length=11\n", - "address=0x00408DA0, length=67\n", - "address=0x00408DE4, length=6\n", - "address=0x00408D84, length=4\n", - "address=0x00408BE3, length=1\n", - "address=0x00408BE0, length=1\n", - "address=0x00408D6A, length=1\n", - "address=0x00408D6B, length=1\n", - "address=0x00408BE1, length=1\n", - "address=0x00408BE1, length=1\n", - "address=0x00408E7C, length=6\n", - "address=0x00408E84, length=7\n", - "address=0x00408E8C, length=6\n", - "address=0x00408E94, length=7\n", - "address=0x00408E9C, length=10\n", - "address=0x00408EAC, length=11\n", - "address=0x00408E60, length=8\n", - "address=0x00408E68, length=10\n", - "address=0x00408E74, length=6\n", - "address=0x00408EA8, length=2\n", - "address=0x80000000, length=1\n", - "address=0x00408EF8, length=18\n", - "address=0x00408EE0, length=23\n", - "address=0x00408F0C, length=19\n", - "address=0x00408FD4, 
length=29\n", - "address=0x00008088, length=102\n" - ] - } - ], - "source": [ - "%emit a.bin | rex \"\\x6A([^\\0])\\x68(.{4})\" {1}{2} [| struct {n:B}{a:L} | cfmt address=0x{a:08X}, length={n} ]]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is already quite clear that some of these are probably false positives; for example, the \"address\" `0x00008088` is invalid. That should not be a problem for our next step, though. We now want to adjust the pipeline so that we actually extract the encrypted strings rather than just their addresses. At the end of our current pipeline, we are working on a stream of 5-byte sequences which encode a length (as one byte) and an address: We have already lost the data of the original sample when we ran the [rex] command. To correct this, we will first use [put] to store a backup of the sample data in a variable called `bin`. This variable will still be attached to the results of [rex] when they pass to the [struct] unit. We then alter the [struct] command as follows:\n", - "```\n", - "struct {n:B}{a:L} {bin}\n", - "```\n", - "We will still parse out the string length and address as variables `n` and `a`, respectively. The second argument of struct is an optional string format expression that defines the output body. In this case, we are instructing it to output the contents of the previously defined variable `bin`. After this command, the output will be several copies of the malware sample, each of which has meta variables `a` and `n` defined, specifying the virtual address and length of what is potentially an encrypted string. To extract the actual strings, we use the [vsnip] unit, which can extract data from executable formats based on virtual addresses. 
We specify the `--quiet` flag for [vsnip] because we already know that some addresses will be bogus and we want to simply ignore those warnings.\n", - "\n", - "[peek]: https://binref.github.io/#refinery.peek\n", - "[put]: https://binref.github.io/#refinery.put\n", - "[rex]: https://binref.github.io/#refinery.rex\n", - "[struct]: https://binref.github.io/#refinery.struct\n", - "[vsnip]: https://binref.github.io/#refinery.vsnip" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "00.014 kB: 18 7C 42 7C 21 51 0C 7F 7F 19 68 4B 55 2F .|B|!Q....hKU/ \n", - "00.009 kB: 17 7C 57 5C 12 52 03 75 6F .|W\\.R.uo \n", - "00.008 kB: 17 7C 57 5C 15 4C 0A 7F .|W\\.L.. \n", - "00.012 kB: 13 76 57 48 1F 57 0D 68 6D 18 59 6F .vWH.W.hm.Yo \n", - "00.001 kB: 32 2 \n", - "00.001 kB: 34 4 \n", - "00.001 kB: 32 2 \n", - "00.012 kB: 2D 6C 58 48 3F 52 5C 28 22 0F 58 4B -lXH?R\\(\".XK \n", - "00.043 kB: 0C 40 65 78 16 73 33 59 79 18 52 4B 5A 2B 5A 59 42 27 4C 00 76 5F 0F 54 72 67 3A .@ex.s3Yy.RKZ+ZYB'L.v_.Trg:\n", - "00.004 kB: 0F 56 65 78 .Vex \n", - "00.005 kB: 3B 70 45 47 6E ;pEGn \n", - "00.006 kB: 3D 6C 5F 40 37 03 =l_@7. \n", - "00.006 kB: 36 77 5C 49 30 4A 6w\\I0J \n", - "00.001 kB: 28 ( \n", - "00.012 kB: 6F 4E 50 15 6B 07 59 5A 3E 55 19 1F oNP.k.YZ>U.. 
\n", - "00.014 kB: 70 3C 45 09 20 1B 1C 35 33 4F 53 13 11 2C po..j..4_\n", - "00.014 kB: 28 6E 41 02 34 51 00 7D 60 0F 0E 4D 5B 32 (nA.4Q.}`..M[2 \n", - "00.006 kB: 2C 7C 57 5E 30 56 ,|W^0V \n", - "00.003 kB: 18 5C 62 .\\b \n", - "00.002 kB: 2E 24 .$ \n", - "00.044 kB: 38 76 59 4B 3F 5B 41 79 63 07 20 2E 34 5F 19 36 2C 53 3E 6F 1A 0C 6A 20 2E 34 5F 8vYK?[Ayc...4_.6,S>o..j..4_\n", - "00.044 kB: 38 76 59 4B 3F 5B 41 79 63 07 20 2E 34 5F 19 36 2C 53 3E 6F 1A 0C 6A 20 2E 34 5F 8vYK?[Ayc...4_.6,S>o..j..4_\n", - "00.002 kB: 36 7D 6} \n", - "00.011 kB: AB E3 CD 31 5A FF 95 F1 74 EA 2B ...1Z...t.+ \n", - "00.067 kB: 0C 76 50 58 24 5F 1D 7F 50 27 49 4D 46 30 6A 59 4A 27 62 38 73 62 0E 4F 59 47 03 .vPX$_..P'IMF0jYJ'b8sb.OYG.\n", - "00.006 kB: 1B 76 5B 4D 3A 50 .v[M:P \n", - "00.004 kB: 0F 56 65 78 .Vex \n", - "00.001 kB: 3C < \n", - "00.001 kB: 6E n \n", - "00.001 kB: 39 9 \n", - "00.001 kB: 33 3 \n", - "00.001 kB: 6D m \n", - "00.001 kB: 6D m \n", - "00.006 kB: 04 7F 5F 40 36 63 .._@6c \n", - "00.007 kB: 1A 61 53 4F 26 4A 0A .aSO&J. \n", - "00.006 kB: 1B 7C 5A 49 27 5B .|ZI'[ \n", - "00.007 kB: 04 36 50 45 3F 5B 32 .6PE?[2 \n", - "00.010 kB: 04 6A 53 58 27 57 01 7D 7F 37 .jSX'W.}.7 \n", - "00.011 kB: 04 36 45 49 27 4A 06 74 6B 19 7D .6EI'J.tk.} \n", - "00.008 kB: 19 70 5A 49 1D 5F 02 7F .pZI._.. \n", - "00.010 kB: 0F 78 42 44 07 51 3C 7B 7A 0F .xBD.Q<{z. \n", - "00.006 kB: 0D 6C 58 48 3F 52 .lXH?R \n", - "00.002 kB: 16 49 .I \n", - "00.018 kB: 31 7C 42 5B 3C 4C 04 34 7C 18 4F 56 4D 71 71 42 58 23 1|B[