From c6eb03ce76f33bb93b338bf9fa120b6fbb83abdb Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Wed, 11 Nov 2020 12:28:58 +0100 Subject: [PATCH] test testfiles consistency --- .github/check_sample_filenames.py | 99 ++++++++++++++++++ .github/workflows/tests.yml | 20 ++++ .gitignore | 3 +- ...6 => 2055994ff75b4309eee3a49c5749d306.exe_ | Bin ...9 => 2f43138aa75fb12ac482b486cbc98569.dll_ | Bin ..._ => 82bf6347acf15e5d883715dc289d8a2b.exe_ | Bin ...75dde6fd0561305d9f8307be7bb11d02ae363.exe_ | Bin README.md | 19 ++++ readme.md | 1 - 9 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 .github/check_sample_filenames.py create mode 100644 .github/workflows/tests.yml rename 2055994ff75b4309eee3a49c5749d306 => 2055994ff75b4309eee3a49c5749d306.exe_ (100%) rename 2f43138aa75fb12ac482b486cbc98569 => 2f43138aa75fb12ac482b486cbc98569.dll_ (100%) rename 82BF6347ACF15E5D883715DC289D8A2B.exe_ => 82bf6347acf15e5d883715dc289d8a2b.exe_ (100%) rename 8ba66e4b618ffdc8255f1df01f875dde6fd0561305d9f8307be7bb11d02ae36.exe_ => 8ba66e4b618ffdc8255f1df01f875dde6fd0561305d9f8307be7bb11d02ae363.exe_ (100%) create mode 100644 README.md delete mode 100644 readme.md diff --git a/.github/check_sample_filenames.py b/.github/check_sample_filenames.py new file mode 100644 index 0000000..d415293 --- /dev/null +++ b/.github/check_sample_filenames.py @@ -0,0 +1,99 @@ +""" +Check testfiles data directory for consistent naming. + +Copyright (C) 2020 FireEye, Inc. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. +You may obtain a copy of the License at: [package root]/LICENSE.txt +Unless required by applicable law or agreed to in writing, software distributed under the License + is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. 
import os
import sys
import string
import hashlib
import logging
import os.path
import argparse

logger = logging.getLogger("capa.tests.data")

# file name suffixes that are never checked (documentation and git metadata files)
IGNORED_EXTS = (".md", ".git", ".gitattributes", ".gitignore")
# the only file name suffixes accepted for test samples
VALID_EXTS = (".exe_", ".dll_", ".sys_", ".raw32", ".raw64")
# directory names that are never descended into (matched exactly, not as substrings)
IGNORED_DIRS = (".git", ".github")
# number of bytes read per iteration when hashing a sample
HASH_CHUNK_SIZE = 64 * 1024


def main(argv=None):
    """
    Parse the command line and check the given directory for consistent file naming.

    args:
      argv: list of command line arguments; defaults to sys.argv[1:].

    returns: process exit code, 0 if all files are named consistently, 1 otherwise.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("testfiles", type=str, help="Path to tests/data")
    args = parser.parse_args(args=argv)

    # os.walk() yields nothing for a missing path, so without this check
    # a mistyped path would be reported as a successful run
    if not os.path.isdir(args.testfiles):
        logger.error("not a directory: %s", args.testfiles)
        return 1

    if test_data_filenames(args):
        return 1

    logger.info("test files look good!")
    return 0


def test_data_filenames(args):
    """
    Walk args.testfiles and log an error for every inconsistently named file.

    A file is reported when its name does not end with one of VALID_EXTS, or when
    its name (without extension) consists only of hex digits but is not the
    lower case MD5 or SHA256 hash of the file content.
    Files ending with one of IGNORED_EXTS and directories named in IGNORED_DIRS are skipped.
    Names that are not purely hexadecimal are treated as descriptive names and accepted.

    args:
      args: object with a `testfiles` attribute, the path of the directory to check.

    returns: True if at least one file failed a check, False otherwise.
    """
    test_failed = False
    for root, dirs, files in os.walk(args.testfiles):
        # prune in place so os.walk never descends into these directories;
        # compare exact directory names so that an unrelated path component that
        # merely contains ".git" does not cause the whole tree to be skipped
        dirs[:] = sorted(d for d in dirs if d not in IGNORED_DIRS)

        for filename in sorted(files):
            if filename.endswith(IGNORED_EXTS):
                continue

            path = os.path.join(root, filename)

            if not filename.endswith(VALID_EXTS):
                logger.error("invalid file extension: %s", path)
                test_failed = True
                continue

            name, _ = os.path.splitext(filename)
            if not all(c in string.hexdigits for c in name):
                # descriptive name, nothing further to verify
                continue

            # the length check needs no file access, so do it before hashing
            if len(name) not in (32, 64):
                logger.error("invalid file name: %s, should be MD5 or SHA256 hash", path)
                test_failed = True
                continue

            try:
                hashes = get_file_hashes(path)
            except IOError as e:
                # best effort: an unreadable file is reported but does not fail the run
                logger.warning("unable to read file, skipping hash check: %s (%s)", path, e)
                continue

            # MD5 file name
            if len(name) == 32:
                if hashes["md5"] != name:
                    logger.error("invalid file name: %s, MD5 hash: %s", path, hashes["md5"])
                    test_failed = True
            # SHA256 file name
            else:
                if hashes["sha256"] != name:
                    logger.error("invalid file name: %s, SHA256 hash: %s", path, hashes["sha256"])
                    test_failed = True

    return test_failed


# the name matches pytest's default `test_*` pattern, but this is a helper that
# takes parsed arguments; opt out of collection in case it is imported by a test module
test_data_filenames.__test__ = False


def get_file_hashes(path):
    """
    Compute the MD5 and SHA256 hashes of a file.

    args:
      path: path of the file to hash.

    returns: dict with the keys "md5" and "sha256" mapping to lower case hex digests.

    raises: IOError (OSError) if the file cannot be opened or read.
    """
    md5 = hashlib.md5()
    sha256 = hashlib.sha256()

    with open(path, "rb") as f:
        # read fixed-size chunks so large samples are never held in memory at once
        for chunk in iter(lambda: f.read(HASH_CHUNK_SIZE), b""):
            md5.update(chunk)
            sha256.update(chunk)

    return {"md5": md5.hexdigest().lower(), "sha256": sha256.hexdigest().lower()}


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
sys.exit(main()) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..1f24e58 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,20 @@ +name: Tests + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + test_filenames: + runs-on: ubuntu-latest + steps: + - name: Checkout testfiles repository + uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Test repository files + run: python .github/check_sample_filenames.py . diff --git a/.gitignore b/.gitignore index 99349b6..ae66482 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.viv -*.json +*.idb +*.i64 diff --git a/2055994ff75b4309eee3a49c5749d306 b/2055994ff75b4309eee3a49c5749d306.exe_ similarity index 100% rename from 2055994ff75b4309eee3a49c5749d306 rename to 2055994ff75b4309eee3a49c5749d306.exe_ diff --git a/2f43138aa75fb12ac482b486cbc98569 b/2f43138aa75fb12ac482b486cbc98569.dll_ similarity index 100% rename from 2f43138aa75fb12ac482b486cbc98569 rename to 2f43138aa75fb12ac482b486cbc98569.dll_ diff --git a/82BF6347ACF15E5D883715DC289D8A2B.exe_ b/82bf6347acf15e5d883715dc289d8a2b.exe_ similarity index 100% rename from 82BF6347ACF15E5D883715DC289D8A2B.exe_ rename to 82bf6347acf15e5d883715dc289d8a2b.exe_ diff --git a/8ba66e4b618ffdc8255f1df01f875dde6fd0561305d9f8307be7bb11d02ae36.exe_ b/8ba66e4b618ffdc8255f1df01f875dde6fd0561305d9f8307be7bb11d02ae363.exe_ similarity index 100% rename from 8ba66e4b618ffdc8255f1df01f875dde6fd0561305d9f8307be7bb11d02ae36.exe_ rename to 8ba66e4b618ffdc8255f1df01f875dde6fd0561305d9f8307be7bb11d02ae363.exe_ diff --git a/README.md b/README.md new file mode 100644 index 0000000..801bb99 --- /dev/null +++ b/README.md @@ -0,0 +1,19 @@ +# Resources for testing capa +Data to test capa's [code](https://github.com/fireeye/capa) and [rules](https://github.com/fireeye/capa-rules). 
+ +## Naming conventions +We use the following conventions to organize the capa test data. + +- File name + - MD5 or SHA256 hash, all lower case, e.g. + - `d41d8cd98f00b204e9800998ecf8427e` + - `e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855` + - Descriptive name, e.g. + - `kernel32` + - `Practical Malware Analysis Lab 01-01` +- File extension + - `.exe_` + - `.dll_` + - `.sys_` + - `.raw32` (32-bit shellcode) + - `.raw64` (64-bit shellcode) diff --git a/readme.md b/readme.md deleted file mode 100644 index eefccc9..0000000 --- a/readme.md +++ /dev/null @@ -1 +0,0 @@ -# Resources for testing Capa