Skip to content

Commit

Permalink
test testfiles consistency
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-tz committed Dec 7, 2020
1 parent d3fefaa commit c6eb03c
Show file tree
Hide file tree
Showing 9 changed files with 140 additions and 2 deletions.
99 changes: 99 additions & 0 deletions .github/check_sample_filenames.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""
Check testfiles data directory for consistent naming.
Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at: [package root]/LICENSE.txt
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
"""

import os
import sys
import string
import hashlib
import logging
import os.path
import argparse

# logger used by the checks in this script
logger = logging.getLogger("capa.tests.data")

# file name suffixes that test_data_filenames() skips without checking
IGNORED_EXTS = (
    ".md",
    ".git",
    ".gitattributes",
    ".gitignore",
)

# file name suffixes that test_data_filenames() accepts; anything else is reported
VALID_EXTS = (
    ".exe_",
    ".dll_",
    ".sys_",
    ".raw32",
    ".raw64",
)


def main(argv=None):
    """
    Parse the command line and check the given test data directory.

    argv: list of command line arguments without the program name,
      defaults to sys.argv[1:] when None.
    returns: 1 when test_data_filenames() reports a failure, otherwise 0.
    """
    cli_args = sys.argv[1:] if argv is None else argv

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("testfiles", type=str, help="Path to tests/data")
    parsed = arg_parser.parse_args(args=cli_args)

    if test_data_filenames(parsed):
        return 1

    logger.info("test files look good!")
    return 0


def test_data_filenames(args):
    """
    Walk the test data directory and verify that every file is named consistently.

    Directories named `.git` or `.github` are not descended into and files ending in one of
    IGNORED_EXTS are skipped. Every other file must end in one of VALID_EXTS.
    If the file name without its extension consists only of hex digits, it must be the
    MD5 (32 characters) or SHA256 (64 characters) hash of the file content.
    Any other name is accepted as a descriptive name.

    args: parsed arguments; only `args.testfiles`, the directory to walk, is read.
    returns: True if at least one file violates the conventions, otherwise False.
    """
    skipped_dirs = (".git", ".github")

    test_failed = False
    for root, dirs, files in os.walk(args.testfiles):
        # prune in place so os.walk never enters the skipped directories;
        # comparing whole directory names avoids skipping paths that merely contain ".git"
        dirs[:] = [d for d in dirs if d not in skipped_dirs]

        for filename in files:
            if filename.endswith(IGNORED_EXTS):
                continue

            path = os.path.join(root, filename)

            if not filename.endswith(VALID_EXTS):
                logger.error("invalid file extension: %s", path)
                test_failed = True
                continue

            name, _ = os.path.splitext(filename)
            if not all(c in string.hexdigits for c in name):
                # descriptive name, nothing further to verify
                continue

            # decide which digest the name claims to be before reading the file,
            # so a name of the wrong length is reported even if the file is unreadable
            if len(name) == 32:
                algorithm, label = "md5", "MD5"
            elif len(name) == 64:
                algorithm, label = "sha256", "SHA256"
            else:
                logger.error("invalid file name: %s, should be MD5 or SHA256 hash", path)
                test_failed = True
                continue

            try:
                hashes = get_file_hashes(path)
            except IOError as e:
                # stay best-effort like before, but leave a trace that the file was not verified
                logger.warning("cannot read file, skipping hash check: %s (%s)", path, e)
                continue

            if hashes[algorithm] != name:
                logger.error("invalid file name: %s, %s hash: %s", path, label, hashes[algorithm])
                test_failed = True

    return test_failed


def get_file_hashes(path):
    """
    Compute the MD5 and SHA256 digests of a file's content.

    path: path of the file to read; it is opened in binary mode and read completely.
    returns: dict with the keys "md5" and "sha256" mapping to lower case hex digests.
    """
    with open(path, "rb") as handle:
        contents = handle.read()

    return {
        "md5": hashlib.md5(contents).hexdigest().lower(),
        "sha256": hashlib.sha256(contents).hexdigest().lower(),
    }


if __name__ == "__main__":
    # configure logging before running the checks so that info and error messages are shown
    logging.basicConfig(level=logging.INFO)
    exit_code = main()
    sys.exit(exit_code)
20 changes: 20 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Runs the sample file name consistency check on pushes and pull requests targeting master.
name: Tests

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  test_filenames:
    runs-on: ubuntu-latest
    steps:
    - name: Checkout testfiles repository
      uses: actions/checkout@v2
    - name: Set up Python 3.8
      uses: actions/setup-python@v2
      with:
        # quoted so the version stays a string instead of being parsed as a YAML float
        python-version: "3.8"
    - name: Test repository files
      # the repository root is passed as the directory to check
      run: python .github/check_sample_filenames.py .
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
*.viv
*.json
*.idb
*.i64
File renamed without changes.
File renamed without changes.
File renamed without changes.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Resources for testing capa
Data to test capa's [code](https://github.com/fireeye/capa) and [rules](https://github.com/fireeye/capa-rules).

## Naming conventions
We use the following conventions to organize the capa test data.

- File name
  - MD5 or SHA256 hash, all lower case, e.g.
    - `d41d8cd98f00b204e9800998ecf8427e`
    - `e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855`
  - Descriptive name, e.g.
    - `kernel32`
    - `Practical Malware Analysis Lab 01-01`
- File extension
  - `.exe_`
  - `.dll_`
  - `.sys_`
  - `.raw32` (32-bit shellcode)
  - `.raw64` (64-bit shellcode)
1 change: 0 additions & 1 deletion readme.md

This file was deleted.

0 comments on commit c6eb03c

Please sign in to comment.