diff --git a/ci-based/DEVELOPMENT.md b/ci-based/DEVELOPMENT.md index 51eac49..9663d60 100644 --- a/ci-based/DEVELOPMENT.md +++ b/ci-based/DEVELOPMENT.md @@ -1,5 +1,45 @@ # Development Notes +## Running locally + +Running `make up` will build all required container images and +start the services declared in the `docker-compose.yml` file. + +### test-http + +The docker-compose environment declares a `test-http` service that can be +used for local development. All files located in `./testing/builds` can be +fetched from the `test-http` service within the docker-compose environment. + +With the following directory structure `./testing`, the benchmarker API can +be instructed to use `http://test-http:8000/builds/zeek/build-5.2.tgz` as +the build URL. In a production environment, the build URL points at Cirrus. + + ./testing + └── builds + └── zeek + └── build-5.2.tgz + +### Submitting a Benchmarking Job + +To submit a Zeek benchmarking job with branch release/5.2, use the `tools/client.py` +utility. It defaults to an `HMAC_KEY` of `unset`, so it should just work unless +the `config.yml` was changed. + + python3 tools/client.py zeek release/5.2 \ + --api-url http://localhost:8080 \ + --build-hash 41ffacd82b02c923d53b675b113ec3bb55d320538c2c0cfb71c575a4cdb71371 \ + --build-url http://test-http:8000//builds/zeek/build-5.2.tgz \ + --cirrus-task-name ubuntu22 + {'job': {'enqueued_at': 'Tue, 10 Oct 2023 14:03:46 GMT', 'id': 'e23f8194-1aa4-40ab-a92c-5eddf0e1bf8e'}} + +If you're running `make tail-logs` in a separate terminal, logs should be +produced indicating progress. + +It is possible to use `tools/client.py` to re-submit jobs to the production API. +This requires the correct HMAC key and only works with build artifacts that are +still stored by Cirrus. 
+ ## Database Migrations This project is using [Alembic](https://alembic.sqlalchemy.org/en/latest/) diff --git a/ci-based/Makefile b/ci-based/Makefile index 6b95fb8..5f694cd 100644 --- a/ci-based/Makefile +++ b/ci-based/Makefile @@ -1,4 +1,4 @@ -all: zeek-benchmarker-zeek-runner +all: zeek-benchmarker-zeek-runner docker-compose-build .PHONY: @@ -6,9 +6,17 @@ zeek-benchmarker-zeek-runner: docker build -f Dockerfile . -t $@ .PHONY: -up: +docker-compose-build: + docker-compose build + +.PHONY: +up: all docker-compose up --force-recreate -d +.PHONY: +down: + docker-compose down + .PHONY: tail-logs: docker-compose logs --follow --tail=100 diff --git a/ci-based/config-tests.yml b/ci-based/config-tests.yml new file mode 100644 index 0000000..0ef2f37 --- /dev/null +++ b/ci-based/config-tests.yml @@ -0,0 +1,112 @@ +--- +ZEEK_TESTS: + - id: pcap-ixia-ent-data-center-2-30sec-500mbps + pcap_file: ixia_RamEntDataCenter2_30sec_500Mbps.pcap + + - id: pcap-zeek-testing-ipv6 + pcap_file: zeek-testing-ipv6.trace + + - id: pcap-zeek-testing-2009-m57-day11-18 + pcap_file: 2009-M57-day11-18.trace + + - id: pcap-50k-tcp-conns + pcap_file: 50k-tcp-conns.pcap + + - id: pcap-much-alexa-https-top-100 + pcap_file: much-alexa-https-top-100.pcap + + - id: pcap-much-alexa-dns-top-100 + pcap_file: much-alexa-dns-top-100.pcap + + - id: pcap-http-many-smaller + pcap_file: http_many_smaller.pcap + + - id: pcap-500k-syns + pcap_file: 500k-syns.pcap + + - id: pcap-quic-16-50mb + pcap_file: quic-16-50mb-transfers.pcap + + - id: pcap-quic-12k + pcap_file: quic-12k-connections.pcap + + - id: micro-misc-zeek-version + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/misc/zeek-version.zeek + + # Same as above, but not bare and load test-all-policy, too. 
+ - id: micro-misc-zeek-version-all-policy + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D test-all-policy microbenchmarks/misc/zeek-version.zeek + + - id: micro-vector-ops-simple-value + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/vector-ops/simple-value.zeek + + - id: micro-vector-ops-complex-value + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/vector-ops/complex-value.zeek + + - id: micro-table-ops-simple-key-value + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/table-ops/simple-key-value.zeek + + - id: micro-table-ops-complex-key-value + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/table-ops/complex-key-value.zeek + + - id: micro-table-ops-complex-key-value-2 + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/table-ops/complex-key-value-2.zeek + + - id: micro-table-ops-copy + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/table-ops/copy.zeek + + - id: micro-table-ops-small-table-simple-key + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/table-ops/small-table-simple-key.zeek + + - id: micro-table-ops-small-table-complex-key + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/table-ops/small-table-complex-key.zeek + + - id: micro-logging-tsv-one-stream + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/logging/one-stream.zeek + + - id: micro-logging-tsv-two-streams + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/logging/two-streams.zeek + + - id: micro-logging-json-one-stream + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/logging/one-stream.zeek LogAscii::use_json=T + + 
- id: micro-logging-json-two-streams + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/logging/two-streams.zeek LogAscii::use_json=T + + - id: micro-logging-writer-none-one-stream + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/logging/one-stream.zeek Log::default_writer=Log::WRITER_NONE + + - id: micro-logging-writer-none-two-streams + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/logging/two-streams.zeek Log::default_writer=Log::WRITER_NONE + + - id: micro-function-calls-bifs + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/function-calls/bifs.zeek + + - id: micro-function-calls-recursion + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/function-calls/recursion.zeek + + - id: micro-function-calls-many + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/function-calls/many.zeek + + - id: micro-patterns-basic + bench_command: /benchmarker/scripts/tiny-benchmark.sh + bench_args: -D -b microbenchmarks/patterns/basic.zeek diff --git a/ci-based/config.yml b/ci-based/config.yml index 0bca119..d668e08 100644 --- a/ci-based/config.yml +++ b/ci-based/config.yml @@ -38,111 +38,8 @@ RUN_COUNT: 5 # Max time for unpacking the build archives. 
TAR_TIMEOUT: 20 -ZEEK_TESTS: - - id: pcap-ixia-ent-data-center-2-30sec-500mbps - pcap_file: ixia_RamEntDataCenter2_30sec_500Mbps.pcap - - - id: pcap-zeek-testing-ipv6 - pcap_file: zeek-testing-ipv6.trace - - - id: pcap-zeek-testing-2009-m57-day11-18 - pcap_file: 2009-M57-day11-18.trace - - - id: pcap-50k-tcp-conns - pcap_file: 50k-tcp-conns.pcap - - - id: pcap-much-alexa-https-top-100 - pcap_file: much-alexa-https-top-100.pcap - - - id: pcap-much-alexa-dns-top-100 - pcap_file: much-alexa-dns-top-100.pcap - - - id: pcap-http-many-smaller - pcap_file: http_many_smaller.pcap - - - id: pcap-500k-syns - pcap_file: 500k-syns.pcap - - - id: micro-misc-zeek-version - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/misc/zeek-version.zeek - - # Same as above, but not bare and load test-all-policy, too. - - id: micro-misc-zeek-version-all-policy - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D test-all-policy microbenchmarks/misc/zeek-version.zeek - - - id: micro-vector-ops-simple-value - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/vector-ops/simple-value.zeek - - - id: micro-vector-ops-complex-value - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/vector-ops/complex-value.zeek - - - id: micro-table-ops-simple-key-value - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/table-ops/simple-key-value.zeek - - - id: micro-table-ops-complex-key-value - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/table-ops/complex-key-value.zeek - - - id: micro-table-ops-complex-key-value-2 - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/table-ops/complex-key-value-2.zeek - - - id: micro-table-ops-copy - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/table-ops/copy.zeek - - - 
id: micro-table-ops-small-table-simple-key - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/table-ops/small-table-simple-key.zeek - - - id: micro-table-ops-small-table-complex-key - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/table-ops/small-table-complex-key.zeek - - - id: micro-logging-tsv-one-stream - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/logging/one-stream.zeek - - - id: micro-logging-tsv-two-streams - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/logging/two-streams.zeek - - - id: micro-logging-json-one-stream - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/logging/one-stream.zeek LogAscii::use_json=T - - - id: micro-logging-json-two-streams - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/logging/two-streams.zeek LogAscii::use_json=T - - - id: micro-logging-writer-none-one-stream - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/logging/one-stream.zeek Log::default_writer=Log::WRITER_NONE - - - id: micro-logging-writer-none-two-streams - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/logging/two-streams.zeek Log::default_writer=Log::WRITER_NONE - - - id: micro-function-calls-bifs - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/function-calls/bifs.zeek - - - id: micro-function-calls-recursion - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/function-calls/recursion.zeek - - - id: micro-function-calls-many - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b microbenchmarks/function-calls/many.zeek - - - id: micro-patterns-basic - bench_command: /benchmarker/scripts/tiny-benchmark.sh - bench_args: -D -b 
microbenchmarks/patterns/basic.zeek +# Path to a YAML file describing which tests to run. +TESTS_FILE: ./config-tests.yml rq: logging: diff --git a/ci-based/docker-compose.yml b/ci-based/docker-compose.yml index 5594b87..b168d29 100644 --- a/ci-based/docker-compose.yml +++ b/ci-based/docker-compose.yml @@ -58,6 +58,7 @@ services: dockerfile: containers/rq.Dockerfile volumes: - ./config.yml:/app/config.yml + - ./config-tests.yml:/app/config-tests.yml - /var/run/docker.sock:/var/run/docker.sock - app_spool_data:/app/spool diff --git a/ci-based/tests/test_app.py b/ci-based/tests/test_app.py index cc3e32f..96df657 100644 --- a/ci-based/tests/test_app.py +++ b/ci-based/tests/test_app.py @@ -9,11 +9,13 @@ from zeek_benchmarker.testing import TestWithDatabase -def test_machine(): +def make_test_machine(): return Machine(dmi_product_uuid="ec2abcdef-1234", os="Linux") -@mock.patch("zeek_benchmarker.machine.get_machine", new_callable=lambda: test_machine) +@mock.patch( + "zeek_benchmarker.machine.get_machine", new_callable=lambda: make_test_machine +) @mock.patch("zeek_benchmarker.app.enqueue_job") class TestApi(TestWithDatabase): def setUp(self): diff --git a/ci-based/tests/test_tasks.py b/ci-based/tests/test_tasks.py index af8bd6e..c2bf780 100644 --- a/ci-based/tests/test_tasks.py +++ b/ci-based/tests/test_tasks.py @@ -143,3 +143,30 @@ def test__runc__tmpfs(self): self.assertEqual("/run", run_kwargs["environment"]["RUN_PATH"]) self.assertEqual("", run_kwargs["tmpfs"]["/mnt/data/tmpfs"]) self.assertEqual("", run_kwargs["tmpfs"]["/run"]) + + +class TestZeekJob(unittest.TestCase): + def setUp(self): + self.job = zeek_benchmarker.tasks.ZeekJob( + build_url="test-url", + build_hash="test-hash", + original_branch="test-original-branch", + normalized_branch="test-normalized-branch", + commit="test-commit", + job_id="test-job-id", + ) + + @mock.patch("zeek_benchmarker.tasks.ZeekJob.run_zeek_test") + def test_process(self, run_zeek_test_mock): + # This uses the Config.get() 
singleton call to get access + # to the + # config.yml and config-tests.yml stored in this repo. + # We use that for basic smoke testing that reading the + # config worked. + self.job._process() + + tests = [c.args[0] for c in run_zeek_test_mock.call_args_list] + ids = [t.test_id for t in tests] + self.assertGreater(len(ids), 10) + self.assertIn("micro-table-ops-copy", ids) + self.assertIn("pcap-500k-syns", ids) diff --git a/ci-based/zeek_benchmarker/app.py b/ci-based/zeek_benchmarker/app.py index adf1aeb..6b93a83 100644 --- a/ci-based/zeek_benchmarker/app.py +++ b/ci-based/zeek_benchmarker/app.py @@ -1,6 +1,7 @@ import hmac import os import time +import typing from datetime import datetime, timedelta import redis @@ -151,7 +152,7 @@ def parse_request(req): return req_vals -def enqueue_job(job_func, req_vals: dict[str, any]): +def enqueue_job(job_func, req_vals: dict[str, typing.Any]): """ Enqueue the given request vals via redis rq for processing. """ diff --git a/ci-based/zeek_benchmarker/config.py b/ci-based/zeek_benchmarker/config.py index 4480dbf..3e18a47 100644 --- a/ci-based/zeek_benchmarker/config.py +++ b/ci-based/zeek_benchmarker/config.py @@ -11,10 +11,11 @@ class SMTPSettings(typing.NamedTuple): class Config: - _config: "Config" = None + _config: typing.Optional["Config"] = None def __init__(self, d: dict[str, typing.Any]): self._d = d + self._tests_d: dict[str, typing.Any] | None = None @property def work_dir(self) -> str: @@ -32,6 +33,14 @@ def zeek_cpus(self) -> str: def run_count(self) -> int: return self._d["RUN_COUNT"] + @property + def zeek_tests(self) -> list[dict[str, typing.Any]]: + if self._tests_d is None: + with open(self["TESTS_FILE"]) as fp: + self._tests_d = yaml.safe_load(fp) + + return self._tests_d["ZEEK_TESTS"] + def __getitem__(self, k: str, default: typing.Any = None): """ Allow dictionary key lookups. @@ -51,7 +60,7 @@ def smtp_settings(self) -> SMTPSettings: ) -def get(): +def get() -> Config: """ Lazily load the config. 
""" diff --git a/ci-based/zeek_benchmarker/logging.py b/ci-based/zeek_benchmarker/logging.py index 5ff9011..23f3c36 100644 --- a/ci-based/zeek_benchmarker/logging.py +++ b/ci-based/zeek_benchmarker/logging.py @@ -15,13 +15,13 @@ def __init__( *, subject_prefix: str, toaddrs: list[str] | str, - cfg: config.Config = None, + cfg: config.Config | None = None, ): self.subject_prefix = subject_prefix cfg = cfg or config.get() super().__init__( toaddrs=toaddrs, - subject=None, # dynamically generated + subject="", # dynamically generated with getSubject() secure=(), # always secure **cfg.smtp_settings._asdict(), ) diff --git a/ci-based/zeek_benchmarker/storage.py b/ci-based/zeek_benchmarker/storage.py index 9e7ed79..3a56c6a 100644 --- a/ci-based/zeek_benchmarker/storage.py +++ b/ci-based/zeek_benchmarker/storage.py @@ -4,8 +4,10 @@ Really using sqlite directly, but this allows to test it some. """ import sqlite3 +import typing import sqlalchemy as sa +import zeek_benchmarker.tasks from . import config, models @@ -30,7 +32,7 @@ def store_job( job_id: str, kind: str, machine_id: int, - req_vals: dict[str, any], + req_vals: dict[str, typing.Any], ): with sqlite3.connect(self._filename) as conn: c = conn.cursor() @@ -79,9 +81,9 @@ def store_job( def store_zeek_result( self, *, - job: "zeek_benchmarker.tasks.ZeekJob", # noqa: F821 - test: "zeek_benchmarker.tasks.ZeekTest", # noqa: F821 - result: "zeek_benchmarker.tasks.ZeekTestResult", # noqa: F821 + job: "zeek_benchmarker.tasks.ZeekJob", + test: "zeek_benchmarker.tasks.ZeekTest", + result: "zeek_benchmarker.tasks.ZeekTestResult", ): """ Store a results entry into the zeek_tests table. diff --git a/ci-based/zeek_benchmarker/tasks.py b/ci-based/zeek_benchmarker/tasks.py index 497da8e..60ba56e 100644 --- a/ci-based/zeek_benchmarker/tasks.py +++ b/ci-based/zeek_benchmarker/tasks.py @@ -54,7 +54,7 @@ class ContainerRunner: the same service with different environment variables. 
""" - _instance: "ContainerRunner" = None + _instance: typing.Optional["ContainerRunner"] = None @staticmethod def get() -> "ContainerRunner": @@ -72,12 +72,11 @@ def runc( image: str, command: str, env: Env, - seccomp_profile: dict[str, any], # contents of the seccomp profile + seccomp_profile: dict[str, typing.Any], # contents of the seccomp profile install_volume: str, install_target: str, test_data_volume: str, test_data_target: str = "/test_data", - timeout: float = None, cap_add: list[str] | None = None, network_disabled: bool = True, ): @@ -254,29 +253,29 @@ class Job: job_id: str # build_hash rewritten - sha256: str = None + sha256: str | None = None - job_dir: pathlib.Path = None + job_dir: pathlib.Path | None = None # Absolute path to downloaded file. - build_path: str = None + build_path: str | None = None # Just the filename from build_path - build_filename: str = None + build_filename: str | None = None # This is the actual original branch # name as submitted through the API. 
- branch: str = None + branch: str | None = None # Extra information from the API - cirrus_repo_owner: str = None - cirrus_repo_name: str = None - cirrus_task_id: int = None - cirrus_task_name: str = None - cirrus_build_id: int = None - cirrus_pr: int = None - cirrus_pr_labels: str = None - github_check_suite_id: int = None - repo_version: str = None + cirrus_repo_owner: str | None = None + cirrus_repo_name: str | None = None + cirrus_task_id: int | None = None + cirrus_task_name: str | None = None + cirrus_build_id: int | None = None + cirrus_pr: int | None = None + cirrus_pr_labels: str | None = None + github_check_suite_id: int | None = None + repo_version: str | None = None @property def install_volume(self) -> str: @@ -402,13 +401,13 @@ def parse_from(test_run: int, output: bytes): class ZeekTest(typing.NamedTuple): test_id: str runs: int - pcap: str = None - bench_command: str = None - bench_args: str = None - skip: bool = None + pcap: str | None = None + bench_command: str | None = None + bench_args: str | None = None + skip: bool | None = None @staticmethod - def from_dict(cfg: config.Config, d: dict[str, any]): + def from_dict(cfg: config.Config, d: dict[str, typing.Any]): return ZeekTest( test_id=d["id"], runs=d.get("runs", cfg.run_count), @@ -496,7 +495,7 @@ def run_zeek_test(self, t): def _process(self): cfg = config.get() - for t in cfg["ZEEK_TESTS"]: + for t in cfg.zeek_tests: zeek_test = ZeekTest.from_dict(cfg, t) self.run_zeek_test(zeek_test)