Skip to content

Commit

Permalink
Add and populate machines table.
Browse files Browse the repository at this point in the history
This should help to track changes to the EC2 instance.

This also moves over to use SQLAlchemy in storage.py rather than
plain sqlite3.
  • Loading branch information
awelzel committed Sep 28, 2023
1 parent be98ebe commit fd7458f
Show file tree
Hide file tree
Showing 9 changed files with 362 additions and 32 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""add machine table
Revision ID: 311be9937d3e
Revises: 1c1d18482b62
Create Date: 2023-09-28 12:04:49.205816
"""
from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision: str = "311be9937d3e"
down_revision: str | None = "1c1d18482b62"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
op.create_table(
"machines",
sa.Column("id", sa.Integer, primary_key=True),
sa.Column(
"created_at", sa.DateTime, server_default=sa.text("(STRFTIME('%s'))")
),
sa.Column("dmi_sys_vendor", sa.Text),
sa.Column("dmi_product_uuid", sa.Text),
sa.Column("dmi_product_serial", sa.Text),
sa.Column("dmi_board_asset_tag", sa.Text),
sa.Column("os", sa.Text),
sa.Column("architecture", sa.Text),
sa.Column("cpu_model", sa.Text),
sa.Column("mem_total_bytes", sa.Integer),
)

op.add_column("jobs", sa.Column("machine_id", sa.Integer))


def downgrade() -> None:
op.drop_column("jobs", "machine_id")
op.drop_table("machines")
5 changes: 3 additions & 2 deletions ci-based/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ services:
# also come from the environment.
- ./config.yml:/app/config.yml

# Let the API record information about
# jobs in the database.
# Allow the API access to the host machine's machine-id.
- /etc/machine-id:/etc/machine-id

- ./persistent:/app/persistent
working_dir: /app
entrypoint:
Expand Down
64 changes: 40 additions & 24 deletions ci-based/tests/test_app.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
import datetime
import hmac
import sqlite3
import time
import unittest
from unittest import mock

from zeek_benchmarker.app import create_app, is_valid_branch_name
from zeek_benchmarker.models import Job, Machine
from zeek_benchmarker.testing import TestWithDatabase


def test_machine():
return Machine(dmi_product_uuid="ec2abcdef-1234", os="Linux")


@mock.patch("zeek_benchmarker.machine.get_machine", new_callable=lambda: test_machine)
@mock.patch("zeek_benchmarker.app.enqueue_job")
class TestApi(TestWithDatabase):
def setUp(self):
Expand Down Expand Up @@ -45,7 +50,7 @@ def hmac_digest(self, path, timestamp, build_hash):
hmac_msg = f"{path:s}-{timestamp:d}-{build_hash:s}\n".encode()
return hmac.new(self._test_hmac_key, hmac_msg, "sha256").hexdigest()

def test_zeek_good(self, enqueue_job_mock):
def test_zeek_good(self, enqueue_job_mock, get_machine_mock):
enqueue_job_mock.return_value = self.enqueue_job_result_mock

r = self._test_client.post(
Expand All @@ -67,7 +72,17 @@ def test_zeek_good(self, enqueue_job_mock):
self._test_build_hash, enqueue_job_mock.call_args[0][1]["build_hash"]
)

def test_zeek_good__more(self, enqueue_job_mock):
# Query the databse for the stored Job and Machine and
# ensure they are connected.
with self.storage.Session() as session:
jobs = session.query(Job).all()
machine = session.query(Machine).first()

self.assertEqual(1, len(jobs))
self.assertEqual("test-job-id", jobs[0].id)
self.assertEqual(machine.id, jobs[0].machine_id)

def test_zeek_good__more(self, enqueue_job_mock, get_machine_mock):
enqueue_job_mock.return_value = self.enqueue_job_result_mock

r = self._test_client.post(
Expand Down Expand Up @@ -98,25 +113,26 @@ def test_zeek_good__more(self, enqueue_job_mock):
self._test_build_hash, enqueue_job_mock.call_args[0][1]["build_hash"]
)

with sqlite3.connect(self.database_file.name) as conn:
conn.row_factory = sqlite3.Row
rows = list(conn.execute("select * from jobs"))
self.assertEqual(1, len(rows))
row = rows[0]
self.assertEqual("zeek", row["kind"])
self.assertEqual(self._test_build_hash, row["build_hash"])
self.assertEqual("f572d396fae9206628714fb2ce00f72e94f2258f", row["sha"])
self.assertEqual("test-branch", row["branch"])
self.assertEqual("test-owner", row["cirrus_repo_owner"])
self.assertEqual("test-name", row["cirrus_repo_name"])
self.assertEqual(123, row["cirrus_task_id"])
self.assertEqual("test-task-name", row["cirrus_task_name"])
self.assertEqual(9, row["cirrus_build_id"])
self.assertEqual(456, row["cirrus_pr"])
self.assertEqual(789, row["github_check_suite_id"])
self.assertEqual("6.1.0-dev.123", row["repo_version"])

def test_zeek_bad_build_url(self, enqueue_job_mock):
# Query the databse for the stored Job and Machine and
# ensure they are connected.
with self.storage.Session() as session:
jobs = session.query(Job).all()
self.assertEqual(1, len(jobs))
job = jobs[0]
self.assertEqual("zeek", job.kind)
self.assertEqual(self._test_build_hash, job.build_hash)
self.assertEqual("f572d396fae9206628714fb2ce00f72e94f2258f", job.sha)
self.assertEqual("test-branch", job.branch)
self.assertEqual("test-owner", job.cirrus_repo_owner)
self.assertEqual("test-name", job.cirrus_repo_name)
self.assertEqual(123, job.cirrus_task_id)
self.assertEqual("test-task-name", job.cirrus_task_name)
self.assertEqual(9, job.cirrus_build_id)
self.assertEqual(456, job.cirrus_pr)
self.assertEqual(789, job.github_check_suite_id)
self.assertEqual("6.1.0-dev.123", job.repo_version)

def test_zeek_bad_build_url(self, enqueue_job_mock, get_machine_mock):
enqueue_job_mock.return_value = self.enqueue_job_result_mock

r = self._test_client.post(
Expand All @@ -136,7 +152,7 @@ def test_zeek_bad_build_url(self, enqueue_job_mock):

enqueue_job_mock.assert_not_called()

def test_zeek_bad_hmac_digest(self, enqueue_job_mock):
def test_zeek_bad_hmac_digest(self, enqueue_job_mock, get_machine_mock):
enqueue_job_mock.return_value = self.enqueue_job_result_mock

r = self._test_client.post(
Expand All @@ -156,7 +172,7 @@ def test_zeek_bad_hmac_digest(self, enqueue_job_mock):

enqueue_job_mock.assert_not_called()

def test_zeek_bad_branch(self, enqueue_job_mock):
def test_zeek_bad_branch(self, enqueue_job_mock, get_machine_mock):
enqueue_job_mock.return_value = self.enqueue_job_result_mock

r = self._test_client.post(
Expand Down
68 changes: 68 additions & 0 deletions ci-based/tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sqlite3

from zeek_benchmarker import storage, testing
from zeek_benchmarker.models import Machine
from zeek_benchmarker.tasks import ZeekJob, ZeekTest, ZeekTestResult


Expand All @@ -23,6 +24,58 @@ def setUp(self):

self.store = storage.Storage(self.database_file.name)

def make_test_machine(self, **kwargs):
m = Machine(
dmi_sys_vendor="LENOVO",
dmi_product_uuid="test-product-uuid",
dmi_product_serial="test-serial",
dmi_board_asset_tag="test-asset-tag",
os="Linux",
architecture="x86_64",
cpu_model="test-cpu-model",
mem_total_bytes=16458768384,
)
for k, v in kwargs.items():
setattr(m, k, v)
return m

def test_store_job(self):
"""
Store a job in the database.
"""
self.store.store_job(
job_id="test-job-id",
kind="zeek",
machine_id=421234,
req_vals={
"build_url": "test-build-url",
"build_hash": "test-build-hash",
"sha": "test-sha",
"commit": "test-sha",
"branch": "test-branch",
"original_branch": "test-original-branch",
"cirrus_repo_owner": "test-cirrus-repo-owner",
"cirrus_repo_name": "test-cirrus-repo-name",
"cirrus_task_id": "test-cirrus-task-id",
"cirrus_task_name": "test-cirrus-task-name",
"cirrus_build_id": "test-cirrus-build-id",
"cirrus_pr": "1111",
"github_check_suite_id": "22334455",
"repo_version": "test-repo-version",
},
)

with sqlite3.connect(self.database_file.name) as conn:
conn.row_factory = sqlite3.Row
rows = list(conn.execute("select * from jobs"))

# Just some smoke-checking
self.assertEqual(1, len(rows))
self.assertEqual(rows[0]["machine_id"], 421234)
self.assertEqual(rows[0]["id"], "test-job-id")
self.assertEqual(rows[0]["cirrus_pr"], 1111)
self.assertEqual(rows[0]["github_check_suite_id"], 22334455)

def test_store_zeek_result(self):
"""
Store a result in the database.
Expand Down Expand Up @@ -66,3 +119,18 @@ def test_store_zeek_error(self):
self.assertEqual(rows[0]["test_run"], 3)
self.assertFalse(rows[0]["success"])
self.assertEqual(rows[0]["error"], "Something broke")

def test_get_or_create_machine(self):
m1 = self.store.get_or_create_machine(self.make_test_machine())
m2 = self.store.get_or_create_machine(self.make_test_machine())
self.assertEqual(m1.id, m2.id)

m3 = self.store.get_or_create_machine(
self.make_test_machine(mem_total_bytes=12345678)
)
m4 = self.store.get_or_create_machine(
self.make_test_machine(mem_total_bytes=12345678)
)
self.assertEqual(m3.id, m4.id)

self.assertNotEqual(m1.id, m3.id)
7 changes: 7 additions & 0 deletions ci-based/zeek_benchmarker/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import redis
import rq
import zeek_benchmarker.machine
import zeek_benchmarker.tasks
from flask import Flask, current_app, jsonify, request
from werkzeug.exceptions import BadRequest, Forbidden
Expand Down Expand Up @@ -187,9 +188,15 @@ def zeek():

# Store information about this job, too.
store = storage.Storage(app.config["DATABASE_FILE"])

# Store the machine information with the job. The assumption
# here is that the system serving the API is also executing
# the job. Otherwise this would need to move into tasks.py.
machine = store.get_or_create_machine(zeek_benchmarker.machine.get_machine())
store.store_job(
job_id=job.id,
kind="zeek",
machine_id=machine.id,
req_vals=req_vals,
)

Expand Down
79 changes: 79 additions & 0 deletions ci-based/zeek_benchmarker/machine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""
Collect information about the running machine.
Requires access to /sys for some of the DMI information
"""
import logging
import platform
from pathlib import Path

from . import models

logger = logging.getLogger(__name__)


def read_sys_dmi_id_file(
name: str, base_path: Path = Path("/sys/devices/virtual/dmi/id")
) -> str:
"""
Read /sys/devices/virtual/dmi/id/{name} and return the content, stripped.
"""
try:
return (base_path / name).read_text().strip()
except FileNotFoundError as e:
logger.warning("Could not open %s: %s", base_path / name, e)
return ""


def get_cpu_model(path: Path = Path("/proc/cpuinfo")):
"""
Seems platform.processor() isn't working well.
Parse the first model name line out of /proc/cpuinfo
model : 142
model name : Intel(R) Core(TM) i7-8565U CPU @ 1.80GHz
stepping : 12
"""
with path.open() as f:
for line in f:
if line.startswith("model name"):
return line.split(":", 1)[1].strip()

return ""


def get_mem_total_bytes(path: Path = Path("/proc/meminfo")):
"""
Parse /proc/meminfo for the MemTotal: line.
MemTotal: 16073016 kB
MemFree: 4273580 kB
"""
with path.open() as f:
for line in f:
if line.startswith("MemTotal:"):
value = line.split(":", 1)[1]
kb, suffix = value.strip().split(" ", 1)
if suffix != "kB" or not kb.isnumeric():
raise Exception(f"Unexpected value ${kb} / {suffix}")
return int(kb) * 1024

return 0


def get_machine() -> models.Machine:
"""
Collect information for this system/machine.
"""
kwargs = {}
for k in ["sys_vendor", "product_uuid", "product_serial", "board_asset_tag"]:
kwargs[f"dmi_{k}"] = read_sys_dmi_id_file(k)

kwargs["os"] = platform.system()
kwargs["architecture"] = platform.machine()
kwargs["cpu_model"] = get_cpu_model()
kwargs["mem_total_bytes"] = get_mem_total_bytes()

return models.Machine(**kwargs)
Loading

0 comments on commit fd7458f

Please sign in to comment.