From 94c61e62b992c836d492910a88f629f28f56e2dd Mon Sep 17 00:00:00 2001 From: ocrd Date: Wed, 6 Mar 2024 09:31:09 +0100 Subject: [PATCH 1/7] remove SSH to Controller for job status --- .env.example | 6 +- Makefile | 14 ---- README.md | 3 - docker-compose.yml | 2 - init.sh | 21 ------ ocrdmonitor/database/_ocrdjobrepository.py | 1 - ocrdmonitor/environment.py | 3 - ocrdmonitor/ocrdcontroller.py | 25 ------- ocrdmonitor/processstatus.py | 58 ---------------- ocrdmonitor/protocols.py | 12 ---- ocrdmonitor/server/jobs.py | 28 +------- ocrdmonitor/server/settings.py | 7 -- ocrdmonitor/server/templates/jobs.html.j2 | 20 ++---- ocrdmonitor/sshremote.py | 68 ------------------- .../server/fixtures/environment.py | 10 --- tests/ocrdmonitor/server/fixtures/settings.py | 5 -- tests/ocrdmonitor/server/test_job_endpoint.py | 2 - tests/ocrdmonitor/server/test_settings.py | 8 --- tests/ocrdmonitor/test_processstatus.py | 43 ------------ tests/ocrdmonitor/test_sshremote.py | 46 ------------- 20 files changed, 9 insertions(+), 373 deletions(-) delete mode 100644 ocrdmonitor/ocrdcontroller.py delete mode 100644 ocrdmonitor/processstatus.py delete mode 100644 ocrdmonitor/sshremote.py delete mode 100644 tests/ocrdmonitor/test_processstatus.py delete mode 100644 tests/ocrdmonitor/test_sshremote.py diff --git a/.env.example b/.env.example index d55c1fb..fb13bda 100644 --- a/.env.example +++ b/.env.example @@ -1,8 +1,4 @@ -CONTROLLER_HOST=ocrd-controller -CONTROLLER_PORT_SSH=22 - -MANAGER_DATA=~/.ssh/id_rsa -MANAGER_KEY=~/ +MANAGER_DATA=~/ MANAGER_HOST=ocrd-manager MANAGER_PORT_WEB=4004 diff --git a/Makefile b/Makefile index c883c78..6a0ecd2 100644 --- a/Makefile +++ b/Makefile @@ -28,44 +28,30 @@ Variables: currently: "$(TAGNAME)" - MONITOR_PORT_WEB TCP port for the (host-side) web server currently: $(MONITOR_PORT_WEB) - - MANAGER_KEY SSH key file to mount (for the Controller client) - currently: "$(MANAGER_KEY)" - MANAGER_DATA host directory to mount into `/data` (shared with Manager) currently: "$(MANAGER_DATA)" - MANAGER_WORKFLOWS host directory to mount into `/workflows` (shared with Manager) currently: "$(MANAGER_WORKFLOWS)" - NETWORK Docker network to use (manage via "docker network") currently: $(NETWORK) - - CONTROLLER_HOST network address for the Controller client - (must be reachable from the container network) - currently: $(CONTROLLER_HOST) - - CONTROLLER_PORT_SSH network port for the Controller client - (must be reachable from the container network) - currently: $(CONTROLLER_PORT_SSH) EOF endef export HELP help: ; @eval "$$HELP" -MANAGER_KEY ?= $(firstword $(filter-out %.pub,$(wildcard $(HOME)/.ssh/id_*))) MANAGER_DATA ?= $(CURDIR) MANAGER_WORKFLOWS ?= $(CURDIR) MONITOR_PORT_WEB ?= 5000 NETWORK ?= bridge -CONTROLLER_HOST ?= $(shell dig +short $$HOSTNAME) -CONTROLLER_PORT_SSH ?= 8022 run: $(DATA) docker run -d --rm \ -h ocrd_monitor \ --name ocrd_monitor \ --network=$(NETWORK) \ -p $(MONITOR_PORT_WEB):5000 \ - -v ${MANAGER_KEY}:/id_rsa \ - --mount type=bind,source=$(MANAGER_KEY),target=/id_rsa \ -v $(MANAGER_DATA):/data \ -v $(MANAGER_WORKFLOWS):/workflows \ -v shared:/run/lock/ocrd.jobs \ - -e CONTROLLER=$(CONTROLLER_HOST):$(CONTROLLER_PORT_SSH) \ -e MONITOR_PORT_LOG=${MONITOR_PORT_LOG} \ $(TAGNAME) diff --git a/README.md b/README.md index b8cc38d..9d879a3 100644 --- a/README.md +++ b/README.md @@ -21,11 +21,8 @@ In order to work properly, the following **environment variables** must be set: | Variable | Description | | ------------------- | -------------------------------------------------------------------------------- | -| CONTROLLER_HOST | Hostname of the OCR-D Controller | -| CONTROLLER_PORT_SSH | Port on the OCR-D Controller host that allows a SSH connection | | MANAGER_DATA | Path to the OCR-D workspaces on the host | | MANAGER_WORKFLOWS | Path to the OCR-D workflows on the host | -| MANAGER_KEY | Path to a private key that can be used to authenticate with the OCR-D Controller | | MONITOR_PORT_WEB | The port at which the OCR-D Monitor will be available on the host | | MONITOR_PORT_LOG | The port at which the Dozzle logs will be available on the host | diff --git a/docker-compose.yml b/docker-compose.yml index dd69359..b53bda6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,7 +17,6 @@ services: hostname: ${MONITOR_HOST} environment: - CONTROLLER: "${CONTROLLER_HOST}:${CONTROLLER_PORT_SSH}" MANAGER_URL: "http://${MANAGER_HOST}:${MANAGER_PORT_WEB}" MONITOR_PORT_LOG: ${MONITOR_PORT_LOG} MONITOR_DB_CONNECTION: "mongodb://${MONITOR_DB_ROOT_USER:-root}:${MONITOR_DB_ROOT_PASSWORD:-root_password}@ocrd-database:27017" @@ -28,7 +27,6 @@ services: volumes: - ${MANAGER_DATA}:/data - ${MANAGER_WORKFLOWS}:/workflows - - ${MANAGER_KEY}:/id_rsa - shared:/run/lock/ocrd.jobs ocrd-logview: diff --git a/init.sh b/init.sh index c003376..93179de 100755 --- a/init.sh +++ b/init.sh @@ -1,31 +1,10 @@ #!/usr/bin/env bash -mkdir -p ~/.ssh -cat /id_rsa >> ~/.ssh/id_rsa -chmod go-rw ~/.ssh/id_rsa - -# Add ocrd controller as global and known_hosts if env exist -if [ -n "$CONTROLLER" ]; then - CONTROLLER_HOST=${CONTROLLER%:*} - CONTROLLER_PORT=${CONTROLLER#*:} - CONTROLLER_IP=$(nslookup $CONTROLLER_HOST | grep 'Address\:' | awk 'NR==2 {print $2}') - - if test -e /etc/ssh/ssh_known_hosts; then - ssh-keygen -R $CONTROLLER_HOST -f /etc/ssh/ssh_known_hosts - ssh-keygen -R $CONTROLLER_IP -f /etc/ssh/ssh_known_hosts - fi - ssh-keyscan -H -p ${CONTROLLER_PORT:-22} $CONTROLLER_HOST,$CONTROLLER_IP >>/etc/ssh/ssh_known_hosts -fi - export MONITOR_DB_CONNECTION_STRING=$MONITOR_DB_CONNECTION export OCRD_BROWSER__MODE=native export OCRD_BROWSER__WORKSPACE_DIR=/data/ocr-d export OCRD_BROWSER__PORT_RANGE="[9000,9100]" export OCRD_LOGVIEW__PORT=$MONITOR_PORT_LOG -export OCRD_CONTROLLER__HOST=$CONTROLLER_HOST -export OCRD_CONTROLLER__PORT=$CONTROLLER_PORT -export OCRD_CONTROLLER__USER=admin -export OCRD_CONTROLLER__KEYFILE=~/.ssh/id_rsa export OCRD_MANAGER__URL=$MANAGER_URL cd /usr/local/ocrd-monitor diff --git a/ocrdmonitor/database/_ocrdjobrepository.py b/ocrdmonitor/database/_ocrdjobrepository.py index dd07d35..2e66a09 100644 --- a/ocrdmonitor/database/_ocrdjobrepository.py +++ b/ocrdmonitor/database/_ocrdjobrepository.py @@ -20,7 +20,6 @@ class MongoOcrdJob(Document): workdir: Path remotedir: str workflow_file: Path - controller_address: str class Settings: name = "OcrdJob" diff --git a/ocrdmonitor/environment.py b/ocrdmonitor/environment.py index 6a01f5e..8bbd5fb 100644 --- a/ocrdmonitor/environment.py +++ b/ocrdmonitor/environment.py @@ -40,6 +40,3 @@ async def repositories(self) -> Repositories: def browser_factory(self) -> OcrdBrowserFactory: port_range_set = set(range(*self.settings.ocrd_browser.port_range)) return CreatingFactories[self.settings.ocrd_browser.mode](port_range_set) - - def controller_server(self) -> RemoteServer: - return SSHRemote(self.settings.ocrd_controller) diff --git a/ocrdmonitor/ocrdcontroller.py b/ocrdmonitor/ocrdcontroller.py deleted file mode 100644 index f869946..0000000 --- a/ocrdmonitor/ocrdcontroller.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import annotations - -from ocrdmonitor.processstatus import ProcessState, ProcessStatus -from ocrdmonitor.protocols import OcrdJob, RemoteServer - - -class OcrdController: - def __init__(self, remote: RemoteServer) -> None: - self._remote = remote - - async def status_for(self, ocrd_job: OcrdJob) -> ProcessStatus | None: - if ocrd_job.remotedir is None: - return None - - pid = await self._remote.read_file(f"/data/{ocrd_job.remotedir}/ocrd.pid") - process_statuses = await self._remote.process_status(int(pid)) - - for status in process_statuses: - if status.state == ProcessState.RUNNING: - return status - - if process_statuses: - return process_statuses[0] - - return None diff --git a/ocrdmonitor/processstatus.py b/ocrdmonitor/processstatus.py deleted file mode 100644 index 433284b..0000000 --- a/ocrdmonitor/processstatus.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from datetime import timedelta -from enum import Enum - - -class ProcessState(Enum): - # see ps(1)#PROCESS_STATE_CODES - RUNNING = "R" - SLEEPING = "S" - SLEEPIO = "D" - STOPPED = "T" - TRACING = "t" - ZOMBIE = "Z" - UNKNOWN = "?" - - def __str__(self) -> str: - return str(self.name) - - -@dataclass(frozen=True) -class ProcessStatus: - pid: int - state: ProcessState - percent_cpu: float - memory: int - cpu_time: timedelta - - @classmethod - def shell_command(cls, pid: int) -> str: - return f"ps -s {pid} -o pid,state,%cpu,rss,cputime --no-headers" - - @classmethod - def from_shell_output(cls, ps_output: str) -> list["ProcessStatus"]: - def is_error(lines: list[str]) -> bool: - return lines[0].startswith("error:") - - def parse_line(line: str) -> "ProcessStatus": - pid, state, percent_cpu, memory, cpu_time, *_ = line.split() - return cls( - pid=int(pid), - state=ProcessState(state[0]), - percent_cpu=float(percent_cpu), - memory=int(memory), - cpu_time=timedelta(seconds=_cpu_time_to_seconds(cpu_time)), - ) - - lines = ps_output.strip().splitlines() - if not lines or is_error(lines): - return [] - - return [parse_line(line) for line in lines] - - -def _cpu_time_to_seconds(cpu_time: str) -> int: - hours, minutes, seconds, *_ = cpu_time.split(":") - return int(hours) * 3600 + int(minutes) * 60 + int(seconds) diff --git a/ocrdmonitor/protocols.py b/ocrdmonitor/protocols.py index 066c0bf..5a37c84 100644 --- a/ocrdmonitor/protocols.py +++ b/ocrdmonitor/protocols.py @@ -4,7 +4,6 @@ from typing import Collection, NamedTuple, Protocol from ocrdbrowser import OcrdBrowser, OcrdBrowserFactory -from ocrdmonitor.processstatus import ProcessStatus from ocrdmonitor.server.settings import Settings @@ -49,7 +48,6 @@ class OcrdJob: workdir: Path remotedir: str workflow_file: Path - controller_address: str @property def is_running(self) -> bool: @@ -72,13 +70,6 @@ async def find_all(self) -> list[OcrdJob]: ... -class RemoteServer(Protocol): - async def read_file(self, path: str) -> str: - ... - - async def process_status(self, process_group: int) -> list[ProcessStatus]: - ... - class Repositories(NamedTuple): browser_processes: BrowserProcessRepository ocrd_jobs: JobRepository @@ -92,6 +83,3 @@ async def repositories(self) -> Repositories: def browser_factory(self) -> OcrdBrowserFactory: ... - - def controller_server(self) -> RemoteServer: - ... diff --git a/ocrdmonitor/server/jobs.py b/ocrdmonitor/server/jobs.py index fe838d2..1eb5458 100644 --- a/ocrdmonitor/server/jobs.py +++ b/ocrdmonitor/server/jobs.py @@ -8,7 +8,6 @@ from fastapi.responses import JSONResponse from fastapi.templating import Jinja2Templates -from ocrdmonitor.ocrdcontroller import OcrdController from ocrdmonitor.processstatus import ProcessStatus from ocrdmonitor.protocols import Environment, OcrdJob, Repositories @@ -16,12 +15,6 @@ import logging -@dataclass -class RunningJob: - ocrd_job: OcrdJob - process_status: ProcessStatus - - def split_into_running_and_completed( jobs: Iterable[OcrdJob], ) -> tuple[list[OcrdJob], list[OcrdJob]]: @@ -30,25 +23,11 @@ def split_into_running_and_completed( return running_ocrd_jobs, completed_ocrd_jobs -def wrap_in_running_job_type( - running_ocrd_jobs: Iterable[OcrdJob], - job_status: Iterable[ProcessStatus | None], -) -> Iterable[RunningJob]: - running_jobs = [ - RunningJob(job, process_status) - for job, process_status in zip(running_ocrd_jobs, job_status) - if process_status is not None - ] - - return running_jobs - - def create_jobs( templates: Jinja2Templates, environment: Environment, ) -> APIRouter: router = APIRouter(prefix="/jobs") - controller = OcrdController(environment.controller_server()) @router.get("/", name="jobs") async def jobs( @@ -58,17 +37,14 @@ async def jobs( jobs = await job_repository.find_all() running, completed = split_into_running_and_completed(jobs) - job_status = [await controller.status_for(job) for job in running] - running_jobs = wrap_in_running_job_type(running, job_status) - now = datetime.now(timezone.utc) return templates.TemplateResponse( "jobs.html.j2", { "request": request, "running_jobs": sorted( - running_jobs, - key=lambda x: x.ocrd_job.time_created or now, + running, + key=lambda x: x.time_created or now, ), "completed_jobs": sorted( completed, diff --git a/ocrdmonitor/server/settings.py b/ocrdmonitor/server/settings.py index b5c88a3..6fbec0c 100644 --- a/ocrdmonitor/server/settings.py +++ b/ocrdmonitor/server/settings.py @@ -34,12 +34,6 @@ } -class OcrdControllerSettings(BaseSettings): - host: str - user: str - port: int = 22 - keyfile: Path = Path.home() / ".ssh" / "id_rsa" - class OcrdManagerSettings(BaseSettings): url: str @@ -78,7 +72,6 @@ class Settings(BaseSettings): monitor_db_connection_string: str ocrd_browser: OcrdBrowserSettings - ocrd_controller: OcrdControllerSettings ocrd_logview: OcrdLogViewSettings ocrd_manager: OcrdManagerSettings diff --git a/ocrdmonitor/server/templates/jobs.html.j2 b/ocrdmonitor/server/templates/jobs.html.j2 index 05c2459..eab8f3e 100644 --- a/ocrdmonitor/server/templates/jobs.html.j2 +++ b/ocrdmonitor/server/templates/jobs.html.j2 @@ -32,27 +32,19 @@ PROCESS ID WORKFLOW PID - STATUS - % CPU - MB RSS - DURATION ACTION {% for job in running_jobs: %} - {{ job.ocrd_job.time_created }} - {{ job.ocrd_job.task_id }} - {{ job.ocrd_job.process_id }} - {{ job.ocrd_job.workflow + {{ job.time_created }} + {{ job.task_id }} + {{ job.process_id }} + {{ job.workflow }} - {{ job.process_status.pid }} - {{ job.process_status.state }} - {{ job.process_status.percent_cpu }} - {{ job.process_status.memory }} - {{ job.process_status.cpu_time }} - + {{ job.pid }} + {% endfor %} diff --git a/ocrdmonitor/sshremote.py b/ocrdmonitor/sshremote.py deleted file mode 100644 index 598255a..0000000 --- a/ocrdmonitor/sshremote.py +++ /dev/null @@ -1,68 +0,0 @@ -from __future__ import annotations - -import asyncio -import logging -import shlex -from pathlib import Path -from typing import Protocol - -from ocrdmonitor.processstatus import ProcessStatus - - -class SSHConfig(Protocol): - host: str - port: int - user: str - keyfile: Path - - -class SSHRemote: - def __init__(self, config: SSHConfig) -> None: - self._config = config - - async def read_file(self, path: str) -> str: - result = await asyncio.create_subprocess_shell( - _ssh(self._config, f"cat {path}"), - stdout=asyncio.subprocess.PIPE, - ) - await result.wait() - - if not result.stdout: - return "" - - return (await result.stdout.read()).decode() - - async def process_status(self, process_group: int) -> list[ProcessStatus]: - pid_cmd = ProcessStatus.shell_command(process_group) - result = await asyncio.create_subprocess_shell( - _ssh(self._config, pid_cmd), - stdout=asyncio.subprocess.PIPE, - ) - - if await result.wait() > 0: - logging.error( - f"checking status of process {process_group} failed: {result.stderr}" - ) - return [] - - if not result.stdout: - return [] - - output = (await result.stdout.read()).decode() - return ProcessStatus.from_shell_output(output) - - -def _ssh(config: SSHConfig, cmd: str) -> str: - return shlex.join( - ( - "ssh", - "-o", - "StrictHostKeyChecking=no", - "-i", - str(config.keyfile), - "-p", - str(config.port), - f"{config.user}@{config.host}", - *shlex.split(cmd), - ) - ) diff --git a/tests/ocrdmonitor/server/fixtures/environment.py b/tests/ocrdmonitor/server/fixtures/environment.py index 12a17c2..a9074b7 100644 --- a/tests/ocrdmonitor/server/fixtures/environment.py +++ b/tests/ocrdmonitor/server/fixtures/environment.py @@ -47,7 +47,6 @@ class DevEnvironment: settings: Settings _repositories: Repositories _factory: BrowserTestDoubleFactory - controller_remote: RemoteServer = RemoteDummy() _app: TestClient = field(init=False) @@ -60,9 +59,6 @@ async def repositories(self) -> Repositories: def browser_factory(self) -> OcrdBrowserFactory: return self._factory - def controller_server(self) -> RemoteServer: - return self.controller_remote - @property def app(self) -> TestClient: return self._app @@ -79,7 +75,6 @@ class Fixture: def __init__(self) -> None: self.browser_constructor: BrowserConstructor = BrowserSpy self.repo_constructor: RepositoryInitializer = inmemory_repository - self.remote_controller: RemoteServer = RemoteDummy() self.existing_browsers: list[BrowserTestDouble] = [] self.session_id = "" @@ -97,10 +92,6 @@ def with_running_browsers(self, *browsers: BrowserTestDouble) -> Self: self.existing_browsers = list(browsers) return self - def with_controller_remote(self, remote: RemoteServer) -> Self: - self.remote_controller = remote - return self - def with_session_id(self, session_id: str) -> Self: self.session_id = session_id return self @@ -115,7 +106,6 @@ async def __aenter__(self) -> DevEnvironment: create_settings(), _factory=factory, _repositories=repositories, - controller_remote=self.remote_controller, ) self._init_app(env.app) diff --git a/tests/ocrdmonitor/server/fixtures/settings.py b/tests/ocrdmonitor/server/fixtures/settings.py index 75e3ce4..6ce7f85 100644 --- a/tests/ocrdmonitor/server/fixtures/settings.py +++ b/tests/ocrdmonitor/server/fixtures/settings.py @@ -2,7 +2,6 @@ from ocrdmonitor.server.settings import ( OcrdBrowserSettings, - OcrdControllerSettings, OcrdLogViewSettings, OcrdManagerSettings, Settings, @@ -19,10 +18,6 @@ def create_settings() -> Settings: workspace_dir=WORKSPACE_DIR, port_range=(9000, 9100), ), - ocrd_controller=OcrdControllerSettings( - host="", - user="", - ), ocrd_logview=OcrdLogViewSettings(port=8022), ocrd_manager=OcrdManagerSettings(url="https://manager.ocrdhost.com") ) diff --git a/tests/ocrdmonitor/server/test_job_endpoint.py b/tests/ocrdmonitor/server/test_job_endpoint.py index b297b4b..0307b5c 100644 --- a/tests/ocrdmonitor/server/test_job_endpoint.py +++ b/tests/ocrdmonitor/server/test_job_endpoint.py @@ -28,7 +28,6 @@ def job_template() -> OcrdJob: workdir=Path("ocr-d/data/5432"), workflow_file=Path("ocr-workflow-default.sh"), remotedir="/remote/job/dir", - controller_address="controller.ocrdhost.com", time_created=created_at, time_terminated=terminated_at, ) @@ -61,7 +60,6 @@ async def test__given_a_running_ocrd_job__the_job_endpoint_lists_it_with_resourc pid = 1234 expected_status = make_status(pid) remote_stub = RemoteServerStub(expected_status) - fixture = repository_fixture.with_controller_remote(remote_stub) async with fixture as env: app = env.app diff --git a/tests/ocrdmonitor/server/test_settings.py b/tests/ocrdmonitor/server/test_settings.py index fe31d7b..8bb6496 100644 --- a/tests/ocrdmonitor/server/test_settings.py +++ b/tests/ocrdmonitor/server/test_settings.py @@ -5,7 +5,6 @@ from ocrdmonitor.server.settings import ( OcrdBrowserSettings, - OcrdControllerSettings, OcrdLogViewSettings, OcrdManagerSettings, Settings, @@ -18,12 +17,6 @@ workspace_dir=Path("path/to/workdir"), port_range=(9000, 9100), ), - ocrd_controller=OcrdControllerSettings( - host="controller.ocrdhost.com", - user="controller_user", - port=22, - keyfile=Path(".ssh/id_rsa"), - ), ocrd_logview=OcrdLogViewSettings( port=22, ), @@ -43,7 +36,6 @@ def to_dict(setting_name: str, settings: dict[str, Any]) -> dict[str, str]: return dict( MONITOR_DB_CONNECTION_STRING=EXPECTED.monitor_db_connection_string, **to_dict("BROWSER", EXPECTED.ocrd_browser.model_dump()), - **to_dict("CONTROLLER", EXPECTED.ocrd_controller.model_dump()), **to_dict("LOGVIEW", EXPECTED.ocrd_logview.model_dump()), **to_dict("MANAGER", EXPECTED.ocrd_manager.model_dump()), ) diff --git a/tests/ocrdmonitor/test_processstatus.py b/tests/ocrdmonitor/test_processstatus.py deleted file mode 100644 index 3347959..0000000 --- a/tests/ocrdmonitor/test_processstatus.py +++ /dev/null @@ -1,43 +0,0 @@ -import datetime - -import pytest -from ocrdmonitor.processstatus import ProcessState, ProcessStatus - -PS_OUTPUT = """ - 1 Ss 0.0 3872 01:12:46 - 20 R+ 49.7 1556 02:33:02 -""" - -INVALID_GROUP_OUTPUT = ( - "error: list of session leaders OR effective group names must follow -g" -) -INVALID_FORMAT_OUTPUT = "error: unknown user-defined format specifier" -FAILING_OUTPUTS = ["", INVALID_GROUP_OUTPUT, INVALID_FORMAT_OUTPUT] - - -def test__parsing_psoutput__returns_list_of_process_status() -> None: - actual = ProcessStatus.from_shell_output(PS_OUTPUT) - - assert actual == [ - ProcessStatus( - pid=1, - state=ProcessState.SLEEPING, - percent_cpu=0.0, - memory=3872, - cpu_time=datetime.timedelta(hours=1, minutes=12, seconds=46), - ), - ProcessStatus( - pid=20, - state=ProcessState.RUNNING, - percent_cpu=49.7, - memory=1556, - cpu_time=datetime.timedelta(hours=2, minutes=33, seconds=2), - ), - ] - - -@pytest.mark.parametrize("output", FAILING_OUTPUTS) -def test__parsing_psoutput_with_error__returns_empty_list(output: str) -> None: - actual = ProcessStatus.from_shell_output(output) - - assert actual == [] diff --git a/tests/ocrdmonitor/test_sshremote.py b/tests/ocrdmonitor/test_sshremote.py deleted file mode 100644 index 92a69c2..0000000 --- a/tests/ocrdmonitor/test_sshremote.py +++ /dev/null @@ -1,46 +0,0 @@ -from pathlib import Path -import pytest -from typing import Any, Awaitable, Callable, TypeVar - -from testcontainers.general import DockerContainer - -from ocrdmonitor.processstatus import ProcessState -from ocrdmonitor.sshremote import SSHRemote -from tests import markers -from tests.ocrdmonitor.sshcontainer import ( - get_process_group_from_container, - SSHConfig, - KEYDIR, -) - -T = TypeVar("T") - - -@pytest.mark.asyncio -@pytest.mark.integration -@markers.skip_if_no_docker -async def test_ps_over_ssh__returns_list_of_process_status( - openssh_server: DockerContainer, -) -> None: - process_group = get_process_group_from_container(openssh_server) - sut = SSHRemote( - config=SSHConfig( - host="localhost", - port=2222, - user="testcontainer", - keyfile=Path(KEYDIR) / "id.rsa", - ), - ) - - actual = await run_until_truthy(sut.process_status, process_group) - - first_process = actual[0] - assert first_process.pid == process_group - assert first_process.state == ProcessState.SLEEPING - - -async def run_until_truthy(fn: Callable[..., Awaitable[T]], *args: Any) -> T: - while not (result := await fn(*args)): - continue - - return result From a593a9cb941ac3af2051ba7b8996e4abc305d20c Mon Sep 17 00:00:00 2001 From: ocrd Date: Wed, 6 Mar 2024 09:55:10 +0100 Subject: [PATCH 2/7] fix 94c61e6 --- ocrdmonitor/environment.py | 3 +- ocrdmonitor/server/jobs.py | 1 - .../server/fixtures/environment.py | 9 ----- tests/ocrdmonitor/server/test_job_endpoint.py | 35 ++----------------- 4 files changed, 3 insertions(+), 45 deletions(-) diff --git a/ocrdmonitor/environment.py b/ocrdmonitor/environment.py index 8bbd5fb..88b9ac0 100644 --- a/ocrdmonitor/environment.py +++ b/ocrdmonitor/environment.py @@ -9,9 +9,8 @@ SubProcessOcrdBrowserFactory, ) from ocrdmonitor import database -from ocrdmonitor.protocols import RemoteServer, Repositories +from ocrdmonitor.protocols import Repositories from ocrdmonitor.server.settings import Settings -from ocrdmonitor.sshremote import SSHRemote BrowserType = Type[SubProcessOcrdBrowser] | Type[DockerOcrdBrowser] CreatingFactories: dict[str, Callable[[set[int]], OcrdBrowserFactory]] = { diff --git a/ocrdmonitor/server/jobs.py b/ocrdmonitor/server/jobs.py index 1eb5458..cf34dc7 100644 --- a/ocrdmonitor/server/jobs.py +++ b/ocrdmonitor/server/jobs.py @@ -8,7 +8,6 @@ from fastapi.responses import JSONResponse from fastapi.templating import Jinja2Templates -from ocrdmonitor.processstatus import ProcessStatus from ocrdmonitor.protocols import Environment, OcrdJob, Repositories import httpx diff --git a/tests/ocrdmonitor/server/fixtures/environment.py b/tests/ocrdmonitor/server/fixtures/environment.py index a9074b7..d50460e 100644 --- a/tests/ocrdmonitor/server/fixtures/environment.py +++ b/tests/ocrdmonitor/server/fixtures/environment.py @@ -12,11 +12,9 @@ from fastapi.testclient import TestClient from ocrdbrowser import OcrdBrowserFactory -from ocrdmonitor.processstatus import ProcessStatus from ocrdmonitor.protocols import ( BrowserProcessRepository, BrowserRestoringFactory, - RemoteServer, Repositories, ) from ocrdmonitor.server.app import create_app @@ -34,13 +32,6 @@ ) -class RemoteDummy: - async def read_file(self, path: str) -> str: - return "" - - async def process_status(self, process_group: int) -> list[ProcessStatus]: - return [] - @dataclass class DevEnvironment: diff --git a/tests/ocrdmonitor/server/test_job_endpoint.py b/tests/ocrdmonitor/server/test_job_endpoint.py index 0307b5c..93fd444 100644 --- a/tests/ocrdmonitor/server/test_job_endpoint.py +++ b/tests/ocrdmonitor/server/test_job_endpoint.py @@ -10,7 +10,6 @@ import pytest from pytest_httpx import HTTPXMock -from ocrdmonitor.processstatus import ProcessState, ProcessStatus from ocrdmonitor.protocols import OcrdJob from tests.ocrdmonitor.server import scraping from tests.ocrdmonitor.server.fixtures.environment import Fixture @@ -58,8 +57,6 @@ async def test__given_a_running_ocrd_job__the_job_endpoint_lists_it_with_resourc repository_fixture: Fixture, ) -> None: pid = 1234 - expected_status = make_status(pid) - remote_stub = RemoteServerStub(expected_status) async with fixture as env: app = env.app @@ -69,7 +66,7 @@ async def test__given_a_running_ocrd_job__the_job_endpoint_lists_it_with_resourc response = app.get("/jobs/") assert response.is_success - assert_lists_running_job(job, expected_status, response) + assert_lists_running_job(job, response) @pytest.mark.asyncio @@ -108,29 +105,6 @@ def non_mocked_hosts() -> list[str]: return ["testserver"] -def make_status(pid: int) -> ProcessStatus: - expected_status = ProcessStatus( - pid=pid, - state=ProcessState.RUNNING, - percent_cpu=0.25, - memory=1024, - cpu_time=timedelta(seconds=10, minutes=5, hours=1), - ) - - return expected_status - - -class RemoteServerStub: - def __init__(self, expected_status: ProcessStatus) -> None: - self.expected_status = expected_status - - async def read_file(self, path: str) -> str: - return str(self.expected_status.pid) - - async def process_status(self, process_group: int) -> list[ProcessStatus]: - return [self.expected_status] - - def running_ocrd_job(pid: int) -> OcrdJob: running_job = replace(job_template(), pid=pid) return running_job @@ -154,7 +128,6 @@ def assert_lists_completed_job( def assert_lists_running_job( running_job: OcrdJob, - process_status: ProcessStatus, response: Response, ) -> None: texts = collect_texts_from_job_table(response.content, "running-jobs") @@ -164,11 +137,7 @@ def assert_lists_running_job( str(running_job.task_id), str(running_job.process_id), running_job.workflow_file.name, - str(process_status.pid), - str(process_status.state), - str(process_status.percent_cpu), - str(process_status.memory), - str(process_status.cpu_time), + str(running_job.pid), ] From be2a4fbd453517563ab4387e7cbb59187045789d Mon Sep 17 00:00:00 2001 From: ocrd Date: Wed, 6 Mar 2024 21:39:27 +0100 Subject: [PATCH 3/7] fix job tests in 94c61e6 --- tests/ocrdmonitor/server/test_job_endpoint.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/ocrdmonitor/server/test_job_endpoint.py b/tests/ocrdmonitor/server/test_job_endpoint.py index 93fd444..1d43797 100644 --- a/tests/ocrdmonitor/server/test_job_endpoint.py +++ b/tests/ocrdmonitor/server/test_job_endpoint.py @@ -43,13 +43,13 @@ async def test__given_a_completed_ocrd_job__the_job_endpoint_lists_it_in_a_table result_text: str, ) -> None: async with repository_fixture as env: - completed_job = replace(job_template(), return_code=return_code) - await env._repositories.ocrd_jobs.insert(completed_job) + job = completed_ocrd_job(return_code) + await env._repositories.ocrd_jobs.insert(job) response = env.app.get("/jobs/") assert response.is_success - assert_lists_completed_job(completed_job, result_text, response) + assert_lists_completed_job(job, result_text, response) @pytest.mark.asyncio @@ -58,12 +58,11 @@ async def test__given_a_running_ocrd_job__the_job_endpoint_lists_it_with_resourc ) -> None: pid = 1234 - async with fixture as env: - app = env.app + async with repository_fixture as env: job = running_ocrd_job(pid) await env._repositories.ocrd_jobs.insert(job) - response = app.get("/jobs/") + response = env.app.get("/jobs/") assert response.is_success assert_lists_running_job(job, response) @@ -109,6 +108,9 @@ def running_ocrd_job(pid: int) -> OcrdJob: running_job = replace(job_template(), pid=pid) return running_job +def completed_ocrd_job(return_code: int) -> OcrdJob: + completed_job = replace(job_template(), return_code=return_code) + return completed_job def assert_lists_completed_job( completed_job: OcrdJob, result_text: str, response: Response From 963529a90a764ca5553ab5513c81a3291a543e6a Mon Sep 17 00:00:00 2001 From: ocrd Date: Wed, 6 Mar 2024 21:47:18 +0100 Subject: [PATCH 4/7] remove type:ignore hints (causes mypy errors) --- ocrdmonitor/database/_browserprocessrepository.py | 2 +- ocrdmonitor/database/_initdb.py | 8 ++++---- ocrdmonitor/database/_ocrdjobrepository.py | 2 +- ocrdmonitor/server/settings.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ocrdmonitor/database/_browserprocessrepository.py b/ocrdmonitor/database/_browserprocessrepository.py index f3e066d..b051940 100644 --- a/ocrdmonitor/database/_browserprocessrepository.py +++ b/ocrdmonitor/database/_browserprocessrepository.py @@ -29,7 +29,7 @@ def __init__(self, restoring_factory: BrowserRestoringFactory) -> None: self._restoring_factory = restoring_factory async def insert(self, browser: OcrdBrowser) -> None: - await BrowserProcess( # type: ignore + await BrowserProcess( address=browser.address(), owner=browser.owner(), process_id=browser.process_id(), diff --git a/ocrdmonitor/database/_initdb.py b/ocrdmonitor/database/_initdb.py index 0a44bcf..1763d81 100644 --- a/ocrdmonitor/database/_initdb.py +++ b/ocrdmonitor/database/_initdb.py @@ -39,11 +39,11 @@ async def init(connection_str: str, force_initialize: bool = False) -> None: __initialized = True connection_str = rebuild_connection_string(connection_str) - client: AsyncIOMotorClient = AsyncIOMotorClient(connection_str) # type: ignore - client.get_io_loop = asyncio.get_event_loop # type: ignore + client: AsyncIOMotorClient = AsyncIOMotorClient(connection_str) + client.get_io_loop = asyncio.get_event_loop await init_beanie( - database=client.ocrd, # type: ignore - document_models=[BrowserProcess, MongoOcrdJob], # type: ignore + database=client.ocrd, + document_models=[BrowserProcess, MongoOcrdJob], ) return init diff --git a/ocrdmonitor/database/_ocrdjobrepository.py b/ocrdmonitor/database/_ocrdjobrepository.py index 2e66a09..ed86e15 100644 --- a/ocrdmonitor/database/_ocrdjobrepository.py +++ b/ocrdmonitor/database/_ocrdjobrepository.py @@ -35,7 +35,7 @@ class Settings: class MongoJobRepository: async def insert(self, job: OcrdJob) -> None: - await MongoOcrdJob(**asdict(job)).insert() # type: ignore + await MongoOcrdJob(**asdict(job)).insert() async def find_all(self) -> list[OcrdJob]: return [OcrdJob(**j.dict(exclude={"id"})) for j in await MongoOcrdJob.find_all().to_list()] diff --git a/ocrdmonitor/server/settings.py b/ocrdmonitor/server/settings.py index 6fbec0c..4d2997d 100644 --- a/ocrdmonitor/server/settings.py +++ b/ocrdmonitor/server/settings.py @@ -63,7 +63,7 @@ def validator(cls, value: str | tuple[int, int]) -> tuple[int, int]: if not int_pair or len(int_pair) != 2: raise ValueError("Port range must have exactly two values") - return int_pair # type: ignore + return int_pair class Settings(BaseSettings): From 421277bf4a8435e58f59ada0c9705bdaa8a77916 Mon Sep 17 00:00:00 2001 From: ocrd Date: Wed, 6 Mar 2024 21:56:27 +0100 Subject: [PATCH 5/7] motor.motor_asyncio.AsyncIOMotorClient has no type hinting support --- ocrdmonitor/database/_initdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrdmonitor/database/_initdb.py b/ocrdmonitor/database/_initdb.py index 1763d81..1637651 100644 --- a/ocrdmonitor/database/_initdb.py +++ b/ocrdmonitor/database/_initdb.py @@ -39,7 +39,7 @@ async def init(connection_str: str, force_initialize: bool = False) -> None: __initialized = True connection_str = rebuild_connection_string(connection_str) - client: AsyncIOMotorClient = AsyncIOMotorClient(connection_str) + client = AsyncIOMotorClient(connection_str) client.get_io_loop = asyncio.get_event_loop await init_beanie( database=client.ocrd, From 6aa918bf1157b6e4a820b08ec4343c8d4f3ea231 Mon Sep 17 00:00:00 2001 From: ocrd Date: Wed, 6 Mar 2024 22:06:24 +0100 Subject: [PATCH 6/7] try to make motor.motor_asyncio.AsyncIOMotorClient work with strict mypy --- ocrdmonitor/database/_initdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocrdmonitor/database/_initdb.py b/ocrdmonitor/database/_initdb.py index 1637651..922c30e 100644 --- a/ocrdmonitor/database/_initdb.py +++ b/ocrdmonitor/database/_initdb.py @@ -39,8 +39,8 @@ async def init(connection_str: str, force_initialize: bool = False) -> None: __initialized = True connection_str = rebuild_connection_string(connection_str) - client = AsyncIOMotorClient(connection_str) - client.get_io_loop = asyncio.get_event_loop + client = AsyncIOMotorClient(connection_str) # type: ignore[var-annotated] + client.get_io_loop = asyncio.get_event_loop # type: ignore[method-assign] await init_beanie( database=client.ocrd, document_models=[BrowserProcess, MongoOcrdJob], From 690fe6467241db72b694b71e467cb8e7bbc9d5fe Mon Sep 17 00:00:00 2001 From: ocrd Date: Fri, 8 Mar 2024 01:41:16 +0100 Subject: [PATCH 7/7] workspace prefix must be /data instead of /data/ocr-d --- init.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/init.sh b/init.sh index 93179de..ab68f6b 100755 --- a/init.sh +++ b/init.sh @@ -2,7 +2,15 @@ export MONITOR_DB_CONNECTION_STRING=$MONITOR_DB_CONNECTION export OCRD_BROWSER__MODE=native -export OCRD_BROWSER__WORKSPACE_DIR=/data/ocr-d +# all OCR-D workspaces on the Manager are under /data/ocr-d +# but since the Manager resolves everything under /data +# it tracks the workspace directory relative to that in the database +# (e.g. ocr-d/testdata-production) +# so if we write /data/ocr-d, we could list workspaces fine, +# but our workspace URLs from the job database would be wrong +# (resolving as /data/ocr-d/ocr-d/...) +# so better just use /data as well here: +export OCRD_BROWSER__WORKSPACE_DIR=/data export OCRD_BROWSER__PORT_RANGE="[9000,9100]" export OCRD_LOGVIEW__PORT=$MONITOR_PORT_LOG export OCRD_MANAGER__URL=$MANAGER_URL