Skip to content

Commit

Permalink
Merge pull request #43 from bertsky/remove-controller-ssh-jobstatus
Browse files Browse the repository at this point in the history
remove SSH to Controller for job status
  • Loading branch information
bertsky authored Mar 8, 2024
2 parents 9eb7b1a + 690fe64 commit f2f0975
Show file tree
Hide file tree
Showing 22 changed files with 36 additions and 432 deletions.
6 changes: 1 addition & 5 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
CONTROLLER_HOST=ocrd-controller
CONTROLLER_PORT_SSH=22

MANAGER_DATA=~/.ssh/id_rsa
MANAGER_KEY=~/
MANAGER_DATA=~/
MANAGER_HOST=ocrd-manager
MANAGER_PORT_WEB=4004

Expand Down
14 changes: 0 additions & 14 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,44 +28,30 @@ Variables:
currently: "$(TAGNAME)"
- MONITOR_PORT_WEB TCP port for the (host-side) web server
currently: $(MONITOR_PORT_WEB)
- MANAGER_KEY SSH key file to mount (for the Controller client)
currently: "$(MANAGER_KEY)"
- MANAGER_DATA host directory to mount into `/data` (shared with Manager)
currently: "$(MANAGER_DATA)"
- MANAGER_WORKFLOWS host directory to mount into `/workflows` (shared with Manager)
currently: "$(MANAGER_WORKFLOWS)"
- NETWORK Docker network to use (manage via "docker network")
currently: $(NETWORK)
- CONTROLLER_HOST network address for the Controller client
(must be reachable from the container network)
currently: $(CONTROLLER_HOST)
- CONTROLLER_PORT_SSH network port for the Controller client
(must be reachable from the container network)
currently: $(CONTROLLER_PORT_SSH)
EOF
endef
export HELP
help: ; @eval "$$HELP"

MANAGER_KEY ?= $(firstword $(filter-out %.pub,$(wildcard $(HOME)/.ssh/id_*)))
MANAGER_DATA ?= $(CURDIR)
MANAGER_WORKFLOWS ?= $(CURDIR)
MONITOR_PORT_WEB ?= 5000
NETWORK ?= bridge
CONTROLLER_HOST ?= $(shell dig +short $$HOSTNAME)
CONTROLLER_PORT_SSH ?= 8022
run: $(DATA)
docker run -d --rm \
-h ocrd_monitor \
--name ocrd_monitor \
--network=$(NETWORK) \
-p $(MONITOR_PORT_WEB):5000 \
-v ${MANAGER_KEY}:/id_rsa \
--mount type=bind,source=$(MANAGER_KEY),target=/id_rsa \
-v $(MANAGER_DATA):/data \
-v $(MANAGER_WORKFLOWS):/workflows \
-v shared:/run/lock/ocrd.jobs \
-e CONTROLLER=$(CONTROLLER_HOST):$(CONTROLLER_PORT_SSH) \
-e MONITOR_PORT_LOG=${MONITOR_PORT_LOG} \
$(TAGNAME)

Expand Down
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,8 @@ In order to work properly, the following **environment variables** must be set:

| Variable | Description |
| ------------------- | -------------------------------------------------------------------------------- |
| CONTROLLER_HOST | Hostname of the OCR-D Controller |
| CONTROLLER_PORT_SSH | Port on the OCR-D Controller host that allows an SSH connection |
| MANAGER_DATA | Path to the OCR-D workspaces on the host |
| MANAGER_WORKFLOWS | Path to the OCR-D workflows on the host |
| MANAGER_KEY | Path to a private key that can be used to authenticate with the OCR-D Controller |
| MONITOR_PORT_WEB | The port at which the OCR-D Monitor will be available on the host |
| MONITOR_PORT_LOG | The port at which the Dozzle logs will be available on the host |

Expand Down
2 changes: 0 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ services:
hostname: ${MONITOR_HOST}

environment:
CONTROLLER: "${CONTROLLER_HOST}:${CONTROLLER_PORT_SSH}"
MANAGER_URL: "http://${MANAGER_HOST}:${MANAGER_PORT_WEB}"
MONITOR_PORT_LOG: ${MONITOR_PORT_LOG}
MONITOR_DB_CONNECTION: "mongodb://${MONITOR_DB_ROOT_USER:-root}:${MONITOR_DB_ROOT_PASSWORD:-root_password}@ocrd-database:27017"
Expand All @@ -28,7 +27,6 @@ services:
volumes:
- ${MANAGER_DATA}:/data
- ${MANAGER_WORKFLOWS}:/workflows
- ${MANAGER_KEY}:/id_rsa
- shared:/run/lock/ocrd.jobs

ocrd-logview:
Expand Down
31 changes: 9 additions & 22 deletions init.sh
Original file line number Diff line number Diff line change
@@ -1,31 +1,18 @@
#!/usr/bin/env bash

mkdir -p ~/.ssh
cat /id_rsa >> ~/.ssh/id_rsa
chmod go-rw ~/.ssh/id_rsa

# Add the OCR-D Controller to the global known_hosts file if the CONTROLLER env variable is set
if [ -n "$CONTROLLER" ]; then
CONTROLLER_HOST=${CONTROLLER%:*}
CONTROLLER_PORT=${CONTROLLER#*:}
CONTROLLER_IP=$(nslookup $CONTROLLER_HOST | grep 'Address\:' | awk 'NR==2 {print $2}')

if test -e /etc/ssh/ssh_known_hosts; then
ssh-keygen -R $CONTROLLER_HOST -f /etc/ssh/ssh_known_hosts
ssh-keygen -R $CONTROLLER_IP -f /etc/ssh/ssh_known_hosts
fi
ssh-keyscan -H -p ${CONTROLLER_PORT:-22} $CONTROLLER_HOST,$CONTROLLER_IP >>/etc/ssh/ssh_known_hosts
fi

export MONITOR_DB_CONNECTION_STRING=$MONITOR_DB_CONNECTION
export OCRD_BROWSER__MODE=native
export OCRD_BROWSER__WORKSPACE_DIR=/data/ocr-d
# All OCR-D workspaces on the Manager are located under /data/ocr-d.
# However, the Manager resolves everything relative to /data, so the
# database tracks each workspace directory relative to /data
# (e.g. ocr-d/testdata-production).
# If we used /data/ocr-d here, listing workspaces would still work,
# but the workspace URLs from the job database would be wrong
# (they would resolve as /data/ocr-d/ocr-d/...).
# Therefore, use /data here as well:
export OCRD_BROWSER__WORKSPACE_DIR=/data
export OCRD_BROWSER__PORT_RANGE="[9000,9100]"
export OCRD_LOGVIEW__PORT=$MONITOR_PORT_LOG
export OCRD_CONTROLLER__HOST=$CONTROLLER_HOST
export OCRD_CONTROLLER__PORT=$CONTROLLER_PORT
export OCRD_CONTROLLER__USER=admin
export OCRD_CONTROLLER__KEYFILE=~/.ssh/id_rsa
export OCRD_MANAGER__URL=$MANAGER_URL

cd /usr/local/ocrd-monitor
Expand Down
2 changes: 1 addition & 1 deletion ocrdmonitor/database/_browserprocessrepository.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, restoring_factory: BrowserRestoringFactory) -> None:
self._restoring_factory = restoring_factory

async def insert(self, browser: OcrdBrowser) -> None:
await BrowserProcess( # type: ignore
await BrowserProcess(
address=browser.address(),
owner=browser.owner(),
process_id=browser.process_id(),
Expand Down
8 changes: 4 additions & 4 deletions ocrdmonitor/database/_initdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ async def init(connection_str: str, force_initialize: bool = False) -> None:

__initialized = True
connection_str = rebuild_connection_string(connection_str)
client: AsyncIOMotorClient = AsyncIOMotorClient(connection_str) # type: ignore
client.get_io_loop = asyncio.get_event_loop # type: ignore
client = AsyncIOMotorClient(connection_str) # type: ignore[var-annotated]
client.get_io_loop = asyncio.get_event_loop # type: ignore[method-assign]
await init_beanie(
database=client.ocrd, # type: ignore
document_models=[BrowserProcess, MongoOcrdJob], # type: ignore
database=client.ocrd,
document_models=[BrowserProcess, MongoOcrdJob],
)

return init
Expand Down
3 changes: 1 addition & 2 deletions ocrdmonitor/database/_ocrdjobrepository.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ class MongoOcrdJob(Document):
workdir: Path
remotedir: str
workflow_file: Path
controller_address: str

class Settings:
name = "OcrdJob"
Expand All @@ -36,7 +35,7 @@ class Settings:

class MongoJobRepository:
async def insert(self, job: OcrdJob) -> None:
await MongoOcrdJob(**asdict(job)).insert() # type: ignore
await MongoOcrdJob(**asdict(job)).insert()

async def find_all(self) -> list[OcrdJob]:
return [OcrdJob(**j.dict(exclude={"id"})) for j in await MongoOcrdJob.find_all().to_list()]
6 changes: 1 addition & 5 deletions ocrdmonitor/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
SubProcessOcrdBrowserFactory,
)
from ocrdmonitor import database
from ocrdmonitor.protocols import RemoteServer, Repositories
from ocrdmonitor.protocols import Repositories
from ocrdmonitor.server.settings import Settings
from ocrdmonitor.sshremote import SSHRemote

BrowserType = Type[SubProcessOcrdBrowser] | Type[DockerOcrdBrowser]
CreatingFactories: dict[str, Callable[[set[int]], OcrdBrowserFactory]] = {
Expand Down Expand Up @@ -40,6 +39,3 @@ async def repositories(self) -> Repositories:
def browser_factory(self) -> OcrdBrowserFactory:
port_range_set = set(range(*self.settings.ocrd_browser.port_range))
return CreatingFactories[self.settings.ocrd_browser.mode](port_range_set)

def controller_server(self) -> RemoteServer:
return SSHRemote(self.settings.ocrd_controller)
25 changes: 0 additions & 25 deletions ocrdmonitor/ocrdcontroller.py

This file was deleted.

58 changes: 0 additions & 58 deletions ocrdmonitor/processstatus.py

This file was deleted.

12 changes: 0 additions & 12 deletions ocrdmonitor/protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from typing import Collection, NamedTuple, Protocol

from ocrdbrowser import OcrdBrowser, OcrdBrowserFactory
from ocrdmonitor.processstatus import ProcessStatus
from ocrdmonitor.server.settings import Settings


Expand Down Expand Up @@ -49,7 +48,6 @@ class OcrdJob:
workdir: Path
remotedir: str
workflow_file: Path
controller_address: str

@property
def is_running(self) -> bool:
Expand All @@ -72,13 +70,6 @@ async def find_all(self) -> list[OcrdJob]:
...


class RemoteServer(Protocol):
async def read_file(self, path: str) -> str:
...

async def process_status(self, process_group: int) -> list[ProcessStatus]:
...

class Repositories(NamedTuple):
browser_processes: BrowserProcessRepository
ocrd_jobs: JobRepository
Expand All @@ -92,6 +83,3 @@ async def repositories(self) -> Repositories:

def browser_factory(self) -> OcrdBrowserFactory:
...

def controller_server(self) -> RemoteServer:
...
29 changes: 2 additions & 27 deletions ocrdmonitor/server/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,12 @@
from fastapi.responses import JSONResponse
from fastapi.templating import Jinja2Templates

from ocrdmonitor.ocrdcontroller import OcrdController
from ocrdmonitor.processstatus import ProcessStatus
from ocrdmonitor.protocols import Environment, OcrdJob, Repositories

import httpx
import logging


@dataclass
class RunningJob:
ocrd_job: OcrdJob
process_status: ProcessStatus


def split_into_running_and_completed(
jobs: Iterable[OcrdJob],
) -> tuple[list[OcrdJob], list[OcrdJob]]:
Expand All @@ -30,25 +22,11 @@ def split_into_running_and_completed(
return running_ocrd_jobs, completed_ocrd_jobs


def wrap_in_running_job_type(
running_ocrd_jobs: Iterable[OcrdJob],
job_status: Iterable[ProcessStatus | None],
) -> Iterable[RunningJob]:
running_jobs = [
RunningJob(job, process_status)
for job, process_status in zip(running_ocrd_jobs, job_status)
if process_status is not None
]

return running_jobs


def create_jobs(
templates: Jinja2Templates,
environment: Environment,
) -> APIRouter:
router = APIRouter(prefix="/jobs")
controller = OcrdController(environment.controller_server())

@router.get("/", name="jobs")
async def jobs(
Expand All @@ -58,17 +36,14 @@ async def jobs(
jobs = await job_repository.find_all()
running, completed = split_into_running_and_completed(jobs)

job_status = [await controller.status_for(job) for job in running]
running_jobs = wrap_in_running_job_type(running, job_status)

now = datetime.now(timezone.utc)
return templates.TemplateResponse(
"jobs.html.j2",
{
"request": request,
"running_jobs": sorted(
running_jobs,
key=lambda x: x.ocrd_job.time_created or now,
running,
key=lambda x: x.time_created or now,
),
"completed_jobs": sorted(
completed,
Expand Down
Loading

0 comments on commit f2f0975

Please sign in to comment.