From 80c0427901e1a52bd03bf4dfe4ac42c69e9e239f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 3 Oct 2024 13:40:54 +0200 Subject: [PATCH 001/144] add new helper method to implement workunit definition reuse --- .../experimental/app_interface/workunit/definition.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/bfabric/experimental/app_interface/workunit/definition.py b/src/bfabric/experimental/app_interface/workunit/definition.py index 440385e4..6eb9baec 100644 --- a/src/bfabric/experimental/app_interface/workunit/definition.py +++ b/src/bfabric/experimental/app_interface/workunit/definition.py @@ -81,6 +81,17 @@ def from_ref(cls, workunit: Path | int, client: Bfabric) -> WorkunitDefinition: workunit = Workunit.find(id=workunit, client=client) return cls.from_workunit(workunit) + @classmethod + def from_ref_cached(cls, workunit: Path | int, file: Path, client: Bfabric) -> WorkunitDefinition: + """Loads the workunit definition from the provided reference, caching the result to the provided file. + If the cache file exists, it will be loaded directly instead of resolving the reference. 
+ """ + if file.exists(): + return cls.from_yaml(file) + result = cls.from_ref(workunit=workunit, client=client) + result.to_yaml(file) + return result + @classmethod def from_workunit(cls, workunit: Workunit) -> WorkunitDefinition: """Loads the workunit definition from the provided B-Fabric workunit.""" From 3624533fafc12b5b6dc8fcafd0fbdae71de43d47 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 3 Oct 2024 13:51:47 +0200 Subject: [PATCH 002/144] reduce the amount of times the workunit definition is rebuilt --- .../app_interface/app_runner/runner.py | 18 +++++++---- .../app_interface/workunit/definition.py | 30 +++++++++---------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/bfabric/experimental/app_interface/app_runner/runner.py b/src/bfabric/experimental/app_interface/app_runner/runner.py index 03f9514a..e375f056 100644 --- a/src/bfabric/experimental/app_interface/app_runner/runner.py +++ b/src/bfabric/experimental/app_interface/app_runner/runner.py @@ -3,9 +3,9 @@ import shlex import subprocess from pathlib import Path -from loguru import logger import yaml +from loguru import logger from pydantic import BaseModel from bfabric import Bfabric @@ -66,24 +66,30 @@ def run_app( read_only: bool = False, dispatch_active: bool = True, ) -> None: - # TODO future: the workunit definition must be loaded from bfabric exactly once! 
this is quite inefficient right now - workunit_definition = WorkunitDefinition.from_ref(workunit_ref, client=client) + workunit_definition_file = work_dir / "workunit_definition.yml" + workunit_definition = WorkunitDefinition.from_ref( + workunit=workunit_ref, client=client, cache_file=workunit_definition_file + ) if not read_only: + # Set the workunit status to processing client.save("workunit", {"id": workunit_definition.registration.workunit_id, "status": "processing"}) runner = Runner(spec=app_spec, client=client, ssh_user=ssh_user) if dispatch_active: - runner.run_dispatch(workunit_ref=workunit_ref, work_dir=work_dir) + runner.run_dispatch(workunit_ref=workunit_definition_file, work_dir=work_dir) chunks_file = ChunksFile.model_validate(yaml.safe_load((work_dir / "chunks.yml").read_text())) for chunk in chunks_file.chunks: logger.info(f"Processing chunk {chunk}") runner.run_prepare_input(chunk_dir=chunk) runner.run_process(chunk_dir=chunk) - runner.run_collect(workunit_ref=workunit_ref, chunk_dir=chunk) + runner.run_collect(workunit_ref=workunit_definition_file, chunk_dir=chunk) if not read_only: runner.run_register_outputs( - chunk_dir=chunk, workunit_ref=workunit_ref, reuse_default_resource=app_spec.reuse_default_resource + chunk_dir=chunk, + workunit_ref=workunit_definition_file, + reuse_default_resource=app_spec.reuse_default_resource, ) if not read_only: + # Set the workunit status to available client.save("workunit", {"id": workunit_definition.registration.workunit_id, "status": "available"}) diff --git a/src/bfabric/experimental/app_interface/workunit/definition.py b/src/bfabric/experimental/app_interface/workunit/definition.py index 6eb9baec..873a13ff 100644 --- a/src/bfabric/experimental/app_interface/workunit/definition.py +++ b/src/bfabric/experimental/app_interface/workunit/definition.py @@ -71,25 +71,25 @@ class WorkunitDefinition(BaseModel): registration: WorkunitRegistrationDefinition | None @classmethod - def from_ref(cls, workunit: Path | int, 
client: Bfabric) -> WorkunitDefinition: - """Loads the workunit definition from the provided reference, - which can be a path to a YAML file, or a workunit ID. + def from_ref(cls, workunit: Path | int, client: Bfabric, cache_file: Path | None = None) -> WorkunitDefinition: + """Loads the workunit definition from the provided reference, which can be a path to a YAML file, + or a workunit ID. + + If the cache file is provided and exists, it will be loaded directly instead of resolving the reference. + Otherwise, the result will be cached to the provided file. + :param workunit: The workunit reference, which can be a path to a YAML file, or a workunit ID. + :param client: The B-Fabric client to use for resolving the workunit. + :param cache_file: The path to the cache file, if any. """ + if cache_file is not None and cache_file.exists(): + return cls.from_yaml(cache_file) if isinstance(workunit, Path): - return cls.from_yaml(workunit) + result = cls.from_yaml(workunit) else: workunit = Workunit.find(id=workunit, client=client) - return cls.from_workunit(workunit) - - @classmethod - def from_ref_cached(cls, workunit: Path | int, file: Path, client: Bfabric) -> WorkunitDefinition: - """Loads the workunit definition from the provided reference, caching the result to the provided file. - If the cache file exists, it will be loaded directly instead of resolving the reference. 
- """ - if file.exists(): - return cls.from_yaml(file) - result = cls.from_ref(workunit=workunit, client=client) - result.to_yaml(file) + result = cls.from_workunit(workunit) + if cache_file is not None: + result.to_yaml(cache_file) return result @classmethod From 2a06ecb21e90b982512bba9a5c8dd3664ad4ecef Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 3 Oct 2024 14:00:14 +0200 Subject: [PATCH 003/144] remove the old release script --- docs/contribute.md | 7 ++-- release.py | 82 ---------------------------------------------- 2 files changed, 3 insertions(+), 86 deletions(-) delete mode 100644 release.py diff --git a/docs/contribute.md b/docs/contribute.md index 5cdce50a..5ebf9045 100644 --- a/docs/contribute.md +++ b/docs/contribute.md @@ -33,11 +33,10 @@ mkdocs gh-deploy ## Release To create a release, increase the version in `pyproject.toml`, prepare `changelog.md`, -commit everything and run `release.py`. This script will +commit everything and create a PR to the `stable` branch. -- Create a tag and push it to GitHub -- Merge the changes to the `stable` branch and push it to GitHub -- Build the documentation and push it to GitHub Pages +Once this is merged a Github Action will create a tag (if the tag already exists, it will fail!) and the documentation +will be rebuilt and published to GitHub Pages. The only manual step that remains is creating a release on GitHub. To do so, you can paste the changelog section of the release and create a new release on GitHub using the tag that was created. 
diff --git a/release.py b/release.py deleted file mode 100644 index 28e666fb..00000000 --- a/release.py +++ /dev/null @@ -1,82 +0,0 @@ -import subprocess -import re -import tomllib -import sys -from pathlib import Path - - -def get_remote_tags() -> list[str]: - # executes: git ls-remote --tags origin - out = subprocess.run(["git", "ls-remote", "--tags", "origin"], check=True, capture_output=True) - return [line.split("\t")[-1].split("/")[-1] for line in out.stdout.decode().split("\n") if line] - - -def get_local_tags() -> list[str]: - # executes: git tag - out = subprocess.run(["git", "tag"], check=True, capture_output=True) - return out.stdout.decode().split("\n") - - -def get_existing_releases(remote: bool) -> list[str]: - tags = get_remote_tags() if remote else get_local_tags() - - # e.g. 1.2.21 - pattern = re.compile(r"^\d+\.\d+\.\d+$") - return [tag for tag in tags if pattern.match(tag)] - - -def get_most_recent_release(remote: bool) -> str: - sorted_releases = sorted(get_existing_releases(remote=remote), key=lambda x: tuple(map(int, x.split(".")))) - return sorted_releases[-1] - - -def get_current_pyproject_toml_version() -> str: - pyproject_toml_path = Path("pyproject.toml") - pyproject_toml = tomllib.loads(pyproject_toml_path.read_text()) - return pyproject_toml["project"]["version"] - - -def check_version() -> str: - released_remote = get_most_recent_release(remote=True) - released_local = get_most_recent_release(remote=False) - current = get_current_pyproject_toml_version() - if released_remote == current: - print(f"Version {current} is already released remotely. Please bump the version in pyproject.toml") - sys.exit(1) - elif released_local == current: - print(f"Version {current} is already released locally. 
Please bump the version in pyproject.toml") - sys.exit(1) - else: - return current - - -def checkout_branch(branch: str) -> None: - subprocess.run(["git", "checkout", branch], check=True) - - -def create_and_push_tag(version: str) -> None: - subprocess.run(["git", "tag", version], check=True) - subprocess.run(["git", "push", "origin", version], check=True) - - -def merge_and_push_current_branch(branch: str) -> None: - subprocess.run(["git", "merge", branch], check=True) - subprocess.run(["git", "push", "origin"], check=True) - - -def publish_docs() -> None: - subprocess.run(["mkdocs", "gh-deploy"], check=True) - - -def main() -> None: - checkout_branch("main") - version = check_version() - create_and_push_tag(version) - checkout_branch("stable") - merge_and_push_current_branch("main") - checkout_branch("main") - publish_docs() - - -if __name__ == "__main__": - main() From b6db53c14fcebd1bed9689f3ae224e7b6ce21a39 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 3 Oct 2024 14:09:45 +0200 Subject: [PATCH 004/144] fix compat --- .../app_interface/output_registration/register.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/bfabric/experimental/app_interface/output_registration/register.py b/src/bfabric/experimental/app_interface/output_registration/register.py index 8816dff2..65c0e9ae 100644 --- a/src/bfabric/experimental/app_interface/output_registration/register.py +++ b/src/bfabric/experimental/app_interface/output_registration/register.py @@ -15,7 +15,7 @@ ) from bfabric.experimental.app_interface.util.checksums import md5sum from bfabric.experimental.app_interface.util.scp import scp -from bfabric.scripts.bfabric_save_csv2dataset import bfabric_save_csv2dataset +from bfabric_scripts.bfabric_save_csv2dataset import bfabric_save_csv2dataset def _get_output_folder(spec: CopyResourceSpec, workunit: Workunit) -> Path: @@ -59,6 +59,8 @@ def copy_file_to_storage(spec: CopyResourceSpec, workunit: Workunit, storage: St def 
_save_dataset(spec: SaveDatasetSpec, client: Bfabric, workunit: Workunit): # TODO should not print to stdout in the future + # TODO also it should not be imported from bfabric_scripts, but rather the generic functioanlity should be available + # in the main package bfabric_save_csv2dataset( client=client, csv_file=spec.local_path, From 50a066ca413a0705fbcfce59fefa259e10c158e1 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 3 Oct 2024 14:10:53 +0200 Subject: [PATCH 005/144] ensure the directory exists --- src/bfabric/experimental/app_interface/workunit/definition.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bfabric/experimental/app_interface/workunit/definition.py b/src/bfabric/experimental/app_interface/workunit/definition.py index 873a13ff..5daeceda 100644 --- a/src/bfabric/experimental/app_interface/workunit/definition.py +++ b/src/bfabric/experimental/app_interface/workunit/definition.py @@ -89,6 +89,7 @@ def from_ref(cls, workunit: Path | int, client: Bfabric, cache_file: Path | None workunit = Workunit.find(id=workunit, client=client) result = cls.from_workunit(workunit) if cache_file is not None: + cache_file.parent.mkdir(exist_ok=True, parents=True) result.to_yaml(cache_file) return result From 7eb5ff5184be020cc10e5c22ec787d5516b60274 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 8 Oct 2024 11:18:26 +0200 Subject: [PATCH 006/144] correctly handle empty lists for find_all --- docs/changelog.md | 4 ++++ src/bfabric/entities/core/entity.py | 2 ++ tests/bfabric/entities/core/test_entity.py | 7 +++++++ 3 files changed, 13 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 580e3aad..8b5d875d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -10,6 +10,10 @@ Versioning currently follows `X.Y.Z` where ## \[Unreleased\] +### Fixed + +- `Entity.find_all` returns no values when an empty list is passed as an argument. 
+ ## \[1.13.8\] - 2024-10-03 This release contains mainly internal changes and ongoing development on the experimental app interface functionality. diff --git a/src/bfabric/entities/core/entity.py b/src/bfabric/entities/core/entity.py index 468018be..cea55b0f 100644 --- a/src/bfabric/entities/core/entity.py +++ b/src/bfabric/entities/core/entity.py @@ -47,6 +47,8 @@ def find_all(cls, ids: list[int], client: Bfabric) -> dict[int, Self]: ids = [int(id) for id in ids] if len(ids) > 100: result = MultiQuery(client).read_multi(cls.ENDPOINT, {}, "id", ids) + elif not ids: + return {} else: result = client.read(cls.ENDPOINT, obj={"id": ids}) results = {x["id"]: cls(x, client=client) for x in result} diff --git a/tests/bfabric/entities/core/test_entity.py b/tests/bfabric/entities/core/test_entity.py index 706214b1..7ed028fc 100644 --- a/tests/bfabric/entities/core/test_entity.py +++ b/tests/bfabric/entities/core/test_entity.py @@ -69,6 +69,13 @@ def test_find_all_when_not_all_found(mocker, mock_client) -> None: mock_client.read.assert_called_once_with("test_endpoint", obj={"id": [1, 5]}) +def test_find_all_when_empty_list(mock_client) -> None: + entities = Entity.find_all([], mock_client) + assert entities == {} + mock_client.read.assert_not_called() + mock_client.assert_not_called() + + def test_find_by_when_found(mocker, mock_client) -> None: mock_client.read.return_value = [{"id": 1, "name": "Test Entity"}] mocker.patch.object(Entity, "ENDPOINT", new="test_endpoint") From cd4e91156d4f8b63be707b40ff3c269272bc58a2 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 8 Oct 2024 13:18:28 +0200 Subject: [PATCH 007/144] refactor bfabric_submitter.py --- .../wrapper_creator/bfabric_submitter.py | 193 +++++++----------- 1 file changed, 77 insertions(+), 116 deletions(-) diff --git a/src/bfabric/wrapper_creator/bfabric_submitter.py b/src/bfabric/wrapper_creator/bfabric_submitter.py index 8417f38a..fd96c182 100644 --- a/src/bfabric/wrapper_creator/bfabric_submitter.py +++ 
b/src/bfabric/wrapper_creator/bfabric_submitter.py @@ -1,10 +1,14 @@ +from __future__ import annotations + import base64 +from pathlib import Path import yaml +from loguru import logger -import bfabric.wrapper_creator.gridengine as gridengine -import bfabric.wrapper_creator.slurm as slurm -from bfabric.wrapper_creator.bfabric_external_job import BfabricExternalJob +from bfabric.bfabric import Bfabric +from bfabric.entities import ExternalJob, Executable +from bfabric.wrapper_creator.slurm import SLURM class BfabricSubmitter: @@ -12,12 +16,6 @@ class BfabricSubmitter: the class is used by the submitter which is executed by the bfabric system. """ - (G, B) = (None, None) - - workunitid = None - workunit = None - parameters = None - execfilelist = [] slurm_dict = { "MaxQuant_textfiles_sge": {"partition": "prx", "nodelist": "fgcz-r-033", "memory": "1G"}, "fragpipe": {"partition": "prx", "nodelist": "fgcz-r-033", "memory": "256G"}, @@ -30,91 +28,60 @@ class BfabricSubmitter: def __init__( self, - login=None, - password=None, - externaljobid=None, - user="*", - node="PRX@fgcz-r-018", - partition="prx", - nodelist="fgcz-r-028", - memory="10G", - SCHEDULEROOT="/export/bfabric/bfabric/", - scheduler="GridEngine", - ): - """ - :rtype : object - """ - self.B = BfabricExternalJob(login=login, password=password, externaljobid=externaljobid) + client: Bfabric, + externaljobid: int, + user: str = "*", + partition: str = "prx", + nodelist: str = "fgcz-r-028", + memory: str = "10G", + scheduleroot: str = "/export/bfabric/bfabric/", + scheduler: str = "GridEngine", + script_dir: Path = Path("/home/bfabric/prx"), + ) -> None: + self._client = client + self._executable_file_list = [] + self.partition = partition self.nodelist = nodelist self.memory = memory - self.SCHEDULEROOT = SCHEDULEROOT - self.user = user + self.scheduleroot = scheduleroot self.scheduler = scheduler - - print(self.B.auth.login) - print(self.B.externaljobid) - - self.workunitid = 
self.B.get_workunitid_of_externaljob() - - try: - self.workunit = self.B.read_object(endpoint="workunit", obj={"id": self.workunitid})[0] - except: - print("ERROR: could not fetch workunit while calling constructor in BfabricSubmitter.") - raise - - try: - self.parameters = [ - self.B.read_object(endpoint="parameter", obj={"id": x._id})[0] for x in self.workunit.parameter - ] - except: - self.parameters = list() - print("Warning: could not fetch parameter.") - - partition = [x for x in self.parameters if x.key == "partition"] - nodelist = [x for x in self.parameters if x.key == "nodelist"] - memory = [x for x in self.parameters if x.key == "memory"] - application_name = self.B.get_application_name() - - if len(partition) > 0 and len(nodelist) > 0 and len(memory) > 0: - self.partition = partition[0].value - self.nodelist = nodelist[0].value - self.memory = memory[0].value - elif "queue" in [x.key for x in self.parameters] and application_name in self.slurm_dict: - # Temporary check for old workunit previously run with SGE - self.partition = self.slurm_dict[application_name]["partition"] - self.nodelist = self.slurm_dict[application_name]["nodelist"] - self.memory = self.slurm_dict[application_name]["memory"] - else: - pass - - print(f"partition={self.partition}") - print(f"nodelist={self.nodelist}") - print(f"memory={self.memory}") - print("__init__ DONE") - - def submit_gridengine(self, script="/tmp/runme.bash", arguments=""): - GE = gridengine.GridEngine(user=self.user, queue=self.queue, GRIDENGINEROOT=self.SCHEDULEROOT) - - print(script) - print(type(script)) - resQsub = GE.qsub(script=script, arguments=arguments) - - self.B.logger(f"{resQsub}") - - def submit_slurm(self, script="/tmp/runme.bash", arguments=""): - SL = slurm.SLURM(user=self.user, SLURMROOT=self.SCHEDULEROOT) - - print(script) - print(type(script)) - resSbatch = SL.sbatch(script=script, arguments=arguments) - - self.B.logger(f"{resSbatch}") - - def compose_bash_script(self, configuration=None, 
configuration_parser=lambda x: yaml.safe_load(x)): + self.user = user + self._script_dir = script_dir + + self.external_job = ExternalJob.find(id=externaljobid, client=client) + self.workunit = self.external_job.workunit + self.parameters = self.workunit.parameter_values + self.application = self.workunit.application + + default_config = self.slurm_dict.get(self.application["name"], {}) + self.partition = self.parameters.get("partition", default_config.get("partition")) + self.nodelist = self.parameters.get("nodelist", default_config.get("nodelist")) + self.memory = self.parameters.get("memory", default_config.get("memory")) + + logger.debug(f"partition={self.partition}") + logger.debug(f"nodelist={self.nodelist}") + logger.debug(f"memory={self.memory}") + logger.debug("__init__ DONE") + + # def submit_gridengine(self, script="/tmp/runme.bash", arguments=""): + # GE = gridengine.GridEngine(user=self.user, queue=self.queue, GRIDENGINEROOT=self.scheduleroot) + # print(script) + # print(type(script)) + # resQsub = GE.qsub(script=script, arguments=arguments) + # self.B.logger(f"{resQsub}") + + def submit_slurm(self, script: str = "/tmp/runme.bash", arguments: str = "") -> None: + slurm = SLURM(user=self.user, SLURMROOT=self.scheduleroot) + logger.debug(script) + logger.debug(type(script)) + res_slurm_batch = slurm.sbatch(script=script, arguments=arguments) + logger.debug(f"{res_slurm_batch}") + + def compose_bash_script(self, configuration=None, configuration_parser=lambda x: yaml.safe_load(x)) -> str: """ composes the bash script which is executed by the submitter (sun grid engine). - as argument it takes a configuration file, e.g., yaml, xml, json, or whatsoever, and a parser function. + as an argument it takes a configuration file, e.g., yaml, xml, json, or whatsoever, and a parser function. it returns a str object containing the code. 
@@ -122,11 +89,7 @@ def compose_bash_script(self, configuration=None, configuration_parser=lambda x: """ # assert isinstance(configuration, str) - - try: - config = configuration_parser(configuration) - except: - raise ValueError("error: parsing configuration content failed.") + config = configuration_parser(configuration) _cmd_template = """#!/bin/bash # Maria d'Errico @@ -237,50 +200,48 @@ def compose_bash_script(self, configuration=None, configuration_parser=lambda x: config["job_configuration"]["workunit_id"], self.nodelist, self.memory, - job_notification_emails=self.B.config.job_notification_emails, + job_notification_emails=self._client.config.job_notification_emails, ) return _cmd_template - def submitter_yaml(self): + def submitter_yaml(self) -> None: """ implements the default submitter the function fetches the yaml base64 configuration file linked to the external job id out of the B-Fabric - system. Since the file can not be stagged to the LRMS as argument, we copy the yaml file into the bash script + system. Since the file can not be staged to the LRMS as argument, we copy the yaml file into the bash script and stage it on execution the application. TODO(cp): create the output url before the application is started. 
return None """ - - # foreach (executable in external job): - for executable in self.B.get_executable_of_externaljobid(): - self.B.logger(f"executable = {executable}") - - try: - content = base64.b64decode(executable.base64.encode()).decode() - except: - raise ValueError("error: decoding executable.base64 failed.") - - print(content) + executables = Executable.find_by({"workunitid": self.workunit.id}, client=self._client).values() + for executable in executables: + logger.debug(f"executable = {executable}") + content = base64.b64decode(executable["base64"].encode()).decode() + logger.debug(content) _cmd_template = self.compose_bash_script( configuration=content, configuration_parser=lambda x: yaml.safe_load(x) ) - _bash_script_filename = f"/home/bfabric/prx/workunitid-{self.B.get_workunitid_of_externaljob()}_externaljobid-{self.B.externaljobid}_executableid-{executable._id}.bash" + bash_script_file = Path( + self._script_dir, + f"workunitid-{self.workunit.id}_externaljobid-{self.external_job.id}" + f"_executableid-{self.external_job.executable.id}.bash", + ) - with open(_bash_script_filename, "w") as f: - f.write(_cmd_template) + bash_script_file.write_text(_cmd_template) if self.scheduler == "GridEngine": - self.submit_gridengine(_bash_script_filename) + raise NotImplementedError + # self.submit_gridengine(bash_script_file) else: - self.submit_slurm(_bash_script_filename) - self.execfilelist.append(_bash_script_filename) + self.submit_slurm(str(bash_script_file)) + self._executable_file_list.append(str(bash_script_file)) - res = self.B.save_object(endpoint="externaljob", obj={"id": self.B.externaljobid, "status": "done"}) + self._client.save("externaljob", {"id": self.external_job.id, "status": "done"}) - def get_job_script(self): - return self.execfilelist + def get_job_script(self) -> list[str]: + return self._executable_file_list From 53da677d7b277efda344a2a7f719a38240ecb8d6 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 8 Oct 2024 13:37:46 +0200 
Subject: [PATCH 008/144] minor changes to wrapper creator --- .../wrapper_creator/bfabric_wrapper_creator.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py b/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py index 104f8611..a0a59eda 100644 --- a/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py +++ b/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py @@ -9,6 +9,7 @@ from typing import Any, Literal import yaml +from loguru import logger from bfabric import Bfabric from bfabric.bfabric_legacy import bfabricEncoder @@ -45,6 +46,7 @@ def _log_storage(self) -> Storage: def create_output_resource(self) -> Resource: # Since we use the id of the output resource in the path, we have to save it twice. + logger.info("Creating output resource") n_input_resource = len(self._workunit.input_resources) resource_id = self._client.save( "resource", @@ -62,10 +64,12 @@ def create_output_resource(self) -> Resource: relative_path = str(output_folder / output_filename) # Save the path + logger.info("Saving correct path") result = self._client.save("resource", {"id": resource_id, "relativepath": relative_path}) return Resource(result[0]) def create_log_resource(self, variant: Literal["out", "err"], output_resource: Resource) -> Resource: + logger.info("Creating log resource") result = self._client.save( "resource", { @@ -78,6 +82,7 @@ def create_log_resource(self, variant: Literal["out", "err"], output_resource: R return Resource(result[0]) def get_application_section(self, output_resource: Resource) -> dict[str, Any]: + logger.info("Creating application section") output_url = f"bfabric@{self._application.storage.data_dict['host']}:{self._application.storage.data_dict['basepath']}{output_resource.data_dict['relativepath']}" inputs = defaultdict(list) for resource in Resource.find_all(self.workunit_definition.execution.resources, client=self._client).values(): @@ -94,6 +99,7 @@ def 
get_application_section(self, output_resource: Resource) -> dict[str, Any]: def get_job_configuration_section( self, output_resource: Resource, stdout_resource: Resource, stderr_resource: Resource ) -> dict[str, Any]: + logger.info("Creating job configuration section") log_resource = {} for name, resource in [("stdout", stdout_resource), ("stderr", stderr_resource)]: @@ -143,7 +149,8 @@ def _fasta_sequence(self) -> str: else: return "" - def write_results(self, config_serialized: str) -> None: + def write_results(self, config_serialized: str) -> tuple[dict[str, Any], dict[str, Any]]: + logger.info("Saving executable") yaml_workunit_executable = self._client.save( "executable", { @@ -155,6 +162,7 @@ def write_results(self, config_serialized: str) -> None: "version": "10", }, )[0] + logger.info("Saving external job") yaml_workunit_externaljob = self._client.save( "externaljob", { @@ -163,15 +171,18 @@ def write_results(self, config_serialized: str) -> None: "executableid": yaml_workunit_executable["id"], "action": "WORKUNIT", }, - ) + )[0] # TODO now i am a bit confused, the external_job_id that is added to the .yml file is not the original one # but rather the one from the yaml_workunit_externaljob. 
I am not sure if we need this as it makes the # code here a lot more complex - print(yaml_workunit_externaljob) + logger.info(yaml_workunit_externaljob) + logger.info("Setting external job status to 'done'") self._client.save("externaljob", {"id": self._external_job_id, "status": "done"}) + return yaml_workunit_executable, yaml_workunit_externaljob + class BfabricWrapperCreatorOld(BfabricExternalJob): """ From 8cfbceb0ccca12a91b887d4f8ce98f835535d34d Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 8 Oct 2024 13:38:19 +0200 Subject: [PATCH 009/144] add relationship --- docs/changelog.md | 4 ++++ src/bfabric/entities/externaljob.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 8b5d875d..61371725 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -10,6 +10,10 @@ Versioning currently follows `X.Y.Z` where ## \[Unreleased\] +### Added + +- Relationship: `ExternalJob.executable` + ### Fixed - `Entity.find_all` returns no values when an empty list is passed as an argument. 
diff --git a/src/bfabric/entities/externaljob.py b/src/bfabric/entities/externaljob.py index 3b9a6641..05f46595 100644 --- a/src/bfabric/entities/externaljob.py +++ b/src/bfabric/entities/externaljob.py @@ -5,6 +5,7 @@ from bfabric import Bfabric from bfabric.entities.core.entity import Entity +from bfabric.entities.core.has_one import HasOne if TYPE_CHECKING: from bfabric.entities.workunit import Workunit @@ -16,6 +17,8 @@ class ExternalJob(Entity): def __init__(self, data_dict: dict[str, Any], client: Bfabric | None) -> None: super().__init__(data_dict=data_dict, client=client) + executable = HasOne(entity="Executable", bfabric_field="executable") + @cached_property def workunit(self) -> Workunit | None: from bfabric.entities.workunit import Workunit From a028966a034e89a83b9146e7b6926360c6f3d000 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 8 Oct 2024 14:12:55 +0200 Subject: [PATCH 010/144] old logic of skipping executables without base64 binary --- src/bfabric/wrapper_creator/bfabric_submitter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/bfabric/wrapper_creator/bfabric_submitter.py b/src/bfabric/wrapper_creator/bfabric_submitter.py index fd96c182..0d9d951e 100644 --- a/src/bfabric/wrapper_creator/bfabric_submitter.py +++ b/src/bfabric/wrapper_creator/bfabric_submitter.py @@ -219,6 +219,9 @@ def submitter_yaml(self) -> None: """ executables = Executable.find_by({"workunitid": self.workunit.id}, client=self._client).values() for executable in executables: + if not executable["base64"]: + continue + logger.debug(f"executable = {executable}") content = base64.b64decode(executable["base64"].encode()).decode() logger.debug(content) From bf28baf678df57785940d6bf692258eaff8b2ac9 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 8 Oct 2024 14:24:59 +0200 Subject: [PATCH 011/144] unit test --- tests/bfabric/entities/test_externaljob.py | 33 ++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 
tests/bfabric/entities/test_externaljob.py diff --git a/tests/bfabric/entities/test_externaljob.py b/tests/bfabric/entities/test_externaljob.py new file mode 100644 index 00000000..9b24e2c0 --- /dev/null +++ b/tests/bfabric/entities/test_externaljob.py @@ -0,0 +1,33 @@ +from typing import Any + +import pytest +from pytest_mock import MockerFixture + +from bfabric.entities import ExternalJob, Workunit + + +@pytest.fixture() +def data_dict(): + return { + "id": 1, + "cliententityclassname": "Workunit", + "cliententityid": 5, + } + + +def test_workunit_when_available(mocker: MockerFixture, data_dict: dict[str, Any]): + mock_client = mocker.MagicMock(name="mock_client", spec=[]) + mock_find = mocker.patch.object(Workunit, "find") + external_job = ExternalJob(data_dict, mock_client) + assert external_job.workunit == mock_find.return_value + mock_find.assert_called_once_with(id=5, client=mock_client) + + +def test_workunit_when_wrong_class(mocker: MockerFixture, data_dict: dict[str, Any]): + mock_client = mocker.MagicMock(name="mock_client", spec=[]) + mock_find = mocker.patch.object(Workunit, "find") + # TODO actually check which ones are the legal values here + data_dict["cliententityclassname"] = "WrongClass" + external_job = ExternalJob(data_dict, mock_client) + assert external_job.workunit is None + mock_find.assert_not_called() From a8d5ecb06ba70fcf9aa4f9c88f027664e1a7528d Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 8 Oct 2024 16:47:09 +0200 Subject: [PATCH 012/144] refactor slurm.py --- .../wrapper_creator/bfabric_submitter.py | 6 +- src/bfabric/wrapper_creator/slurm.py | 60 +++++++++---------- tests/bfabric/wrapper_creator/__init__.py | 0 tests/bfabric/wrapper_creator/test_slurm.py | 50 ++++++++++++++++ 4 files changed, 80 insertions(+), 36 deletions(-) mode change 100755 => 100644 src/bfabric/wrapper_creator/slurm.py create mode 100644 tests/bfabric/wrapper_creator/__init__.py create mode 100644 tests/bfabric/wrapper_creator/test_slurm.py diff 
--git a/src/bfabric/wrapper_creator/bfabric_submitter.py b/src/bfabric/wrapper_creator/bfabric_submitter.py index 0d9d951e..5a925486 100644 --- a/src/bfabric/wrapper_creator/bfabric_submitter.py +++ b/src/bfabric/wrapper_creator/bfabric_submitter.py @@ -71,11 +71,11 @@ def __init__( # resQsub = GE.qsub(script=script, arguments=arguments) # self.B.logger(f"{resQsub}") - def submit_slurm(self, script: str = "/tmp/runme.bash", arguments: str = "") -> None: - slurm = SLURM(user=self.user, SLURMROOT=self.scheduleroot) + def submit_slurm(self, script: str = "/tmp/runme.bash") -> None: + slurm = SLURM(slurm_root=self.scheduleroot) logger.debug(script) logger.debug(type(script)) - res_slurm_batch = slurm.sbatch(script=script, arguments=arguments) + res_slurm_batch = slurm.sbatch(script=script) logger.debug(f"{res_slurm_batch}") def compose_bash_script(self, configuration=None, configuration_parser=lambda x: yaml.safe_load(x)) -> str: diff --git a/src/bfabric/wrapper_creator/slurm.py b/src/bfabric/wrapper_creator/slurm.py old mode 100755 new mode 100644 index f7683115..2315b492 --- a/src/bfabric/wrapper_creator/slurm.py +++ b/src/bfabric/wrapper_creator/slurm.py @@ -1,12 +1,13 @@ -#! /usr/bin/env python +from __future__ import annotations + +from loguru import logger + """ Interface to the SLURM (Simple Linux Utility for Resources Management) resource manager and job scheduler 2020-09-28 Maria d'Errico Christian Panse - -$HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/slurm.py $ """ # Copyright (C) 2011, 2012 ETH Zurich and University of Zurich. All rights reserved. @@ -29,46 +30,39 @@ # limitations under the License. # -__docformat__ = "reStructuredText" -# __version__ = '$Revision: 2463 $' - +from pathlib import Path import os import subprocess class SLURM: - """ - interface to Slurm sbatch - """ - - def __init__(self, user="*", SLURMROOT="/usr/"): - """ - Set up parameters for querying Slurm. 
+ """Wrapper for SLURM, providing a Python interface to `sbatch`. - SLURMROOT is essential. - """ - - self.user = user - self.sbatchbin = f"{SLURMROOT}/bin/sbatch" + The `slurm_root` variable will be passed as `SLURMROOT` to the environment, when submitting the script, and is an + important parameter which needs to be set correctly for our scripts to function properly. + """ - os.environ["SLURM_ROOT"] = SLURMROOT + def __init__(self, slurm_root: str | Path = "/usr/") -> None: + self._slurm_root = Path(slurm_root) + self._sbatch_bin = self._slurm_root / "bin/sbatch" - def sbatch(self, script, arguments=""): - """ - todo: pass stderr and stdout file location as argument + def sbatch(self, script: str | Path) -> tuple[str, str] | None: + """Submits the script to SLURM using `sbatch`. + If successful, returns a tuple with the stdout and stderr of the submission. """ - sbatch_cmd = [self.sbatchbin, script, " ".join(arguments)] - - if not os.path.isfile(self.sbatchbin): - print(f"{self.sbatchbin} can not be found.") + script = Path(script) + if not script.is_file(): + logger.error(f"Script not found: {script}") return - - if not os.path.isfile(script): - print(f"'{script}' - no such file.") + if not self._sbatch_bin.is_file(): + logger.error(f"sbatch binary not found: {self._sbatch_bin}") return - sbatch_process = subprocess.Popen(sbatch_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False) - result = [x.decode("utf-8") for x in sbatch_process.communicate()] - - return "".join(result) + env = os.environ | {"SLURMROOT": self._slurm_root} + result = subprocess.run( + [self._sbatch_bin, script], env=env, check=True, shell=False, capture_output=True, encoding="utf-8" + ) + # TODO the code initially had a TODO to write these two to a file, in general I think the logs of the squeue + # are currently not written to a file at all. 
+ return result.stdout, result.stderr diff --git a/tests/bfabric/wrapper_creator/__init__.py b/tests/bfabric/wrapper_creator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/bfabric/wrapper_creator/test_slurm.py b/tests/bfabric/wrapper_creator/test_slurm.py new file mode 100644 index 00000000..3f375609 --- /dev/null +++ b/tests/bfabric/wrapper_creator/test_slurm.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest +from logot import Logot, logged +from pytest_mock import MockerFixture + +from bfabric.wrapper_creator.slurm import SLURM + + +@pytest.fixture() +def mock_slurm() -> SLURM: + return SLURM(slurm_root=Path("/tmp/test_slurm")) + + +@pytest.mark.parametrize("path", ["/tmp/hello/world.txt", Path("/tmp/hello/world.txt")]) +def test_sbatch_when_success(mocker: MockerFixture, mock_slurm: SLURM, path: Path | str) -> None: + mock_is_file = mocker.patch.object(Path, "is_file", return_value=True) + mocker.patch("os.environ", new={"x": "y"}) + mock_run = mocker.patch("subprocess.run", return_value=mocker.MagicMock(stdout="stdout", stderr="stderr")) + stdout, stderr = mock_slurm.sbatch(script=path) + assert stdout == "stdout" + assert stderr == "stderr" + mock_run.assert_called_once_with( + [Path("/tmp/test_slurm/bin/sbatch"), Path(path)], + env={"SLURMROOT": Path("/tmp/test_slurm"), "x": "y"}, + check=True, + shell=False, + capture_output=True, + encoding="utf-8", + ) + assert mock_is_file.call_count == 2 + + +def test_sbatch_when_script_not_exists(mocker: MockerFixture, mock_slurm: SLURM, logot: Logot) -> None: + mocker.patch("bfabric.wrapper_creator.slurm.Path", side_effect=lambda x: x) + mock_script = mocker.MagicMock(name="script", is_file=lambda: False) + result = mock_slurm.sbatch(script=mock_script) + assert result is None + logot.assert_logged(logged.error(f"Script not found: {mock_script}")) + + +def test_sbatch_when_sbatch_not_exists(mocker: MockerFixture, mock_slurm: SLURM, logot: 
Logot) -> None: + mocker.patch("bfabric.wrapper_creator.slurm.Path", side_effect=lambda x: x) + mock_script = mocker.MagicMock(name="script", is_file=lambda: True) + mock_sbatch = mocker.patch.object(mock_slurm, "_sbatch_bin", mocker.MagicMock(is_file=lambda: False)) + result = mock_slurm.sbatch(script=mock_script) + assert result is None + logot.assert_logged(logged.error(f"sbatch binary not found: {mock_sbatch}")) From d2bc6f3c6a5bf67434084690c1715934154c25bc Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 8 Oct 2024 16:51:05 +0200 Subject: [PATCH 013/144] typo --- src/bfabric/wrapper_creator/bfabric_submitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bfabric/wrapper_creator/bfabric_submitter.py b/src/bfabric/wrapper_creator/bfabric_submitter.py index 5a925486..7e4bc4e6 100644 --- a/src/bfabric/wrapper_creator/bfabric_submitter.py +++ b/src/bfabric/wrapper_creator/bfabric_submitter.py @@ -138,7 +138,7 @@ def compose_bash_script(self, configuration=None, configuration_parser=lambda x: if [ $? 
-eq 1 ]; then - echo "writting to output url failed!"; + echo "writing to output url failed!"; exit 1; fi From 69fab11c23be71bda6ae9b9c4d50405b6658c939 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 8 Oct 2024 16:58:57 +0200 Subject: [PATCH 014/144] delete old wrapper creator code --- .../bfabric_wrapper_creator.py | 286 ------------------ 1 file changed, 286 deletions(-) diff --git a/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py b/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py index a0a59eda..07087d62 100644 --- a/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py +++ b/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py @@ -1,21 +1,16 @@ from __future__ import annotations import base64 -import datetime -import json from collections import defaultdict from functools import cached_property from pathlib import Path from typing import Any, Literal -import yaml from loguru import logger from bfabric import Bfabric -from bfabric.bfabric_legacy import bfabricEncoder from bfabric.entities import Workunit, ExternalJob, Application, Resource, Storage, Order, Project from bfabric.experimental.app_interface.workunit.definition import WorkunitDefinition -from bfabric.wrapper_creator.bfabric_external_job import BfabricExternalJob class BfabricWrapperCreator: @@ -182,284 +177,3 @@ def write_results(self, config_serialized: str) -> tuple[dict[str, Any], dict[st self._client.save("externaljob", {"id": self._external_job_id, "status": "done"}) return yaml_workunit_executable, yaml_workunit_externaljob - - -class BfabricWrapperCreatorOld(BfabricExternalJob): - """ - the class is used for the wrapper_creator which is executed by the bfabtic system - (non batch) so each resource is processed seperate - """ - - (externaljobid_submitter, workunit_executableid) = (None, None) - - def get_externaljobid_yaml_workunit(self): - return self.externaljobid_yaml_workunit - - def get_executableid(self): - return self.workunit_executableid - - def 
write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style=False, encoding=None)): - """ - This method writes all related parameters into a yaml file which is than upload as base64 encoded - file into the b-fabric system. - - if the method does not excepted at the end it reports also the status of the external_job. - - TODO(cp): make this function more generic so that it can also export xml, json, yaml, ... - """ - - # Inherits all parameters of the application executable out of B-Fabric to create an executable script - workunitid = self.get_workunitid_of_externaljob() - - if workunitid is None: - raise ValueError("no workunit available for the given externaljobid.") - - workunit = self.read_object(endpoint="workunit", obj={"id": workunitid})[0] - if workunit is None: - raise ValueError("ERROR: no workunit available for the given externaljobid.") - - assert isinstance(workunit._id, int) - - application = self.read_object("application", obj={"id": workunit.application._id})[0] - # TODO(cp): rename to application_execuatbel - workunit_executable = self.read_object("executable", obj={"id": workunit.applicationexecutable._id})[0] - try: - self.workunit_executableid = workunit_executable._id - except: - self.workunit_executableid = None - - # Get container details - container = workunit.container - fastasequence = "" - if container._classname == "order": - order = self.read_object("order", obj={"id": container._id})[0] - order_id = order._id - if "project" in order: # noqa - project_id = order.project._id - else: - project_id = None - if "fastasequence" in order: - fastasequence = "\n".join([x.strip() for x in str(order.fastasequence).split("\r")]) - else: - order_id = None - project_id = container._id - - today = datetime.date.today() - - # merge all information into the executable script - _output_storage = self.read_object("storage", obj={"id": application.storage._id})[0] - - _output_relative_path = "p{0}/bfabric/{1}/{2}/{3}/workunit_{4}/".format( # 
noqa - container._id, - application.technology.replace(" ", "_"), - application.name.replace(" ", "_"), - today.strftime("%Y/%Y-%m/%Y-%m-%d/"), - workunitid, - ) - - # Setup the log_storage to SlurmLog with id 13 - _log_storage = self.read_object("storage", obj={"id": 13})[0] - - # _cmd_applicationList = [workunit_executable.program] - - application_parameter = {} - - if getattr(workunit, "parameter", None) is not None: - for para in workunit.parameter: - parameter = self.read_object("parameter", obj={"id": para._id}) - if parameter: - for p in parameter: - try: - application_parameter[f"{p.key}"] = f"{p.value}" - except: - application_parameter[f"{p.key}"] = "" - - try: - input_resources = [x._id for x in workunit.inputresource] - input_resources = [self.read_object(endpoint="resource", obj={"id": x})[0] for x in input_resources] - except: - print("no input resources found. continue with empty list.") - input_resources = [] - - # query all urls and ids of the input resources - resource_urls = dict() - resource_ids = dict() - - for resource_iterator in input_resources: - try: - _appication_id = self.read_object(endpoint="workunit", obj={"id": resource_iterator.workunit._id})[ - 0 - ].application._id - - _application_name = f"{self.read_object('application', obj={'id': _appication_id})[0].name}" - - _storage = self.read_object("storage", {"id": resource_iterator.storage._id})[0] - - _inputUrl = f"bfabric@{_storage.host}:/{_storage.basepath}/{resource_iterator.relativepath}" - - if _application_name not in resource_urls: - resource_urls[_application_name] = [] - resource_ids[_application_name] = [] - - resource_urls[_application_name].append(_inputUrl) - - sample_id = self.get_sampleid(int(resource_iterator._id)) - - _resource_sample = { - "resource_id": int(resource_iterator._id), - "resource_url": f"{self.config.base_url}/userlab/show-resource.html?id={resource_iterator._id}", - } - - if sample_id is not None: - _resource_sample["sample_id"] = int(sample_id) - 
_resource_sample["sample_url"] = f"{self.config.base_url}/userlab/show-sample.html?id={sample_id}" - - resource_ids[_application_name].append(_resource_sample) - except: - print("resource_iterator failed. continue ...") - pass - - # create resources for output, stderr, stdout - _ressource_output = self.save_object( - "resource", - { - "name": f"{application.name} {len(input_resources)} - resource", - "workunitid": workunit._id, - "storageid": int(application.storage._id), - "relativepath": _output_relative_path, - }, - )[0] - - print(_ressource_output) - _output_filename = f"{_ressource_output._id}.{application.outputfileformat}" - # we want to include the resource._id into the filename - _ressource_output = self.save_object( - "resource", - { - "id": int(_ressource_output._id), - "relativepath": f"{_output_relative_path}/{_output_filename}", - }, - )[0] - - print(_ressource_output) - _resource_stderr = self.save_object( - "resource", - { - "name": "slurm_stderr", - "workunitid": int(workunit._id), - "storageid": _log_storage._id, - "relativepath": f"/workunitid-{workunit._id}_resourceid-{_ressource_output._id}.err", - }, - )[0] - - _resource_stdout = self.save_object( - "resource", - { - "name": "slurm_stdout", - "workunitid": workunit._id, - "storageid": _log_storage._id, - "relativepath": f"/workunitid-{workunit._id}_resourceid-{_ressource_output._id}.out", - }, - )[0] - - # Creates the workunit executable - # The config includes the externaljobid: the yaml_workunit_externaljob has to be created before it. - # The yaml_workunit_externaljob cannot be created without specifying an executableid: - # a yaml_workunit_executable is thus created before the config definition in order to provide - # the correct executableid to the yaml_workunit_externaljob. 
- # However this yaml_workunit_executable has to be updated later to include 'base64': base64.b64encode(config_serialized.encode()).decode() - yaml_workunit_executable = self.save_object( - "executable", - { - "name": "job configuration (executable) in YAML", - "context": "WORKUNIT", - "workunitid": workunit._id, - "description": "This is a job configuration as YAML base64 encoded. It is configured to be executed by the B-Fabric yaml submitter.", - }, - )[0] - print(yaml_workunit_executable) - - yaml_workunit_externaljob = self.save_object( - "externaljob", - { - "workunitid": workunit._id, - "status": "new", - "executableid": yaml_workunit_executable._id, - "action": "WORKUNIT", - }, - )[0] - print(yaml_workunit_externaljob) - assert isinstance(yaml_workunit_externaljob._id, int) - self.externaljobid_yaml_workunit = int(yaml_workunit_externaljob._id) - print(f"XXXXXXX self.externaljobid_yaml_workunit ={self.externaljobid_yaml_workunit} XXXXXXX") - - _output_url = ( - f"bfabric@{_output_storage.host}:{_output_storage.basepath}{_output_relative_path}/{_output_filename}" - ) - - try: - query_obj = {"id": workunit.inputdataset._id} - inputdataset = self.read_object(endpoint="dataset", obj=query_obj)[0] - inputdataset_json = json.dumps(inputdataset, cls=bfabricEncoder, sort_keys=True, indent=2) - inputdataset = json.loads(inputdataset_json) - except: - inputdataset = None - - # Compose configuration structure - config = { - "job_configuration": { - "executable": f"{workunit_executable.program}", - "inputdataset": inputdataset, - "input": resource_ids, - "output": { - "protocol": "scp", - "resource_id": int(_ressource_output._id), - "ssh_args": "-o StrictHostKeyChecking=no -2 -l bfabric -x", - }, - "stderr": { - "protocol": "file", - "resource_id": int(_resource_stderr._id), - "url": f"{_log_storage.basepath}/workunitid-{workunit._id}_resourceid-{_ressource_output._id}.err", - }, - "stdout": { - "protocol": "file", - "resource_id": int(_resource_stdout._id), - "url": 
f"{_log_storage.basepath}/workunitid-{workunit._id}_resourceid-{_ressource_output._id}.out", - }, - "workunit_id": int(workunit._id), - "workunit_createdby": str(workunit.createdby), - "workunit_url": f"{self.config.base_url}/userlab/show-workunit.html?workunitId={workunit._id}", - "external_job_id": int(yaml_workunit_externaljob._id), - "order_id": order_id, - "project_id": project_id, - "fastasequence": fastasequence, - }, - "application": { - "protocol": "scp", - "parameters": application_parameter, - "input": resource_urls, - "output": [_output_url], - }, - } - - config_serialized = data_serializer(config) - print(config_serialized) - - yaml_workunit_executable = self.save_object( - "executable", - { - "id": yaml_workunit_executable._id, - "base64": base64.b64encode(config_serialized.encode()).decode(), - "version": f"{10}", - }, - )[0] - print(yaml_workunit_executable) - - # The WrapperCreator executable is successful, and the status of the its external job is set to done, - # which triggers B-Fabric to create an external job for the submitter executable. 
- - wrapper_creator_externaljob = self.save_object( - endpoint="externaljob", obj={"id": self.externaljobid, "status": "done"} - ) - - print(f"\n\nquery_counter={self.query_counter}") From f37de75e90ae7b7664fbdfcb87e8eb4775fe0f8c Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 9 Oct 2024 09:35:09 +0200 Subject: [PATCH 015/144] implement entity_lookup_cache.py --- docs/changelog.md | 1 + .../experimental/entity_lookup_cache.py | 98 +++++++++++++++++++ .../experimental/test_entity_lookup_cache.py | 94 ++++++++++++++++++ 3 files changed, 193 insertions(+) create mode 100644 src/bfabric/experimental/entity_lookup_cache.py create mode 100644 tests/bfabric/experimental/test_entity_lookup_cache.py diff --git a/docs/changelog.md b/docs/changelog.md index 61371725..4e7622c7 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -12,6 +12,7 @@ Versioning currently follows `X.Y.Z` where ### Added +- (experimental) EntityLookupCache that allows to cache entity lookups in a script to avoid redundant requests. - Relationship: `ExternalJob.executable` ### Fixed diff --git a/src/bfabric/experimental/entity_lookup_cache.py b/src/bfabric/experimental/entity_lookup_cache.py new file mode 100644 index 00000000..e6d05266 --- /dev/null +++ b/src/bfabric/experimental/entity_lookup_cache.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +from collections import defaultdict, OrderedDict +from collections.abc import Hashable +from contextlib import contextmanager +from typing import Any, TYPE_CHECKING + +from loguru import logger + +if TYPE_CHECKING: + from bfabric.entities.core.entity import Entity + + +class Cache: + """A FIFO cache with a maximum size, implemented by an OrderedDict.""" + + def __init__(self, max_size: int) -> None: + self._entries = OrderedDict() + self._max_size = max_size + + def get(self, key: Hashable) -> Any | None: + """Returns the value with the given key, if it exists, and marks it as used. + + If the key does not exist, returns None. 
+ """ + if key in self._entries: + self._entries.move_to_end(key) + return self._entries[key] + + def put(self, key: Hashable, value: Any) -> None: + """Puts a key-value pair into the cache, marking it as used.""" + if self._max_size != 0 and len(self._entries) >= self._max_size: + self._entries.popitem(last=False) + self._entries[key] = value + + def __contains__(self, key: Hashable) -> bool: + """Returns whether the cache contains a key.""" + return key in self._entries + + +class EntityLookupCache: + """Implements the logic for caching entity lookup. + + :param max_size: The maximum size of the cache. If 0, the cache has no size limit. + """ + + __class_instance = None + + def __init__(self, max_size: int = 0) -> None: + self._caches = defaultdict(lambda: Cache(max_size=max_size)) + + def contains(self, entity_type: type[Entity], entity_id: int) -> bool: + """Returns whether the cache contains an entity with the given type and ID.""" + return entity_id in self._caches[entity_type] + + def get(self, entity_type: type[Entity], entity_id: int) -> Entity | None: + """Returns the entity with the given type and ID, if it exists in the cache.""" + if self._caches[entity_type].get(entity_id): + logger.debug(f"Cache hit for entity {entity_type} with ID {entity_id}") + return self._caches[entity_type].get(entity_id) + else: + logger.debug(f"Cache miss for entity {entity_type} with ID {entity_id}") + + def get_all(self, entity_type: type[Entity], entity_ids: list[int]) -> dict[int, Entity]: + """Returns a dictionary of entities with the given type and IDs, + containing only the entities that exist in the cache. 
+ """ + return { + entity_id: self.get(entity_type, entity_id) + for entity_id in entity_ids + if self.contains(entity_type, entity_id) + } + + def put(self, entity_type: type[Entity], entity_id: int, entity: Entity) -> None: + """Puts an entity with the given type and ID into the cache.""" + logger.debug(f"Caching entity {entity_type} with ID {entity_id}") + self._caches[entity_type].put(entity_id, entity) + + @classmethod + @contextmanager + def enable(cls, max_size: int = 0): + """Context manager that enables the EntityLookupCache singleton instance, i.e. every entity lookup by ID + within this context will be cached. The cache is cleared after the context exits. + """ + existing_cache = cls.__class_instance is not None + if not existing_cache: + cls.__class_instance = cls(max_size=max_size) + # TODO what to do if existing_cache and max_size mismatch? + try: + yield + finally: + if not existing_cache: + cls.__class_instance = None + + @classmethod + def instance(cls) -> EntityLookupCache | None: + """Returns the singleton instance of the EntityLookupCache.""" + return cls.__class_instance diff --git a/tests/bfabric/experimental/test_entity_lookup_cache.py b/tests/bfabric/experimental/test_entity_lookup_cache.py new file mode 100644 index 00000000..13daee1e --- /dev/null +++ b/tests/bfabric/experimental/test_entity_lookup_cache.py @@ -0,0 +1,94 @@ +import pytest + +from bfabric.experimental.entity_lookup_cache import Cache, EntityLookupCache + + +@pytest.fixture() +def max_size() -> int: + return 3 + + +@pytest.fixture() +def cache(max_size: int): + cache = Cache(max_size=max_size) + cache.put("key1", "value1") + cache.put("key2", "value2") + return cache + + +@pytest.fixture() +def entity_cache(max_size: int): + result = EntityLookupCache(max_size=max_size) + result.put("Entity1", 1, "value1") + result.put("Entity1", 2, "value2") + return result + + +def test_cache_get_when_exists(cache): + assert cache.get("key1") == "value1" + assert cache.get("key2") == 
"value2" + + +def test_cache_get_when_not_exists(cache): + assert cache.get("missing") is None + + +@pytest.mark.parametrize("max_size", [0, 3]) +def test_cache_put(cache, max_size): + cache.put("key3", "value3") + cache.put("key4", "value4") + if max_size == 3: + assert cache.get("key1") is None + else: + assert cache.get("key1") == "value1" + assert cache.get("key2") == "value2" + assert cache.get("key3") == "value3" + assert cache.get("key4") == "value4" + + +def test_cache_contains(cache): + assert "key1" in cache + assert "key2" in cache + assert "key3" not in cache + + +def test_entity_lookup_cache_contains(entity_cache): + assert entity_cache.contains("Entity1", 1) + assert entity_cache.contains("Entity1", 2) + assert not entity_cache.contains("Entity1", 3) + assert not entity_cache.contains("Entity2", 1) + + +def test_entity_lookup_cache_get_when_exists(entity_cache): + assert entity_cache.get("Entity1", 1) == "value1" + assert entity_cache.get("Entity1", 2) == "value2" + + +def test_entity_lookup_cache_get_when_not_exists(entity_cache): + assert entity_cache.get("Entity1", 3) is None + + +def test_entity_lookup_cache_get_all(entity_cache): + result = entity_cache.get_all("Entity1", [1, 2, 3]) + assert result == {1: "value1", 2: "value2"} + + +def test_entity_lookup_cache_put(entity_cache): + entity_cache.put("Entity1", 3, "value3") + entity_cache.put("Entity1", 4, "value4") + assert entity_cache.get("Entity1", 1) is None + assert entity_cache.get("Entity1", 2) == "value2" + assert entity_cache.get("Entity1", 3) == "value3" + assert entity_cache.get("Entity1", 4) == "value4" + + +def test_entity_lookup_cache_enable(entity_cache): + assert entity_cache.instance() is None + with entity_cache.enable(): + first_instance = entity_cache.instance() + assert first_instance is not None + with entity_cache.enable(): + second_instance = entity_cache.instance() + assert first_instance is second_instance + assert entity_cache.instance() is first_instance + assert 
entity_cache.instance() is None From 2b285a1037f2bceb99fbee184590a47181d77ee3 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 9 Oct 2024 09:45:48 +0200 Subject: [PATCH 016/144] integrate the experimental cache in entity.py --- src/bfabric/entities/core/entity.py | 78 ++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/src/bfabric/entities/core/entity.py b/src/bfabric/entities/core/entity.py index cea55b0f..a5b81e6f 100644 --- a/src/bfabric/entities/core/entity.py +++ b/src/bfabric/entities/core/entity.py @@ -1,11 +1,13 @@ from __future__ import annotations from typing import TYPE_CHECKING +from collections.abc import Iterable from loguru import logger from bfabric import Bfabric from bfabric.experimental import MultiQuery +from bfabric.experimental.entity_lookup_cache import EntityLookupCache if TYPE_CHECKING: from typing import Any, Self @@ -39,25 +41,39 @@ def _client(self) -> Bfabric | None: @classmethod def find(cls, id: int, client: Bfabric) -> Self | None: - result = client.read(cls.ENDPOINT, obj={"id": int(id)}) - return cls(result[0], client=client) if len(result) == 1 else None + """Finds an entity by its ID, if it does not exist `None` is returned.""" + cache = EntityLookupCache.instance() + if cache and cache.contains(entity_type=cls, entity_id=id): + entity = cache.get(entity_type=cls, entity_id=id) + else: + result = client.read(cls.ENDPOINT, obj={"id": int(id)}) + entity = cls(result[0], client=client) if len(result) == 1 else None + if cache: + cache.put(entity_type=cls, entity_id=id, entity=entity) + return entity @classmethod def find_all(cls, ids: list[int], client: Bfabric) -> dict[int, Self]: - ids = [int(id) for id in ids] - if len(ids) > 100: - result = MultiQuery(client).read_multi(cls.ENDPOINT, {}, "id", ids) - elif not ids: - return {} - else: - result = client.read(cls.ENDPOINT, obj={"id": ids}) - results = {x["id"]: cls(x, client=client) for x in result} - if len(results) != len(ids): - 
logger.warning(f"Only found {len(results)} out of {len(ids)}.") - return results + """Returns a dictionary of entities with the given IDs. The order will generally match the input, however, + if some entities are not found they will be omitted and a warning will be logged.""" + cache = EntityLookupCache.instance() + ids_requested = cls.__check_ids_list(ids) + + # retrieve entities from cache and from B-Fabric as needed + results_cached = cache.get_all(entity_type=cls, entity_ids=ids) if cache else {} + results_fresh = cls.__retrieve_entities( + client=client, ids_requested=ids_requested, ids_cached=results_cached.keys() + ) + + if cache: + for entity_id, entity in results_fresh.items(): + cache.put(entity_type=cls, entity_id=entity_id, entity=entity) + + return cls.__ensure_results_order(ids_requested, results_cached, results_fresh) @classmethod def find_by(cls, obj: dict[str, Any], client: Bfabric, max_results: int | None = 100) -> dict[int, Self]: + """Returns a dictionary of entities that match the given query.""" result = client.read(cls.ENDPOINT, obj=obj, max_results=max_results) return {x["id"]: cls(x, client=client) for x in result} @@ -80,3 +96,39 @@ def __repr__(self) -> str: return f"{self.__class__.__name__}({repr(self.__data_dict)}, client={repr(self.__client)})" __str__ = __repr__ + + @classmethod + def __check_ids_list(cls, ids) -> list[int]: + """Converts the ids to a list of integers (if they are not already) and raises an error if this fails or + there are duplicates.""" + ids_requested = [int(id) for id in ids] + if len(ids_requested) != len(set(ids_requested)): + duplicates = [item for item in set(ids_requested) if ids_requested.count(item) > 1] + raise ValueError(f"Duplicate IDs are not allowed, duplicates: {duplicates}") + return ids_requested + + @classmethod + def __retrieve_entities( + cls, client: Bfabric, ids_requested: list[int], ids_cached: Iterable[int] + ) -> dict[int, Self]: + """Retrieves entities from B-Fabric that are not already 
in the cache""" + ids = list(set(ids_requested) - set(ids_cached)) + if ids: + if len(ids) > 100: + result = MultiQuery(client).read_multi(cls.ENDPOINT, {}, "id", ids) + else: + result = client.read(cls.ENDPOINT, obj={"id": ids}) + return {x["id"]: cls(x, client=client) for x in result} + else: + return {} + + @classmethod + def __ensure_results_order( + cls, ids_requested: list[int], results_cached: dict[int, Self], results_fresh: dict[int, Self] + ) -> dict[int, Self]: + """Ensures the results are in the same order as requested and prints a warning if some results are missing.""" + results = {**results_cached, **results_fresh} + results = {entity_id: results[entity_id] for entity_id in ids_requested} + if len(results) != len(ids_requested): + logger.warning(f"Only found {len(results)} out of {len(ids_requested)}.") + return results From 7721007b79813b7498466791ac8289f8567510fc Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 9 Oct 2024 09:45:57 +0200 Subject: [PATCH 017/144] __init__.py --- tests/bfabric/experimental/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/bfabric/experimental/__init__.py diff --git a/tests/bfabric/experimental/__init__.py b/tests/bfabric/experimental/__init__.py new file mode 100644 index 00000000..e69de29b From de15f76ef0679ede7fe7bb2b4109140526bf1459 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 9 Oct 2024 09:47:27 +0200 Subject: [PATCH 018/144] correct behavior when missing values --- src/bfabric/entities/core/entity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bfabric/entities/core/entity.py b/src/bfabric/entities/core/entity.py index a5b81e6f..507fecfe 100644 --- a/src/bfabric/entities/core/entity.py +++ b/src/bfabric/entities/core/entity.py @@ -1,7 +1,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING from collections.abc import Iterable +from typing import TYPE_CHECKING from loguru import logger @@ -128,7 +128,7 
@@ def __ensure_results_order( ) -> dict[int, Self]: """Ensures the results are in the same order as requested and prints a warning if some results are missing.""" results = {**results_cached, **results_fresh} - results = {entity_id: results[entity_id] for entity_id in ids_requested} + results = {entity_id: results[entity_id] for entity_id in ids_requested if entity_id in results} if len(results) != len(ids_requested): logger.warning(f"Only found {len(results)} out of {len(ids_requested)}.") return results From 3816fd16aa1b87c9c75ed0b42a0e545739f5fae4 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 9 Oct 2024 10:10:20 +0200 Subject: [PATCH 019/144] Create dependabot.yml --- .github/dependabot.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..aeac7333 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,18 @@ +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + groups: + actions: + patterns: + - "*" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + groups: + actions: + patterns: + - "*" From 0fb06ee16702b0bf8e15bfea109755439002e431 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 9 Oct 2024 10:04:49 +0200 Subject: [PATCH 020/144] note some important todo for the cache --- src/bfabric/experimental/entity_lookup_cache.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/bfabric/experimental/entity_lookup_cache.py b/src/bfabric/experimental/entity_lookup_cache.py index e6d05266..ed96aeb1 100644 --- a/src/bfabric/experimental/entity_lookup_cache.py +++ b/src/bfabric/experimental/entity_lookup_cache.py @@ -86,6 +86,9 @@ def enable(cls, max_size: int = 0): if not existing_cache: cls.__class_instance = cls(max_size=max_size) # TODO what to do if existing_cache and max_size mismatch? 
+ # TODO another relevant use case could be selectively caching only some entities, whereas others should be + # reloaded + # TODO finally, there is the question about persistent caches (e.g. storages do not change that often) try: yield finally: From c8f45278fb153054a472dfc04e4ce5eade93b8bc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 9 Oct 2024 08:10:44 +0000 Subject: [PATCH 021/144] Bump actions/setup-python from 2 to 5 in the actions group Bumps the actions group with 1 update: [actions/setup-python](https://github.com/actions/setup-python). Updates `actions/setup-python` from 2 to 5 - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v2...v5) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major dependency-group: actions ... Signed-off-by: dependabot[bot] --- .github/workflows/run_unit_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_unit_tests.yml b/.github/workflows/run_unit_tests.yml index 01e61638..8b118217 100644 --- a/.github/workflows/run_unit_tests.yml +++ b/.github/workflows/run_unit_tests.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 with: python-version: 3.9 - name: Install nox From e92a577e59dce1a02817f69aa2a5df8e6e63a5c7 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 9 Oct 2024 11:01:46 +0200 Subject: [PATCH 022/144] app_interface cli "inputs list" does not fail anymore if resources have no "name" field value --- docs/changelog.md | 3 ++- .../experimental/app_interface/cli/inputs.py | 3 ++- .../app_interface/input_preparation/_spec.py | 18 +++++++++++++++++- .../app_interface/input_preparation/prepare.py | 8 +++++--- 4 files changed, 26 insertions(+), 6 
deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 4e7622c7..9484268a 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -12,12 +12,13 @@ Versioning currently follows `X.Y.Z` where ### Added -- (experimental) EntityLookupCache that allows to cache entity lookups in a script to avoid redundant requests. - Relationship: `ExternalJob.executable` +- (experimental) EntityLookupCache that allows to cache entity lookups in a script to avoid redundant requests. ### Fixed - `Entity.find_all` returns no values when an empty list is passed as an argument. +- (experimental) app_interface cli "inputs list" does not fail anymore if resources have no "name" field value. ## \[1.13.8\] - 2024-10-03 diff --git a/src/bfabric/experimental/app_interface/cli/inputs.py b/src/bfabric/experimental/app_interface/cli/inputs.py index 83c37851..7ffafeac 100644 --- a/src/bfabric/experimental/app_interface/cli/inputs.py +++ b/src/bfabric/experimental/app_interface/cli/inputs.py @@ -65,4 +65,5 @@ def list( ) -> None: """Lists the input files for an app.""" setup_script_logging() - print_input_files_list(inputs_yaml=inputs_yaml, target_folder=target_folder) + client = Bfabric.from_config() + print_input_files_list(inputs_yaml=inputs_yaml, target_folder=target_folder, client=client) diff --git a/src/bfabric/experimental/app_interface/input_preparation/_spec.py b/src/bfabric/experimental/app_interface/input_preparation/_spec.py index 0a546648..df6b75df 100644 --- a/src/bfabric/experimental/app_interface/input_preparation/_spec.py +++ b/src/bfabric/experimental/app_interface/input_preparation/_spec.py @@ -1,14 +1,19 @@ from __future__ import annotations from pathlib import Path -from typing import Annotated, Literal, Union +from typing import Annotated, Literal, Union, TYPE_CHECKING import yaml from pydantic import BaseModel, ConfigDict, Field, Discriminator +from bfabric.entities import Resource + # ":" are not allowed, as well as absolute paths (starting with "/") 
RelativeFilePath = Annotated[str, Field(pattern=r"^[^/][^:]*$")] +if TYPE_CHECKING: + from bfabric.bfabric import Bfabric + class ResourceSpec(BaseModel): model_config = ConfigDict(extra="forbid") @@ -18,6 +23,13 @@ class ResourceSpec(BaseModel): filename: RelativeFilePath | None = None check_checksum: bool = True + def resolve_filename(self, client: Bfabric) -> str: + if self.filename: + return self.filename + else: + resource = Resource.find(id=self.id, client=client) + return resource["name"] + class DatasetSpec(BaseModel): model_config = ConfigDict(extra="forbid") @@ -26,9 +38,13 @@ class DatasetSpec(BaseModel): id: int filename: RelativeFilePath separator: Literal[",", "\t"] = "," + # has_header: bool # invalid_characters: str = "" + def resolve_filename(self, client: Bfabric) -> str: + return self.filename + InputSpecType = Annotated[Union[ResourceSpec, DatasetSpec], Discriminator("type")] diff --git a/src/bfabric/experimental/app_interface/input_preparation/prepare.py b/src/bfabric/experimental/app_interface/input_preparation/prepare.py index 8876e9b0..17cff05d 100644 --- a/src/bfabric/experimental/app_interface/input_preparation/prepare.py +++ b/src/bfabric/experimental/app_interface/input_preparation/prepare.py @@ -83,8 +83,8 @@ def prepare_dataset(self, spec: DatasetSpec) -> None: target_path.write_text(tmp_file.read().decode()) def clean_resource(self, spec: ResourceSpec) -> None: - name = spec.filename if spec.filename else Resource.find(id=spec.id, client=self._client)["name"] - path = self._working_dir / name + filename = spec.resolve_filename(client=self._client) + path = self._working_dir / filename if path.exists(): logger.info(f"Removing {path}") path.unlink() @@ -124,6 +124,7 @@ def prepare_folder( def print_input_files_list( inputs_yaml: Path, target_folder: Path, + client: Bfabric, ) -> None: """Prints a list of inputs and whether they exist locally.""" specs_list = InputsSpec.read_yaml(inputs_yaml) @@ -133,7 +134,8 @@ def 
print_input_files_list( Column("Exists Locally"), ) for spec in specs_list: - path = target_folder / spec.filename if target_folder else Path(spec.filename) + filename = spec.resolve_filename(client=client) + path = target_folder / filename if target_folder else Path(filename) table.add_row( str(path), "Resource" if isinstance(spec, ResourceSpec) else "Dataset", From 2172a22c5505d0a0d1e1ebf07ea96623127a5feb Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 10 Oct 2024 10:17:30 +0200 Subject: [PATCH 023/144] add a disclaimer --- src/bfabric/experimental/app_interface/cli/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bfabric/experimental/app_interface/cli/__main__.py b/src/bfabric/experimental/app_interface/cli/__main__.py index 435743e4..56868382 100644 --- a/src/bfabric/experimental/app_interface/cli/__main__.py +++ b/src/bfabric/experimental/app_interface/cli/__main__.py @@ -8,7 +8,7 @@ from bfabric.experimental.app_interface.cli.outputs import app_outputs from bfabric.experimental.app_interface.cli.validate import app_validate -app = cyclopts.App() +app = cyclopts.App(help="Provides an entrypoint to app execution.\n\nFunctionality/API under active development!") app.command(app_inputs) app.command(app_outputs) app.command(app_app) From 97ec420e199ff37407a827a22b50b7e6e1e3665f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 10 Oct 2024 10:18:10 +0200 Subject: [PATCH 024/144] add script definition --- docs/changelog.md | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 9484268a..21142ce2 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -13,6 +13,7 @@ Versioning currently follows `X.Y.Z` where ### Added - Relationship: `ExternalJob.executable` +- (experimental) Provide a script `bfabric-app-runner` to allow testing with pipx. - (experimental) EntityLookupCache that allows to cache entity lookups in a script to avoid redundant requests. 
### Fixed diff --git a/pyproject.toml b/pyproject.toml index 9fb47bec..a4654b6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,7 @@ Repository = "https://github.com/fgcz/bfabricPy" "bfabric_save_workunit_attribute.py"="bfabric_scripts.bfabric_save_workunit_attribute:main" "bfabric_save_workflowstep.py"="bfabric_scripts.bfabric_save_workflowstep:main" "bfabric_slurm_queue_status.py"="bfabric_scripts.bfabric_slurm_queue_status:main" +"bfabric-app-runner"="bfabric.experimental.app_interface.cli.__main__:app" [tool.black] line-length = 120 From e677489db5433b70491951b7481b95bfa8e25979 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 10 Oct 2024 11:32:38 +0200 Subject: [PATCH 025/144] use host path for work dir path in container by default --- src/bfabric/experimental/app_interface/app_runner/_spec.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/bfabric/experimental/app_interface/app_runner/_spec.py b/src/bfabric/experimental/app_interface/app_runner/_spec.py index e8312678..dfa2a213 100644 --- a/src/bfabric/experimental/app_interface/app_runner/_spec.py +++ b/src/bfabric/experimental/app_interface/app_runner/_spec.py @@ -20,7 +20,7 @@ def to_shell(self) -> list[str]: class MountOptions(BaseModel): - work_dir_target: Path = "/work" + work_dir_target: Path | None = None read_only: list[tuple[Path, Path]] = [] share_bfabric_config: bool = True @@ -28,7 +28,10 @@ def collect(self, work_dir: Path): mounts = [] if self.share_bfabric_config: mounts.append((Path("~/.bfabricpy.yml"), Path("/home/user/.bfabricpy.yml"), True)) - mounts.append((work_dir, self.work_dir_target, False)) + # TODO reconsider if we ever want work_dir_target to be customizable to be different from host path + # (currently things will break down if this is configured) + work_dir_target = work_dir if self.work_dir_target is None else self.work_dir_target + mounts.append((work_dir, work_dir_target, False)) for source, target in self.read_only: 
mounts.append((source, target, True)) return [(source.expanduser().absolute(), target, read_only) for source, target, read_only in mounts] From 756e235dd8bdfc5126a4d36d0d60f06130d95ad2 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 10 Oct 2024 12:34:46 +0200 Subject: [PATCH 026/144] resolve some more paths --- src/bfabric/experimental/app_interface/cli/app.py | 1 + src/bfabric/experimental/app_interface/cli/chunk.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/bfabric/experimental/app_interface/cli/app.py b/src/bfabric/experimental/app_interface/cli/app.py index d81d953c..c3ca6fd9 100644 --- a/src/bfabric/experimental/app_interface/cli/app.py +++ b/src/bfabric/experimental/app_interface/cli/app.py @@ -50,6 +50,7 @@ def dispatch( :param workunit_ref: Reference to the workunit (ID or YAML file path). """ setup_script_logging() + work_dir = work_dir.resolve() # TODO set workunit to processing? (i.e. add read-only option here) client = Bfabric.from_config() runner = Runner(spec=AppSpec.model_validate(yaml.safe_load(app_spec.read_text())), client=client, ssh_user=None) diff --git a/src/bfabric/experimental/app_interface/cli/chunk.py b/src/bfabric/experimental/app_interface/cli/chunk.py index 5392dd97..f82bfbf3 100644 --- a/src/bfabric/experimental/app_interface/cli/chunk.py +++ b/src/bfabric/experimental/app_interface/cli/chunk.py @@ -60,6 +60,7 @@ def inputs( """ setup_script_logging() client = Bfabric.from_config() + chunk_dir = chunk_dir.resolve() app_spec_parsed = AppSpec.model_validate(yaml.safe_load(app_spec.read_text())) runner = Runner(spec=app_spec_parsed, client=client, ssh_user=ssh_user) @@ -76,6 +77,7 @@ def process(app_spec: Path, chunk_dir: Path) -> None: """ setup_script_logging() client = Bfabric.from_config() + chunk_dir = chunk_dir.resolve() app_spec_parsed = AppSpec.model_validate(yaml.safe_load(app_spec.read_text())) runner = Runner(spec=app_spec_parsed, client=client, ssh_user=None) @@ -103,6 +105,7 @@ def outputs( """ 
setup_script_logging() client = Bfabric.from_config() + chunk_dir = chunk_dir.resolve() app_spec_parsed = AppSpec.model_validate(yaml.safe_load(app_spec.read_text())) runner = Runner(spec=app_spec_parsed, client=client, ssh_user=ssh_user) From d14c86e869ca388e1b4b2596e0a4f3a589da0e0c Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 10 Oct 2024 12:48:44 +0200 Subject: [PATCH 027/144] resolve workunit_ref as well --- src/bfabric/experimental/app_interface/app_runner/runner.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/bfabric/experimental/app_interface/app_runner/runner.py b/src/bfabric/experimental/app_interface/app_runner/runner.py index e375f056..59bb0bb2 100644 --- a/src/bfabric/experimental/app_interface/app_runner/runner.py +++ b/src/bfabric/experimental/app_interface/app_runner/runner.py @@ -66,6 +66,9 @@ def run_app( read_only: bool = False, dispatch_active: bool = True, ) -> None: + work_dir = work_dir.resolve() + workunit_ref = workunit_ref.resolve() if isinstance(workunit_ref, Path) else workunit_ref + workunit_definition_file = work_dir / "workunit_definition.yml" workunit_definition = WorkunitDefinition.from_ref( workunit=workunit_ref, client=client, cache_file=workunit_definition_file From 530704b2311c9b535b60b6b86342d25104c9810e Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 10 Oct 2024 13:29:27 +0200 Subject: [PATCH 028/144] use polars when possible --- docs/changelog.md | 4 ++++ pyproject.toml | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 21142ce2..6bd26185 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -21,6 +21,10 @@ Versioning currently follows `X.Y.Z` where - `Entity.find_all` returns no values when an empty list is passed as an argument. - (experimental) app_interface cli "inputs list" does not fail anymore if resources have no "name" field value. 
+### Changed + +- Except for macOS x86_64 (which we assume is Rosetta emulation nowadays), we use the faster `polars` instead of `polars-lts-cpu`. + ## \[1.13.8\] - 2024-10-03 This release contains mainly internal changes and ongoing development on the experimental app interface functionality. diff --git a/pyproject.toml b/pyproject.toml index a4654b6b..0997ab21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,8 @@ dependencies = [ "Flask >= 3.0.3", "rich >= 13.7.1", "zeep >= 4.2.1", - "polars-lts-cpu >= 0.20.25", + "polars-lts-cpu >= 0.20.25; platform_machine == 'x86_64' and platform_system == 'Darwin'", + "polars >= 0.20.25; platform_machine != 'x86_64' or platform_system != 'Darwin'", "loguru>=0.7", "setuptools", "pydantic", From 531200c3bf74c6e06ba1bc7c2dfdb37de22367a1 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 10 Oct 2024 13:40:50 +0200 Subject: [PATCH 029/144] print versions in environment --- noxfile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/noxfile.py b/noxfile.py index d9ba6d3c..b905de95 100644 --- a/noxfile.py +++ b/noxfile.py @@ -6,4 +6,5 @@ @nox.session def tests(session): session.install(".[dev]") + session.run("uv", "pip", "list") session.run("pytest") From bdb8e50efacd441256a685286016674a675f69bd Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 10 Oct 2024 13:45:18 +0200 Subject: [PATCH 030/144] install less packages when testing --- noxfile.py | 2 +- pyproject.toml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index b905de95..01abf6f1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -5,6 +5,6 @@ @nox.session def tests(session): - session.install(".[dev]") + session.install(".[test]") session.run("uv", "pip", "list") session.run("pytest") diff --git a/pyproject.toml b/pyproject.toml index 0997ab21..bb3daaa9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,8 @@ dev = [ "mkdocs-material", "mkdocstrings[python]", ] +doc = ["mkdocs", "mkdocs-material", 
"mkdocstrings[python]"] +test = ["pytest", "pytest-mock", "logot"] [project.urls] Homepage = "https://github.com/fgcz/bfabricPy" From 5abc1786a55b2cd722fcc80f3eff326868b031db Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 10 Oct 2024 13:53:35 +0200 Subject: [PATCH 031/144] improve optional dependency definition --- pyproject.toml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bb3daaa9..fb9cb48c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,28 +25,22 @@ dependencies = [ "polars-lts-cpu >= 0.20.25; platform_machine == 'x86_64' and platform_system == 'Darwin'", "polars >= 0.20.25; platform_machine != 'x86_64' or platform_system != 'Darwin'", "loguru>=0.7", - "setuptools", - "pydantic", + "pydantic>=2.9.2", "eval_type_backport; python_version < '3.10'", "python-dateutil >= 2.9.0", - "cyclopts", + "cyclopts >= 2.9.9", #"platformdirs >= 4.3", ] [project.optional-dependencies] dev = [ + "bfabric[doc,test]", "black", "isort", "ruff", "licensecheck", - "pytest", - "pytest-mock", - "logot", "nox", "uv", - "mkdocs", - "mkdocs-material", - "mkdocstrings[python]", ] doc = ["mkdocs", "mkdocs-material", "mkdocstrings[python]"] test = ["pytest", "pytest-mock", "logot"] From 4de47a53d8aa022f8c9d4ac3d79b0b735151ee88 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 11 Oct 2024 09:19:03 +0200 Subject: [PATCH 032/144] rename argument to "work_dir" for consistency --- src/bfabric/experimental/app_interface/cli/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bfabric/experimental/app_interface/cli/app.py b/src/bfabric/experimental/app_interface/cli/app.py index c3ca6fd9..a2d662fe 100644 --- a/src/bfabric/experimental/app_interface/cli/app.py +++ b/src/bfabric/experimental/app_interface/cli/app.py @@ -16,7 +16,7 @@ @app_app.command() def run( app_spec: Path, - target_folder: Path, + work_dir: Path, workunit_ref: int | Path, *, ssh_user: str | None = None, 
@@ -30,7 +30,7 @@ def run( run_app( app_spec=app_spec_parsed, workunit_ref=workunit_ref, - work_dir=target_folder, + work_dir=work_dir, client=client, ssh_user=ssh_user, read_only=read_only, From b8dcd2604368e4f0355157af98732095587de76b Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 15 Oct 2024 13:01:01 +0200 Subject: [PATCH 033/144] `bfabric_legacy.py` has been removed --- docs/changelog.md | 4 + src/bfabric/bfabric_legacy.py | 243 ---------------------------------- 2 files changed, 4 insertions(+), 243 deletions(-) delete mode 100644 src/bfabric/bfabric_legacy.py diff --git a/docs/changelog.md b/docs/changelog.md index 6bd26185..0e80e294 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -25,6 +25,10 @@ Versioning currently follows `X.Y.Z` where - Except for macOS x86_64 (which we assume is Rosetta emulation nowadays), we use the faster `polars` instead of `polars-lts-cpu`. +### Removed + +- `bfabric_legacy.py` has been removed. + ## \[1.13.8\] - 2024-10-03 This release contains mainly internal changes and ongoing development on the experimental app interface functionality. 
diff --git a/src/bfabric/bfabric_legacy.py b/src/bfabric/bfabric_legacy.py deleted file mode 100644 index 673d22cd..00000000 --- a/src/bfabric/bfabric_legacy.py +++ /dev/null @@ -1,243 +0,0 @@ -from __future__ import annotations -import base64 -import json -import os -import sys -from pprint import pprint -from typing import Any - -import yaml -from suds.client import Client -from suds.wsdl import Service - -from bfabric.config import BfabricClientConfig -from bfabric.config import BfabricAuth -from bfabric.bfabric_config import read_config - - -class BfabricLegacy: - """B-Fabric python3 module - Implements read and save object methods for B-Fabric wsdl interface - """ - - def warning(self, msg) -> None: - sys.stderr.write(f"\033[93m{msg}\033[0m\n") - - def __init__( - self, - login: str = None, - password: str = None, - base_url: str = None, - externaljobid=None, - config_path: str = None, - config_env: str = None, - optional_auth: bool = False, - verbose: bool = False, - ) -> None: - """ - :param login: Login string for overriding config file - :param password: Password for overriding config file - :param base_url: Base url of the BFabric server for overriding config file - :param externaljobid: ? - :param config_path: Path to the config file, in case it is different from default - :param config_env: Which config environment to use. Can also specify via environment variable or use - default in the config file (at your own risk) - :param optional_auth: Whether authentification is optional. 
If yes, missing authentification will be ignored, - otherwise an exception will be raised - :param verbose: Verbosity (TODO: resolve potential redundancy with logger) - """ - self.verbose = verbose - - self.cl = {} - self.verbose = False - self.query_counter = 0 - - # Get default path config file path - config_path = config_path or os.path.normpath(os.path.expanduser("~/.bfabricpy.yml")) - - # TODO: Convert to an exception when this branch becomes main - config_path or os.path.normpath(os.path.expanduser("~/.bfabricrc.py")) - if os.path.isfile(config_path): - self.warning( - "WARNING! The old .bfabricrc.py was found in the home directory. Delete and make sure to use the new .bfabricpy.yml" - ) - - # Use the provided config data from arguments instead of the file - if not os.path.isfile(config_path): - self.warning("could not find '.bfabricpy.yml' file in home directory.") - self.config = BfabricClientConfig(base_url=base_url) - self.auth = BfabricAuth(login=login, password=password) - - # Load config from file, override some of the fields with the provided ones - else: - config, auth = read_config(config_path, config_env=config_env, optional_auth=optional_auth) - self.config = config.with_overrides(base_url=base_url) - if (login is not None) and (password is not None): - self.auth = BfabricAuth(login=login, password=password) - elif (login is None) and (password is None): - self.auth = auth - else: - raise OSError("Must provide both username and password, or neither.") - - if not self.config.base_url: - raise ValueError("base server url missing") - if not optional_auth: - if not self.auth or not self.auth.login or not self.auth.password: - raise ValueError("Authentification not initialized but required") - - msg = f"\033[93m--- base_url {self.config.base_url}; login; {self.auth.login} ---\033[0m\n" - sys.stderr.write(msg) - - if self.verbose: - pprint(self.config) - - def read_object(self, endpoint, obj, page=1, plain=False, idonly=False): - """ - A generic method 
which can connect to any endpoint, e.g., workunit, project, order, - externaljob, etc, and returns the object with the requested id. - obj is a python dictionary which contains all the attributes of the endpoint - for the "query". - """ - return self._perform_request( - endpoint=endpoint, method="read", plain=plain, params=dict(query=obj, idonly=idonly, page=page) - ) - - def readid_object(self, endpoint, obj, page=1, plain=False): - """ - A generic method which can connect to any endpoint, e.g., workunit, project, order, - externaljob, etc, and returns the object with the requested id. - obj is a python dictionary which contains only the id of the endpoint for the "query". - """ - return self._perform_request(endpoint=endpoint, method="readid", plain=plain, params=dict(query=obj, page=page)) - - def save_object(self, endpoint, obj, debug=None): - """ - same as read_object above but uses the save method. - """ - return self._perform_request(endpoint=endpoint, method="save", plain=debug is not None, params={endpoint: obj}) - - def checkandinsert_object(self, endpoint, obj, debug=None): - """ - wsdl method to check iff dependencies are fulfilled - """ - # TODO This method was changed a while ago to use the "save"endpoint, which makes it functionally identical - # to the save_object method. Check if this was intended. - return self._perform_request(endpoint=endpoint, method="save", plain=debug is not None, params={endpoint: obj}) - - def delete_object(self, endpoint, id=None, debug=None): - """ - same as read_object above but uses the delete method. 
- """ - return self._perform_request(endpoint=endpoint, method="delete", plain=debug is not None, params=dict(id=id)) - - def upload_file(self, filename, workunitid): - with open(filename, "rb") as f: - content = f.read() - - resource_base64 = base64.b64encode(content).decode() - - res = self.save_object( - "resource", - { - "base64": resource_base64, - "name": os.path.basename(filename), - "description": "base64 encoded file", - "workunitid": workunitid, - }, - ) - - return res - - def _get_service(self, endpoint: str) -> Service: - """Returns a `suds.client.Service` object for the given endpoint name.""" - if endpoint not in self.cl: - self.cl[endpoint] = Client(f"{self.config.base_url}/{endpoint}?wsdl", cache=None) - return self.cl[endpoint].service - - def _perform_request(self, endpoint: str, method: str, plain: bool, params: dict[str, Any]) -> Any: - """Performs a request to the given endpoint and returns the result.""" - self.query_counter += 1 - request_params = dict(login=self.auth.login, password=self.auth.password, **params) - service = self._get_service(endpoint=endpoint) - response = getattr(service, method)(request_params) - if plain: - return response - elif getattr(response, "entitiesonpage", None) == 0: - return [] - return getattr(response, endpoint) - - @staticmethod - def print_json(queryres=None) -> None: - """ - This method prints the query result as returned by ``read_object`` in JSON format. - - Parameter - --------- - - queryres : the object returned by ``read_object`` method. - """ - if queryres is None: - raise TypeError( - "print_json() missing 1 required positional argument: please provide the output from read_object as parameter to print_json" - ) - - res = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True, indent=2) - print(res) - - @staticmethod - def print_yaml(queryres=None) -> None: - """ - This method prints the query result as returned by ``read_object`` in YAML format. 
- - Parameter - --------- - - queryres : the object returned by ``read_object`` method. - """ - if queryres is None: - raise TypeError( - "print_yaml() missing 1 required positional argument: please provide the output from read_object as parameter to print_yaml" - ) - - res_json = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True) - res = yaml.dump(res_json, default_flow_style=False, encoding=None, default_style=None) - print(res) - - def get_sampleid(self, resourceid=None): - """ - determines the sample_id of a given resource_id. - it performs a recursive dfs. - TODO(cp): check if the method should be implemented using a stack - - :param resourceid: - :return: (int, int) - """ - - assert isinstance(resourceid, int) - - try: - resource = self.read_object("resource", obj={"id": resourceid})[0] - except: - return None - - try: - workunit = self.read_object(endpoint="workunit", obj={"id": resource.workunit._id})[0] - return self.get_sampleid(resourceid=int(workunit.inputresource[0]._id)) - except: - self.warning(f"fetching sampleid of resource.workunitid = {resource.workunit._id} failed.") - return None - - -class bfabricEncoder(json.JSONEncoder): - """ - Implements json encoder for the Bfabric.print_json method - """ - - def default(self, o): - try: - return dict(o) - except TypeError: - pass - else: - return list(o) - return JSONEncoder.default(self, o) From 57d9d2df33d4fbd735d576859983162d187c0d06 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 15 Oct 2024 13:02:42 +0200 Subject: [PATCH 034/144] `math_helper.py` has been removed --- docs/changelog.md | 1 + src/bfabric/utils/math_helper.py | 9 --------- tests/bfabric/utils/test_math_helper.py | 15 --------------- 3 files changed, 1 insertion(+), 24 deletions(-) delete mode 100644 src/bfabric/utils/math_helper.py delete mode 100644 tests/bfabric/utils/test_math_helper.py diff --git a/docs/changelog.md b/docs/changelog.md index 0e80e294..b659ab7a 100644 --- a/docs/changelog.md +++ b/docs/changelog.md 
@@ -28,6 +28,7 @@ Versioning currently follows `X.Y.Z` where ### Removed - `bfabric_legacy.py` has been removed. +- `math_helper.py` has been removed. ## \[1.13.8\] - 2024-10-03 diff --git a/src/bfabric/utils/math_helper.py b/src/bfabric/utils/math_helper.py deleted file mode 100644 index 7e20278f..00000000 --- a/src/bfabric/utils/math_helper.py +++ /dev/null @@ -1,9 +0,0 @@ -def div_int_ceil(n: int, d: int) -> int: - """ - :param n: Numerator - :param d: Denominator - :return: Performs integer ceiling division - Theoretically equivalent to math.ceil(n/d), but not subject to floating-point errors. - """ - q, r = divmod(n, d) - return q + bool(r) diff --git a/tests/bfabric/utils/test_math_helper.py b/tests/bfabric/utils/test_math_helper.py deleted file mode 100644 index 0f81be22..00000000 --- a/tests/bfabric/utils/test_math_helper.py +++ /dev/null @@ -1,15 +0,0 @@ -import unittest - -import bfabric.utils.math_helper as math_helper - - -class BfabricTestMath(unittest.TestCase): - def test_integer_division(self): - # Main purpose of dictionary sorting is that they appear consistent when printed - self.assertEqual(math_helper.div_int_ceil(120, 100), 2) - self.assertEqual(math_helper.div_int_ceil(200, 100), 2) - self.assertEqual(math_helper.div_int_ceil(245, 100), 3) - - -if __name__ == "__main__": - unittest.main(verbosity=2) From 99a2a850d169a024612385e07d98ffa959ce4099 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 15 Oct 2024 14:27:38 +0200 Subject: [PATCH 035/144] modernize --- .../bfabric_list_workunit_parameters.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/bfabric_scripts/bfabric_list_workunit_parameters.py b/src/bfabric_scripts/bfabric_list_workunit_parameters.py index 88439847..b7f17e54 100644 --- a/src/bfabric_scripts/bfabric_list_workunit_parameters.py +++ b/src/bfabric_scripts/bfabric_list_workunit_parameters.py @@ -1,11 +1,12 @@ import argparse import json -import sys import polars as pl import 
rich from bfabric import Bfabric +from bfabric.cli_formatting import setup_script_logging +from bfabric.experimental import MultiQuery def bfabric_list_workunit_parameters(client: Bfabric, application_id: int, max_workunits: int, format: str) -> None: @@ -74,16 +75,13 @@ def print_results(format: str, merged_result: pl.DataFrame) -> None: def get_parameter_table(client: Bfabric, workunits_table_explode: pl.DataFrame) -> pl.DataFrame: """Returns a wide format table for the specified parameters, with the key `workunit_id` indicating the source.""" # load the parameters table - collect = [] - for i_frame, frame in enumerate(workunits_table_explode.iter_slices(100)): - print( - f"-- Reading parameters chunk {i_frame + 1} of {len(workunits_table_explode) // 100 + 1}", file=sys.stderr - ) - chunk = ( - client.read("parameter", {"id": frame["parameter_id"].to_list()}).to_polars().rename({"id": "parameter_id"}) - ) - collect.append(chunk) - parameter_table_full = pl.concat(collect, how="align")[["parameter_id", "key", "value"]] + collect = MultiQuery(client=client).read_multi( + endpoint="parameter", + obj={}, + multi_query_key="id", + multi_query_vals=workunits_table_explode["parameter_id"].to_list(), + ) + parameter_table_full = collect.to_polars().rename({"id": "parameter_id"})[["parameter_id", "key", "value"]] # add workunit id to parameter table parameter_table_full = parameter_table_full.join( workunits_table_explode[["workunit_id", "parameter_id"]], on="parameter_id", how="left" @@ -94,6 +92,7 @@ def get_parameter_table(client: Bfabric, workunits_table_explode: pl.DataFrame) def main() -> None: """Parses command line arguments and calls `bfabric_list_workunit_parameters`.""" + setup_script_logging() client = Bfabric.from_config() parser = argparse.ArgumentParser() parser.add_argument("application_id", type=int, help="The application ID to list the workunit parameters for.") From 3aa865ccd1ae1c8fad7c8c225101a4463d666901 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz 
Date: Wed, 16 Oct 2024 16:04:23 +0200 Subject: [PATCH 036/144] remove old text from readme --- README.md | 261 +---------------------------------------- docs/old_cheatsheet.md | 255 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 258 insertions(+), 258 deletions(-) create mode 100644 docs/old_cheatsheet.md diff --git a/README.md b/README.md index 2d303871..7c3b7b4f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ -![unitTests](https://github.com/fgcz/bfabricPy/workflows/unit%20tests/badge.svg) +[![PR Checks](https://github.com/fgcz/bfabricPy/actions/workflows/run_unit_tests.yml/badge.svg)](https://github.com/fgcz/bfabricPy/actions/workflows/run_unit_tests.yml) +[![Nightly Integration Tests](https://github.com/fgcz/bfabricPy-tests/actions/workflows/nightly_tests.yml/badge.svg)](https://github.com/fgcz/bfabricPy-tests/actions/workflows/nightly_tests.yml) [![EDBT'10](https://img.shields.io/badge/EDBT-10.1145%2F1739041.1739135-brightgreen)](https://doi.org/10.1145/1739041.1739135) [![JIB](https://img.shields.io/badge/JIB-10.1515%2Fjib.2022.0031-brightgreen)](https://doi.org/10.1515/jib-2022-0031) @@ -10,262 +11,6 @@ For more advanced users the *bfabricPy* package also provides a powerful query i You can find the up-to-date documentation at [https://fgcz.github.io/bfabricPy](https://fgcz.github.io/bfabricPy). 
-## CheatSheet - -### Read - -```{bash} -bfabric_read.py storage -bfabric_read.py application -``` - -Simple database query examples - -```{bash} -bfabric_read.py user login cpanse -bfabric_read.py project id 3000 -bfabric_read.py workunit id 199387 -bfabric_read.py sample name autoQC4L -bfabric_read.py workunit status processing -bfabric_read.py workunit status pending -bfabric_read.py workunit status failed - -# list empty resources -bfabric_read.py resource filechecksum d41d8cd98f00b204e9800998ecf8427e -``` - -Using the Python API: - -```{py} -from bfabric import Bfabric - -client = Bfabric.from_config() - -user = client.read(endpoint = 'user', obj={'login': 'cpanse'}) -resource = client.read(endpoint = 'resource', obj={'id': 550327 }) -``` - -### save - -```{bash} -bfabric_save_workunit_attribute.py 199387 status available -``` - -```{python} -import json -rv = client.save('workunit', {'id': 254063, 'status': 'available'}) -print(json.dumps(rv.to_list_dict(), indent=2)) -``` - -### Command line code snippet - -Find empty resource files in bfabric - -```{bash} -bfabric_read.py resource filechecksum `md5sum < /dev/null | cut -c-32` \ - | cat -n \ - | tail -``` - -## Examples \[outdated\] - -### bash script generated by the yaml wrapper creator / submitter - -externaljobid-45939_executableid-15312.bash listing: - -```bash -#!/bin/bash -# -# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/README.md $ -# $Id: README.md 2535 2016-10-24 08:49:17Z cpanse $ -# Christian Panse 2007-2015 - -# Grid Engine Parameters -#$ -q PRX@fgcz-c-071 -#$ -e /home/bfabric/sgeworker/logs/workunitid-134923_resourceid-203236.err -#$ -o /home/bfabric/sgeworker/logs/workunitid-134923_resourceid-203236.out - - -set -e -set -o pipefail - -export EXTERNALJOBID=45938 -export RESSOURCEID_OUTPUT=203238 -export RESSOURCEID_STDOUT_STDERR="203237 203238" -export 
OUTPUT="bfabric@fgczdata.fgcz-net.unizh.ch:/srv/www/htdocs//p1000/bfabric/Proteomics/gerneric_yaml/2015/2015-09/2015-09-02//workunit_134923//203236.zip" - -# job configuration set by B-Fabrics wrapper_creator executable -_OUTPUT=`echo $OUTPUT | cut -d"," -f1` -test $? -eq 0 && _OUTPUTHOST=`echo $_OUTPUT | cut -d":" -f1` -test $? -eq 0 && _OUTPUTPATH=`echo $_OUTPUT | cut -d":" -f2` -test $? -eq 0 && _OUTPUTPATH=`dirname $_OUTPUTPATH` -test $? -eq 0 && ssh $_OUTPUTHOST "mkdir -p $_OUTPUTPATH" - -if [ $? -eq 1 ]; -then - echo "writting to output url failed!"; - exit 1; -fi - -cat > /tmp/yaml_config.$$ < - - - - - XXX - XXX - - 482 - - - - - ' -} - -for url in https://fgcz-bfabric.uzh.ch/bfabric/user?wsdl https://fgcz-bfabric-test.uzh.ch/bfabric/user?wsdl; -do - echo - echo "==== ${url} === " - query ${url} -done - -echo $? -``` - -### Example usage - -remove accidentally inserted mgf files - -``` -bfabric_read.py importresource \ - | grep mgf$ \ - | awk '{print $1}' \ - | tee /tmp/$$.log \ - | while read i; - do - bfabric_delete.py importresource $i ; - done -``` - -## Send an E-mail \[outdated\] - -``` -# by CT,CP -# not implemented yet 2022-10-19 , -rv = B.save_object(endpoint = 'mail', - obj={'subject': "TEST", - 'recipientemail': 'bfabrictest482.cp@fgcz.ethz.ch', - 'message': "TEST; ignore that email", - 'parentId': 482, - 'parentClassName': 'user'}) -# shown as mail for user id 482 -``` - -## See also - -- [bfabric documentation](https://fgcz-bfabric.uzh.ch/wiki/HomePage) -- [FAQ](faq.md) -- [wsdl4BFabric](http://fgcz-intranet.uzh.ch/tiki-index.php?page=wsdl4BFabric) wiki page -- WSDL Interface to B-Fabric [endpoints](http://fgcz-bfabric.uzh.ch/bfabric/workunit?wsdl) - -## FAQ - -### How to resolve ` 2007-2015 + +# Grid Engine Parameters +#$ -q PRX@fgcz-c-071 +#$ -e /home/bfabric/sgeworker/logs/workunitid-134923_resourceid-203236.err +#$ -o /home/bfabric/sgeworker/logs/workunitid-134923_resourceid-203236.out + + +set -e +set -o pipefail + +export 
EXTERNALJOBID=45938 +export RESSOURCEID_OUTPUT=203238 +export RESSOURCEID_STDOUT_STDERR="203237 203238" +export OUTPUT="bfabric@fgczdata.fgcz-net.unizh.ch:/srv/www/htdocs//p1000/bfabric/Proteomics/gerneric_yaml/2015/2015-09/2015-09-02//workunit_134923//203236.zip" + +# job configuration set by B-Fabrics wrapper_creator executable +_OUTPUT=`echo $OUTPUT | cut -d"," -f1` +test $? -eq 0 && _OUTPUTHOST=`echo $_OUTPUT | cut -d":" -f1` +test $? -eq 0 && _OUTPUTPATH=`echo $_OUTPUT | cut -d":" -f2` +test $? -eq 0 && _OUTPUTPATH=`dirname $_OUTPUTPATH` +test $? -eq 0 && ssh $_OUTPUTHOST "mkdir -p $_OUTPUTPATH" + +if [ $? -eq 1 ]; +then + echo "writting to output url failed!"; + exit 1; +fi + +cat > /tmp/yaml_config.$$ < + + + + + XXX + XXX + + 482 + + + + + ' +} + +for url in https://fgcz-bfabric.uzh.ch/bfabric/user?wsdl https://fgcz-bfabric-test.uzh.ch/bfabric/user?wsdl; +do + echo + echo "==== ${url} === " + query ${url} +done + +echo $? +``` + +### Example usage + +remove accidentally inserted mgf files + +``` +bfabric_read.py importresource \ + | grep mgf$ \ + | awk '{print $1}' \ + | tee /tmp/$$.log \ + | while read i; + do + bfabric_delete.py importresource $i ; + done +``` + +## Send an E-mail \[outdated\] + +``` +# by CT,CP +# not implemented yet 2022-10-19 , +rv = B.save_object(endpoint = 'mail', + obj={'subject': "TEST", + 'recipientemail': 'bfabrictest482.cp@fgcz.ethz.ch', + 'message': "TEST; ignore that email", + 'parentId': 482, + 'parentClassName': 'user'}) +# shown as mail for user id 482 +``` + +## See also + +- [bfabric documentation](https://fgcz-bfabric.uzh.ch/wiki/HomePage) +- [FAQ](faq.md) +- [wsdl4BFabric](http://fgcz-intranet.uzh.ch/tiki-index.php?page=wsdl4BFabric) wiki page +- WSDL Interface to B-Fabric [endpoints](http://fgcz-bfabric.uzh.ch/bfabric/workunit?wsdl) + +## FAQ + +### How to resolve ` Date: Wed, 16 Oct 2024 16:11:22 +0200 Subject: [PATCH 037/144] improve readme --- README.md | 21 +++++++++++++++------ docs/index.md | 11 ++++++++--- 2 
files changed, 23 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 7c3b7b4f..f3670939 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,25 @@ +# bfabricPy + [![PR Checks](https://github.com/fgcz/bfabricPy/actions/workflows/run_unit_tests.yml/badge.svg)](https://github.com/fgcz/bfabricPy/actions/workflows/run_unit_tests.yml) [![Nightly Integration Tests](https://github.com/fgcz/bfabricPy-tests/actions/workflows/nightly_tests.yml/badge.svg)](https://github.com/fgcz/bfabricPy-tests/actions/workflows/nightly_tests.yml) [![EDBT'10](https://img.shields.io/badge/EDBT-10.1145%2F1739041.1739135-brightgreen)](https://doi.org/10.1145/1739041.1739135) [![JIB](https://img.shields.io/badge/JIB-10.1515%2Fjib.2022.0031-brightgreen)](https://doi.org/10.1515/jib-2022-0031) -# bfabricPy - -This package connects the [bfabric](https://fgcz-bfabric.uzh.ch/bfabric/) system to the [python](https://www.python.org/) and [R](https://cran.r-project.org/) world while providing a JSON and REST interface using [Flask](https://www.fullstackpython.com). -The [bfabricShiny](https://github.com/cpanse/bfabricShiny) R package is an extension and provides code snippets and sample implementation for a seamless R shiny bfabric integration. -For more advanced users the *bfabricPy* package also provides a powerful query interface on the command-line though using the provided scripts. +## Documentation You can find the up-to-date documentation at [https://fgcz.github.io/bfabricPy](https://fgcz.github.io/bfabricPy). -# Howto cite? +## Introduction + +This package implements a Python interface to the [B-Fabric](https://fgcz-bfabric.uzh.ch/bfabric/) system. +Several pieces of functionality are available: + +- Python API: + - General client for all B-Fabric web service operations (CRUD) and configuration management. + - A relational API for low-boilerplate read access to the B-Fabric system. +- Scripts: Several scripts we use more or less frequently to interact with the system. 
+- A REST API: A REST API to interact with the B-Fabric system. This allows us to interact with B-Fabric from R using [bfabricShiny](https://github.com/cpanse/bfabricShiny). + +## Howto cite? Panse, Christian, Trachsel, Christian and Türker, Can. "Bridging data management platforms and visualization tools to enable ad-hoc and smart analytics in life sciences" Journal of Integrative Bioinformatics, 2022, pp. 20220031. [doi: 10.1515/jib-2022-0031](https://doi.org/10.1515/jib-2022-0031). diff --git a/docs/index.md b/docs/index.md index 92983fc7..ac58235e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,8 +1,13 @@ # Home -This package connects the [bfabric](https://fgcz-bfabric.uzh.ch/bfabric/) system to the [python](https://www.python.org/) and [R](https://cran.r-project.org/) world while providing a JSON and REST interface using [Flask](https://www.fullstackpython.com). -The [bfabricShiny](https://github.com/cpanse/bfabricShiny) R package is an extension and provides code snippets and sample implementation for a seamless R shiny bfabric integration. -For more advanced users the *bfabricPy* package also provides a powerful query interface on the command-line though using the provided scripts. +This package implements a Python interface to the [B-Fabric](https://fgcz-bfabric.uzh.ch/bfabric/) system. +Several pieces of functionality are available: + +- Python API: + - General client for all B-Fabric web service operations (CRUD) and configuration management. + - A relational API for low-boilerplate read access to the B-Fabric system. +- Scripts: Several scripts we use more or less frequently to interact with the system. +- A REST API: A REST API to interact with the B-Fabric system. This allows us to interact with B-Fabric from R using [bfabricShiny](https://github.com/cpanse/bfabricShiny). Please see below for how to install bfabricPy. 
From 16b7a2fbda6af65d945bfe737cadadd724411702 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 18 Oct 2024 08:58:33 +0200 Subject: [PATCH 038/144] Introduce inputs check and refactor code --- src/bfabric/entities/dataset.py | 9 ++ .../experimental/app_interface/cli/chunk.py | 22 ----- .../experimental/app_interface/cli/inputs.py | 60 +++++++++++-- .../input_preparation/integrity.py | 50 +++++++++++ .../input_preparation/list_inputs.py | 57 ++++++++++++ .../input_preparation/prepare.py | 87 +++++++------------ 6 files changed, 200 insertions(+), 85 deletions(-) create mode 100644 src/bfabric/experimental/app_interface/input_preparation/integrity.py create mode 100644 src/bfabric/experimental/app_interface/input_preparation/list_inputs.py diff --git a/src/bfabric/entities/dataset.py b/src/bfabric/entities/dataset.py index 88a9b46b..4ca35cec 100644 --- a/src/bfabric/entities/dataset.py +++ b/src/bfabric/entities/dataset.py @@ -1,5 +1,6 @@ from __future__ import annotations +import tempfile from pathlib import Path from typing import Any @@ -31,3 +32,11 @@ def to_polars(self) -> DataFrame: def write_csv(self, path: Path, separator: str = ",") -> None: """Writes the dataset to a csv file at `path`, using the specified column `separator`.""" self.to_polars().write_csv(path, separator=separator) + + def get_csv(self, separator: str = ",") -> str: + """Returns the dataset as a csv string, using the specified column `separator`.""" + with tempfile.NamedTemporaryFile() as tmp_file: + self.write_csv(Path(tmp_file.name), separator=separator) + tmp_file.flush() + tmp_file.seek(0) + return tmp_file.read().decode() diff --git a/src/bfabric/experimental/app_interface/cli/chunk.py b/src/bfabric/experimental/app_interface/cli/chunk.py index f82bfbf3..c770ce98 100644 --- a/src/bfabric/experimental/app_interface/cli/chunk.py +++ b/src/bfabric/experimental/app_interface/cli/chunk.py @@ -45,28 +45,6 @@ def run_all( ) -@app_chunk.command() -def inputs( - app_spec: Path, - 
chunk_dir: Path, - *, - ssh_user: str | None = None, -) -> None: - """Prepare the input files for a chunk. - - :param app_spec: Path to the app spec file. - :param chunk_dir: Path to the chunk directory. - :param ssh_user: SSH user to use for downloading the input files, instead of the current user. - """ - setup_script_logging() - client = Bfabric.from_config() - chunk_dir = chunk_dir.resolve() - app_spec_parsed = AppSpec.model_validate(yaml.safe_load(app_spec.read_text())) - - runner = Runner(spec=app_spec_parsed, client=client, ssh_user=ssh_user) - runner.run_prepare_input(chunk_dir=chunk_dir) - - @app_chunk.command() def process(app_spec: Path, chunk_dir: Path) -> None: """Process a chunk. diff --git a/src/bfabric/experimental/app_interface/cli/inputs.py b/src/bfabric/experimental/app_interface/cli/inputs.py index 7ffafeac..51177cbe 100644 --- a/src/bfabric/experimental/app_interface/cli/inputs.py +++ b/src/bfabric/experimental/app_interface/cli/inputs.py @@ -7,7 +7,13 @@ from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging from bfabric.experimental.app_interface.input_preparation import prepare_folder -from bfabric.experimental.app_interface.input_preparation.prepare import print_input_files_list +from bfabric.experimental.app_interface.input_preparation._spec import InputsSpec +from bfabric.experimental.app_interface.input_preparation.integrity import IntegrityState +from bfabric.experimental.app_interface.input_preparation.list_inputs import ( + list_input_states, + print_input_states, + FileState, +) app_inputs = cyclopts.App("inputs", help="Prepare input files for an app.") @@ -58,12 +64,56 @@ def clean( ) +def get_inputs_and_print( + inputs_yaml: Path, + target_folder: Path | None, + check: bool, +) -> list[FileState]: + client = Bfabric.from_config() + input_states = list_input_states( + specs=InputsSpec.read_yaml(inputs_yaml), + target_folder=target_folder or Path("."), + client=client, + check_files=check, + ) + 
print_input_states(input_states) + return input_states + + +@app_inputs.command(name="list") +def list_( + inputs_yaml: Path, + target_folder: Path | None = None, + check: bool = False, +) -> None: + """Lists the input files for an app. + + :param inputs_yaml: Path to the inputs.yml file. + :param target_folder: Path to the target folder where the input files should be located, if different from the + file containing the inputs.yml file. + """ + setup_script_logging() + get_inputs_and_print(inputs_yaml=inputs_yaml, target_folder=target_folder, check=check) + + @app_inputs.command() -def list( +def check( inputs_yaml: Path, target_folder: Path | None = None, ) -> None: - """Lists the input files for an app.""" + """Checks if the input files are present and have the correct content. + + The script will exit with a non-zero status + code if any of the input files are missing or have incorrect content. + :param inputs_yaml: Path to the inputs.yml file. + :param target_folder: Path to the target folder where the input files should be located, if different from the + file containing the inputs.yml file. 
+ """ setup_script_logging() - client = Bfabric.from_config() - print_input_files_list(inputs_yaml=inputs_yaml, target_folder=target_folder, client=client) + input_states = get_inputs_and_print(inputs_yaml=inputs_yaml, target_folder=target_folder, check=True) + invalid_states = {state.integrity for state in input_states if state.integrity != IntegrityState.Correct} + if invalid_states: + print(f"Encountered invalid input states: {invalid_states}") + raise SystemExit(1) + else: + print("All input files are correct.") diff --git a/src/bfabric/experimental/app_interface/input_preparation/integrity.py b/src/bfabric/experimental/app_interface/input_preparation/integrity.py new file mode 100644 index 00000000..cdd9d8bb --- /dev/null +++ b/src/bfabric/experimental/app_interface/input_preparation/integrity.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from enum import Enum +from pathlib import Path + +from bfabric.bfabric import Bfabric +from bfabric.entities import Resource, Dataset +from bfabric.experimental.app_interface.input_preparation._spec import InputSpecType, ResourceSpec, DatasetSpec +from bfabric.experimental.app_interface.util.checksums import md5sum + + +class IntegrityState(Enum): + """ + TODO basically this: enum(Missing, Exists(NOT_CHECKED, CORRECT, INCORRECT)) + """ + + Missing = "Missing" + NotChecked = "NotChecked" + Correct = "Correct" + Incorrect = "Incorrect" + + def exists(self) -> bool: + return self != IntegrityState.Missing + + +def check_integrity(spec: InputSpecType, local_path: Path, client: Bfabric) -> IntegrityState: + """Checks the integrity of a local file against the spec.""" + if not local_path.exists(): + return IntegrityState.Missing + + if isinstance(spec, ResourceSpec): + return _check_resource_spec(spec, local_path, client) + elif isinstance(spec, DatasetSpec): + return _check_dataset_spec(spec, local_path, client) + else: + raise ValueError(f"Unsupported spec type: {type(spec)}") + + +def _check_resource_spec(spec: 
ResourceSpec, local_path: Path, client: Bfabric) -> IntegrityState: + expected_checksum = Resource.find(id=spec.id, client=client)["filechecksum"] + if expected_checksum == md5sum(local_path): + return IntegrityState.Correct + else: + return IntegrityState.Incorrect + + +def _check_dataset_spec(spec: DatasetSpec, local_path: Path, client: Bfabric) -> IntegrityState: + dataset = Dataset.find(id=spec.id, client=client) + is_identical = local_path.read_text().strip() == dataset.get_csv(separator=spec.separator).strip() + return IntegrityState.Correct if is_identical else IntegrityState.Incorrect diff --git a/src/bfabric/experimental/app_interface/input_preparation/list_inputs.py b/src/bfabric/experimental/app_interface/input_preparation/list_inputs.py new file mode 100644 index 00000000..a48be526 --- /dev/null +++ b/src/bfabric/experimental/app_interface/input_preparation/list_inputs.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +from rich.console import Console +from rich.table import Table, Column + +from bfabric.bfabric import Bfabric +from bfabric.experimental.app_interface.input_preparation._spec import InputSpecType +from bfabric.experimental.app_interface.input_preparation.integrity import check_integrity, IntegrityState + + +@dataclass +class FileState: + name: str + path: Path + type: str + exists: bool + integrity: IntegrityState + + +def list_input_states( + specs: list[InputSpecType], + target_folder: Path, + client: Bfabric, + check_files: bool, +) -> list[FileState]: + input_states = [] + for spec in specs: + filename = spec.resolve_filename(client=client) + path = target_folder / filename + exists = path.exists() + if not check_files: + integrity = IntegrityState.NotChecked + else: + integrity = check_integrity(spec=spec, local_path=path, client=client) + input_states.append(FileState(name=filename, path=path, exists=exists, integrity=integrity, type=spec.type)) + return 
input_states + + +def print_input_states(input_states: list[FileState]) -> None: + table = Table( + Column("File"), + Column("Input Type"), + Column("Exists"), + Column("Integrity"), + ) + for state in input_states: + table.add_row( + str(state.name), + str(state.type), + {True: "Yes", False: "No"}[state.exists], + state.integrity.value, + ) + console = Console() + console.print(table) diff --git a/src/bfabric/experimental/app_interface/input_preparation/prepare.py b/src/bfabric/experimental/app_interface/input_preparation/prepare.py index 17cff05d..2c35a05a 100644 --- a/src/bfabric/experimental/app_interface/input_preparation/prepare.py +++ b/src/bfabric/experimental/app_interface/input_preparation/prepare.py @@ -1,11 +1,8 @@ from __future__ import annotations -import tempfile from pathlib import Path from loguru import logger -from rich.console import Console -from rich.table import Table, Column from bfabric.bfabric import Bfabric from bfabric.entities import Resource, Dataset @@ -15,6 +12,8 @@ InputSpecType, InputsSpec, ) +from bfabric.experimental.app_interface.input_preparation.integrity import IntegrityState +from bfabric.experimental.app_interface.input_preparation.list_inputs import list_input_states from bfabric.experimental.app_interface.util.checksums import md5sum from bfabric.experimental.app_interface.util.scp import scp @@ -26,24 +25,30 @@ def __init__(self, client: Bfabric, working_dir: Path, ssh_user: str | None) -> self._ssh_user = ssh_user def prepare_all(self, specs: list[InputSpecType]) -> None: - for spec in specs: - logger.debug(f"Preparing {spec}") - if isinstance(spec, ResourceSpec): + # TODO ensure dataset is cached + input_states = list_input_states( + specs=specs, target_folder=self._working_dir, client=self._client, check_files=True + ) + for spec, input_state in zip(specs, input_states): + if input_state.integrity == IntegrityState.Correct: + logger.debug(f"Skipping {spec} as it already exists and passed integrity check") + elif 
isinstance(spec, ResourceSpec): self.prepare_resource(spec) elif isinstance(spec, DatasetSpec): self.prepare_dataset(spec) else: - raise ValueError(f"Unknown spec type: {type(spec)}") + raise ValueError(f"Unsupported spec type: {type(spec)}") def clean_all(self, specs: list[InputSpecType]) -> None: - for spec in specs: - logger.debug(f"Cleaning {spec}") - if isinstance(spec, ResourceSpec): - self.clean_resource(spec) - elif isinstance(spec, DatasetSpec): - self.clean_dataset(spec) + input_states = list_input_states( + specs=specs, target_folder=self._working_dir, client=self._client, check_files=False + ) + for spec, input_state in zip(specs, input_states): + if not input_state.exists: + logger.debug(f"Skipping {spec} as it does not exist") else: - raise ValueError(f"Unknown spec type: {type(spec)}") + logger.info(f"rm {input_state.path}") + input_state.path.unlink() def prepare_resource(self, spec: ResourceSpec) -> None: resource = Resource.find(id=spec.id, client=self._client) @@ -55,32 +60,22 @@ def prepare_resource(self, spec: ResourceSpec) -> None: result_name = spec.filename if spec.filename else resource["name"] result_path = self._working_dir / result_name - # copy if necessary - if result_path.exists() and md5sum(result_path) == resource["filechecksum"]: - logger.debug(f"Skipping {resource['name']} as it already exists and has the correct checksum") - else: - scp(scp_uri, str(result_path), user=self._ssh_user) + # perform the copy + scp(scp_uri, str(result_path), user=self._ssh_user) - # verify checksum - if spec.check_checksum: - actual_checksum = md5sum(result_path) - logger.debug(f"Checksum: expected {resource['filechecksum']}, got {actual_checksum}") - if actual_checksum != resource["filechecksum"]: - raise ValueError(f"Checksum mismatch: expected {resource['filechecksum']}, got {actual_checksum}") + # verify checksum + if spec.check_checksum: + actual_checksum = md5sum(result_path) + logger.debug(f"Checksum: expected {resource['filechecksum']}, got 
{actual_checksum}") + if actual_checksum != resource["filechecksum"]: + raise ValueError(f"Checksum mismatch: expected {resource['filechecksum']}, got {actual_checksum}") def prepare_dataset(self, spec: DatasetSpec) -> None: dataset = Dataset.find(id=spec.id, client=self._client) + # TODO use the new functionality Dataset.get_csv (or even go further in the refactoring) target_path = self._working_dir / spec.filename target_path.parent.mkdir(exist_ok=True, parents=True) - with tempfile.NamedTemporaryFile() as tmp_file: - dataset.write_csv(Path(tmp_file.name), separator=spec.separator) - tmp_file.flush() - tmp_file.seek(0) - if target_path.exists() and target_path.read_text() == tmp_file.read().decode(): - logger.debug(f"Skipping {spec.filename} as it already exists and has the correct content") - else: - tmp_file.seek(0) - target_path.write_text(tmp_file.read().decode()) + dataset.write_csv(path=target_path, separator=spec.separator) def clean_resource(self, spec: ResourceSpec) -> None: filename = spec.resolve_filename(client=self._client) @@ -119,27 +114,3 @@ def prepare_folder( prepare.clean_all(specs=specs_list) else: raise ValueError(f"Unknown action: {action}") - - -def print_input_files_list( - inputs_yaml: Path, - target_folder: Path, - client: Bfabric, -) -> None: - """Prints a list of inputs and whether they exist locally.""" - specs_list = InputsSpec.read_yaml(inputs_yaml) - table = Table( - Column("File"), - Column("Input Type"), - Column("Exists Locally"), - ) - for spec in specs_list: - filename = spec.resolve_filename(client=client) - path = target_folder / filename if target_folder else Path(filename) - table.add_row( - str(path), - "Resource" if isinstance(spec, ResourceSpec) else "Dataset", - "Yes" if path.exists() else "No", - ) - console = Console() - console.print(table) From 0439feacb2448fb82d5d95fd9db56a98f78e5090 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 18 Oct 2024 09:01:27 +0200 Subject: [PATCH 039/144] changelog --- 
docs/changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changelog.md b/docs/changelog.md index b659ab7a..95eb32cd 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -15,6 +15,7 @@ Versioning currently follows `X.Y.Z` where - Relationship: `ExternalJob.executable` - (experimental) Provide a script `bfabric-app-runner` to allow testing with pipx. - (experimental) EntityLookupCache that allows to cache entity lookups in a script to avoid redundant requests. +- (experimental) app_interface cli "inputs check" to validate the local files. ### Fixed From 5488b2b31e3c606289f513dda0963f01547b5cf7 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 18 Oct 2024 09:06:21 +0200 Subject: [PATCH 040/144] use the entity cache --- .../experimental/app_interface/cli/app.py | 18 ++++++++++-------- .../experimental/app_interface/cli/chunk.py | 6 ++++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/bfabric/experimental/app_interface/cli/app.py b/src/bfabric/experimental/app_interface/cli/app.py index a2d662fe..b0232bac 100644 --- a/src/bfabric/experimental/app_interface/cli/app.py +++ b/src/bfabric/experimental/app_interface/cli/app.py @@ -9,6 +9,7 @@ from bfabric.cli_formatting import setup_script_logging from bfabric.experimental.app_interface.app_runner._spec import AppSpec from bfabric.experimental.app_interface.app_runner.runner import run_app, Runner +from bfabric.experimental.entity_lookup_cache import EntityLookupCache app_app = cyclopts.App("app", help="Run an app.") @@ -27,14 +28,15 @@ def run( setup_script_logging() client = Bfabric.from_config() app_spec_parsed = AppSpec.model_validate(yaml.safe_load(app_spec.read_text())) - run_app( - app_spec=app_spec_parsed, - workunit_ref=workunit_ref, - work_dir=work_dir, - client=client, - ssh_user=ssh_user, - read_only=read_only, - ) + with EntityLookupCache.enable(): + run_app( + app_spec=app_spec_parsed, + workunit_ref=workunit_ref, + work_dir=work_dir, + client=client, + 
ssh_user=ssh_user, + read_only=read_only, + ) @app_app.command() diff --git a/src/bfabric/experimental/app_interface/cli/chunk.py b/src/bfabric/experimental/app_interface/cli/chunk.py index c770ce98..d5394c99 100644 --- a/src/bfabric/experimental/app_interface/cli/chunk.py +++ b/src/bfabric/experimental/app_interface/cli/chunk.py @@ -9,6 +9,7 @@ from bfabric.cli_formatting import setup_script_logging from bfabric.experimental.app_interface.app_runner._spec import AppSpec from bfabric.experimental.app_interface.app_runner.runner import run_app, Runner +from bfabric.experimental.entity_lookup_cache import EntityLookupCache app_chunk = cyclopts.App("chunk", help="Run an app on a chunk. You can create the chunks with `app dispatch`.") @@ -58,8 +59,9 @@ def process(app_spec: Path, chunk_dir: Path) -> None: chunk_dir = chunk_dir.resolve() app_spec_parsed = AppSpec.model_validate(yaml.safe_load(app_spec.read_text())) - runner = Runner(spec=app_spec_parsed, client=client, ssh_user=None) - runner.run_process(chunk_dir=chunk_dir) + with EntityLookupCache.enable(): + runner = Runner(spec=app_spec_parsed, client=client, ssh_user=None) + runner.run_process(chunk_dir=chunk_dir) @app_chunk.command() From bdcf201c375f8a221c1b7545f958cd77b516a619 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 18 Oct 2024 09:09:30 +0200 Subject: [PATCH 041/144] move docs --- docs/{old_cheatsheet.md => old/cheatsheet.md} | 0 faq.md => docs/old/faq.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename docs/{old_cheatsheet.md => old/cheatsheet.md} (100%) rename faq.md => docs/old/faq.md (100%) diff --git a/docs/old_cheatsheet.md b/docs/old/cheatsheet.md similarity index 100% rename from docs/old_cheatsheet.md rename to docs/old/cheatsheet.md diff --git a/faq.md b/docs/old/faq.md similarity index 100% rename from faq.md rename to docs/old/faq.md From 7c9c59160811958a45fc854136659706b128e0ab Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 18 Oct 2024 11:51:01 +0200 
Subject: [PATCH 042/144] add build_app_runner workflow --- .github/workflows/build_app_runner.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/workflows/build_app_runner.yml diff --git a/.github/workflows/build_app_runner.yml b/.github/workflows/build_app_runner.yml new file mode 100644 index 00000000..2cb15ce6 --- /dev/null +++ b/.github/workflows/build_app_runner.yml @@ -0,0 +1,17 @@ +name: Build App Runner +on: + workflow_dispatch: +jobs: + build_app_runner: + name: Build App Runner + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Build + run: | + bash app_runner/deploy/build.sh build-output app_runner + ls -l build-output + - name: Upload + uses: actions/upload-artifact@v4 + with: + path: build-output From bff9583f2f059301d0d3a4ef13e7e29580479a00 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 18 Oct 2024 12:03:06 +0200 Subject: [PATCH 043/144] move app runner code into separate package --- .gitignore | 2 +- app_runner/.gitignore | 2 ++ app_runner/deploy/build.sh | 22 ++++++++++++++++ app_runner/deploy/build_steps.sh | 11 ++++++++ app_runner/deploy/builder/Dockerfile | 25 +++++++++++++++++++ app_runner/pyproject.toml | 19 ++++++++++++++ .../src}/app_runner/__init__.py | 0 .../src/app_runner/app_runner}/__init__.py | 0 .../src/app_runner}/app_runner/__main__.py | 4 +-- .../src/app_runner}/app_runner/_spec.py | 2 +- .../src/app_runner}/app_runner/runner.py | 8 +++--- .../src/app_runner/cli}/__init__.py | 0 app_runner/src/app_runner/cli/__main__.py | 19 ++++++++++++++ .../src/app_runner}/cli/app.py | 4 +-- .../src/app_runner}/cli/chunk.py | 4 +-- .../src/app_runner}/cli/inputs.py | 10 ++++---- .../src/app_runner}/cli/outputs.py | 4 +-- .../src/app_runner}/cli/validate.py | 6 ++--- .../src/app_runner/dispatch}/__init__.py | 0 .../dispatch/dispatch_individual_resources.py | 2 +- .../app_runner}/input_preparation/__init__.py | 0 .../app_runner}/input_preparation/__main__.py | 2 +- 
.../app_runner}/input_preparation/_spec.py | 0 .../input_preparation/integrity.py | 4 +-- .../input_preparation/list_inputs.py | 4 +-- .../app_runner}/input_preparation/prepare.py | 10 ++++---- .../output_registration/__init__.py | 0 .../output_registration/__main__.py | 2 +- .../app_runner}/output_registration/_spec.py | 0 .../output_registration/register.py | 6 ++--- .../src/app_runner/util}/__init__.py | 0 .../src/app_runner}/util/checksums.py | 0 .../src/app_runner}/util/scp.py | 0 .../app_interface/cli/__main__.py | 19 -------------- .../definition.py => workunit_definition.py} | 0 .../bfabric_wrapper_creator.py | 2 +- 36 files changed, 136 insertions(+), 57 deletions(-) create mode 100644 app_runner/.gitignore create mode 100644 app_runner/deploy/build.sh create mode 100644 app_runner/deploy/build_steps.sh create mode 100644 app_runner/deploy/builder/Dockerfile create mode 100644 app_runner/pyproject.toml rename {src/bfabric/experimental/app_interface => app_runner/src}/app_runner/__init__.py (100%) rename {src/bfabric/experimental/app_interface/cli => app_runner/src/app_runner/app_runner}/__init__.py (100%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/app_runner/__main__.py (88%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/app_runner/_spec.py (97%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/app_runner/runner.py (92%) rename {src/bfabric/experimental/app_interface/dispatch => app_runner/src/app_runner/cli}/__init__.py (100%) create mode 100644 app_runner/src/app_runner/cli/__main__.py rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/cli/app.py (91%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/cli/chunk.py (95%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/cli/inputs.py (90%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/cli/outputs.py (84%) 
rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/cli/validate.py (78%) rename {src/bfabric/experimental/app_interface/util => app_runner/src/app_runner/dispatch}/__init__.py (100%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/dispatch/dispatch_individual_resources.py (98%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/input_preparation/__init__.py (100%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/input_preparation/__main__.py (90%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/input_preparation/_spec.py (100%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/input_preparation/integrity.py (90%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/input_preparation/list_inputs.py (88%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/input_preparation/prepare.py (91%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/output_registration/__init__.py (100%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/output_registration/__main__.py (89%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/output_registration/_spec.py (100%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/output_registration/register.py (95%) rename {src/bfabric/experimental/app_interface/workunit => app_runner/src/app_runner/util}/__init__.py (100%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/util/checksums.py (100%) rename {src/bfabric/experimental/app_interface => app_runner/src/app_runner}/util/scp.py (100%) delete mode 100644 src/bfabric/experimental/app_interface/cli/__main__.py rename src/bfabric/experimental/{app_interface/workunit/definition.py => workunit_definition.py} (100%) diff --git a/.gitignore b/.gitignore 
index 42414e62..ee8be9c9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ .idea/ __pycache__ -bfabric.egg-info/ +*.egg-info/ bfabric/scripts/query_result.txt build/ dist/ diff --git a/app_runner/.gitignore b/app_runner/.gitignore new file mode 100644 index 00000000..35dcd286 --- /dev/null +++ b/app_runner/.gitignore @@ -0,0 +1,2 @@ +builds +app_runner.spec diff --git a/app_runner/deploy/build.sh b/app_runner/deploy/build.sh new file mode 100644 index 00000000..4df17b1c --- /dev/null +++ b/app_runner/deploy/build.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -euxo pipefail +# Parse arguments +TARGET_DIR=$(readlink -f "${1:-./dist}") +TARGET_NAME="${2:-app_runner}" +DOCKER=docker + +DEPLOY_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +APP_RUNNER_PROJECT_DIR=$(realpath "$DEPLOY_DIR/..") +BUILDER_IMAGE=local-build_app_runner:0.0.1 +$DOCKER build -t $BUILDER_IMAGE "$DEPLOY_DIR/builder" + +mkdir -p "$TARGET_DIR" +$DOCKER run \ + --user "$(id -u):$(id -g)" \ + --rm \ + --mount type=bind,source="$APP_RUNNER_PROJECT_DIR",target=/work/app_runner \ + --mount type=bind,source="$DEPLOY_DIR"/build_steps.sh,target=/work/build_steps.sh,readonly \ + --mount type=bind,source="$TARGET_DIR",target=/work/dist \ + --workdir /work/app_runner \ + "$BUILDER_IMAGE" \ + bash /work/build_steps.sh /work/dist "$TARGET_NAME" diff --git a/app_runner/deploy/build_steps.sh b/app_runner/deploy/build_steps.sh new file mode 100644 index 00000000..56b7dd04 --- /dev/null +++ b/app_runner/deploy/build_steps.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -euxo pipefail +TARGET_DIR="${1:-dist}" +TARGET_NAME="${2:-app_runner}" +rm -rf /work/venv +python -m venv /work/venv +source /work/venv/bin/activate +uv pip install . 
+uv pip install pyinstaller +pyinstaller -y --onedir --name "${TARGET_NAME}" --distpath "${TARGET_DIR}" src/app_runner/cli/__main__.py +deactivate diff --git a/app_runner/deploy/builder/Dockerfile b/app_runner/deploy/builder/Dockerfile new file mode 100644 index 00000000..aa1e1959 --- /dev/null +++ b/app_runner/deploy/builder/Dockerfile @@ -0,0 +1,25 @@ +ARG DEBIAN_VERSION=buster +FROM debian:${DEBIAN_VERSION} +ARG PYTHON_VERSION=3.13.0 + +LABEL org.opencontainers.image.authors="Leonardo Schwarz" + +RUN apt-get update \ + && apt-get upgrade -y \ + && apt-get install -y curl git bash build-essential ccache \ + && apt-get install -y libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl git libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev + +RUN curl https://pyenv.run | bash + +ENV PATH=$PATH:/root/.pyenv/bin +RUN pyenv install $PYTHON_VERSION +RUN pyenv global $PYTHON_VERSION +ENV PATH=/root/.pyenv/versions/${PYTHON_VERSION}/bin:$PATH + +RUN pip install --root-user-action ignore uv pyinstaller +RUN chmod -R 0777 /root +RUN mkdir /work && chmod 0777 /work +RUN mkdir /home/user && chmod 0777 /home/user + +ENV HOME=/home/user +WORKDIR /work diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml new file mode 100644 index 00000000..2021d7bf --- /dev/null +++ b/app_runner/pyproject.toml @@ -0,0 +1,19 @@ +[build-system] +requires = ["setuptools >= 61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "app_runner" +description = "Application runner for B-Fabric apps" +version = "0.1.0" +license = { text = "GPL-3.0" } +authors = [ + {name = "Leonardo Schwarz", email = "leonardo.schwarz@fgcz.ethz.ch"}, +] +requires-python = ">=3.12" +dependencies = [ + "bfabric @ git+https://github.com/fgcz/bfabricPy.git@main", +] + +[project.scripts] +"bfabric-app-runner"="app_runner.cli.__main__:app" diff --git a/src/bfabric/experimental/app_interface/app_runner/__init__.py b/app_runner/src/app_runner/__init__.py 
similarity index 100% rename from src/bfabric/experimental/app_interface/app_runner/__init__.py rename to app_runner/src/app_runner/__init__.py diff --git a/src/bfabric/experimental/app_interface/cli/__init__.py b/app_runner/src/app_runner/app_runner/__init__.py similarity index 100% rename from src/bfabric/experimental/app_interface/cli/__init__.py rename to app_runner/src/app_runner/app_runner/__init__.py diff --git a/src/bfabric/experimental/app_interface/app_runner/__main__.py b/app_runner/src/app_runner/app_runner/__main__.py similarity index 88% rename from src/bfabric/experimental/app_interface/app_runner/__main__.py rename to app_runner/src/app_runner/app_runner/__main__.py index c70d6b08..18dbc674 100644 --- a/src/bfabric/experimental/app_interface/app_runner/__main__.py +++ b/app_runner/src/app_runner/app_runner/__main__.py @@ -9,8 +9,8 @@ from bfabric.bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging -from bfabric.experimental.app_interface.app_runner._spec import AppSpec -from bfabric.experimental.app_interface.app_runner.runner import run_app +from app_runner.app_runner._spec import AppSpec +from app_runner.app_runner.runner import run_app def main() -> None: diff --git a/src/bfabric/experimental/app_interface/app_runner/_spec.py b/app_runner/src/app_runner/app_runner/_spec.py similarity index 97% rename from src/bfabric/experimental/app_interface/app_runner/_spec.py rename to app_runner/src/app_runner/app_runner/_spec.py index dfa2a213..0419257e 100644 --- a/src/bfabric/experimental/app_interface/app_runner/_spec.py +++ b/app_runner/src/app_runner/app_runner/_spec.py @@ -46,7 +46,7 @@ class CommandDocker(BaseModel): mounts: MountOptions = MountOptions() def to_shell(self, work_dir: Path | None = None) -> list[str]: - work_dir = (work_dir or Path(".")).expanduser().absolute() + work_dir = (work_dir or Path("")).expanduser().absolute() mounts = self.mounts.collect(work_dir=work_dir) mount_args = [] for host, container, 
read_only in mounts: diff --git a/src/bfabric/experimental/app_interface/app_runner/runner.py b/app_runner/src/app_runner/app_runner/runner.py similarity index 92% rename from src/bfabric/experimental/app_interface/app_runner/runner.py rename to app_runner/src/app_runner/app_runner/runner.py index 59bb0bb2..03ee5fe3 100644 --- a/src/bfabric/experimental/app_interface/app_runner/runner.py +++ b/app_runner/src/app_runner/app_runner/runner.py @@ -9,10 +9,10 @@ from pydantic import BaseModel from bfabric import Bfabric -from bfabric.experimental.app_interface.app_runner._spec import AppSpec -from bfabric.experimental.app_interface.input_preparation import prepare_folder -from bfabric.experimental.app_interface.output_registration import register_outputs -from bfabric.experimental.app_interface.workunit.definition import WorkunitDefinition +from app_runner.app_runner._spec import AppSpec +from app_runner.input_preparation import prepare_folder +from app_runner.output_registration import register_outputs +from bfabric.experimental.workunit_definition import WorkunitDefinition class Runner: diff --git a/src/bfabric/experimental/app_interface/dispatch/__init__.py b/app_runner/src/app_runner/cli/__init__.py similarity index 100% rename from src/bfabric/experimental/app_interface/dispatch/__init__.py rename to app_runner/src/app_runner/cli/__init__.py diff --git a/app_runner/src/app_runner/cli/__main__.py b/app_runner/src/app_runner/cli/__main__.py new file mode 100644 index 00000000..760412be --- /dev/null +++ b/app_runner/src/app_runner/cli/__main__.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +import cyclopts + +from app_runner.cli.app import app_app +from app_runner.cli.chunk import app_chunk +from app_runner.cli.inputs import app_inputs +from app_runner.cli.outputs import app_outputs +from app_runner.cli.validate import app_validate + +app = cyclopts.App(help="Provides an entrypoint to app execution.\n\nFunctionality/API under active development!") 
+app.command(app_inputs) +app.command(app_outputs) +app.command(app_app) +app.command(app_chunk) +app.command(app_validate) + +if __name__ == "__main__": + app() diff --git a/src/bfabric/experimental/app_interface/cli/app.py b/app_runner/src/app_runner/cli/app.py similarity index 91% rename from src/bfabric/experimental/app_interface/cli/app.py rename to app_runner/src/app_runner/cli/app.py index b0232bac..e173a20e 100644 --- a/src/bfabric/experimental/app_interface/cli/app.py +++ b/app_runner/src/app_runner/cli/app.py @@ -7,8 +7,8 @@ from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging -from bfabric.experimental.app_interface.app_runner._spec import AppSpec -from bfabric.experimental.app_interface.app_runner.runner import run_app, Runner +from app_runner.app_runner._spec import AppSpec +from app_runner.app_runner.runner import run_app, Runner from bfabric.experimental.entity_lookup_cache import EntityLookupCache app_app = cyclopts.App("app", help="Run an app.") diff --git a/src/bfabric/experimental/app_interface/cli/chunk.py b/app_runner/src/app_runner/cli/chunk.py similarity index 95% rename from src/bfabric/experimental/app_interface/cli/chunk.py rename to app_runner/src/app_runner/cli/chunk.py index d5394c99..d04dfa09 100644 --- a/src/bfabric/experimental/app_interface/cli/chunk.py +++ b/app_runner/src/app_runner/cli/chunk.py @@ -7,8 +7,8 @@ from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging -from bfabric.experimental.app_interface.app_runner._spec import AppSpec -from bfabric.experimental.app_interface.app_runner.runner import run_app, Runner +from app_runner.app_runner._spec import AppSpec +from app_runner.app_runner.runner import run_app, Runner from bfabric.experimental.entity_lookup_cache import EntityLookupCache app_chunk = cyclopts.App("chunk", help="Run an app on a chunk. 
You can create the chunks with `app dispatch`.") diff --git a/src/bfabric/experimental/app_interface/cli/inputs.py b/app_runner/src/app_runner/cli/inputs.py similarity index 90% rename from src/bfabric/experimental/app_interface/cli/inputs.py rename to app_runner/src/app_runner/cli/inputs.py index 51177cbe..b64ee81a 100644 --- a/src/bfabric/experimental/app_interface/cli/inputs.py +++ b/app_runner/src/app_runner/cli/inputs.py @@ -6,10 +6,10 @@ from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging -from bfabric.experimental.app_interface.input_preparation import prepare_folder -from bfabric.experimental.app_interface.input_preparation._spec import InputsSpec -from bfabric.experimental.app_interface.input_preparation.integrity import IntegrityState -from bfabric.experimental.app_interface.input_preparation.list_inputs import ( +from app_runner.input_preparation import prepare_folder +from app_runner.input_preparation._spec import InputsSpec +from app_runner.input_preparation.integrity import IntegrityState +from app_runner.input_preparation.list_inputs import ( list_input_states, print_input_states, FileState, @@ -72,7 +72,7 @@ def get_inputs_and_print( client = Bfabric.from_config() input_states = list_input_states( specs=InputsSpec.read_yaml(inputs_yaml), - target_folder=target_folder or Path("."), + target_folder=target_folder or Path(""), client=client, check_files=check, ) diff --git a/src/bfabric/experimental/app_interface/cli/outputs.py b/app_runner/src/app_runner/cli/outputs.py similarity index 84% rename from src/bfabric/experimental/app_interface/cli/outputs.py rename to app_runner/src/app_runner/cli/outputs.py index 2a656d54..43a95d9d 100644 --- a/src/bfabric/experimental/app_interface/cli/outputs.py +++ b/app_runner/src/app_runner/cli/outputs.py @@ -7,8 +7,8 @@ from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging from bfabric.entities import Workunit -from 
bfabric.experimental.app_interface.output_registration._spec import OutputsSpec -from bfabric.experimental.app_interface.output_registration.register import register_all +from app_runner.output_registration._spec import OutputsSpec +from app_runner.output_registration.register import register_all app_outputs = cyclopts.App("outputs", help="Register output files for an app.") diff --git a/src/bfabric/experimental/app_interface/cli/validate.py b/app_runner/src/app_runner/cli/validate.py similarity index 78% rename from src/bfabric/experimental/app_interface/cli/validate.py rename to app_runner/src/app_runner/cli/validate.py index ecb6e527..75709093 100644 --- a/src/bfabric/experimental/app_interface/cli/validate.py +++ b/app_runner/src/app_runner/cli/validate.py @@ -7,9 +7,9 @@ import rich.pretty import yaml -from bfabric.experimental.app_interface.app_runner._spec import AppSpec -from bfabric.experimental.app_interface.input_preparation._spec import InputsSpec -from bfabric.experimental.app_interface.output_registration._spec import OutputsSpec +from app_runner.app_runner._spec import AppSpec +from app_runner.input_preparation._spec import InputsSpec +from app_runner.output_registration._spec import OutputsSpec app_validate = cyclopts.App("validate", help="Validate yaml files.") diff --git a/src/bfabric/experimental/app_interface/util/__init__.py b/app_runner/src/app_runner/dispatch/__init__.py similarity index 100% rename from src/bfabric/experimental/app_interface/util/__init__.py rename to app_runner/src/app_runner/dispatch/__init__.py diff --git a/src/bfabric/experimental/app_interface/dispatch/dispatch_individual_resources.py b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py similarity index 98% rename from src/bfabric/experimental/app_interface/dispatch/dispatch_individual_resources.py rename to app_runner/src/app_runner/dispatch/dispatch_individual_resources.py index 21560c20..69e5d6c6 100644 --- 
a/src/bfabric/experimental/app_interface/dispatch/dispatch_individual_resources.py +++ b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py @@ -9,7 +9,7 @@ from bfabric import Bfabric from bfabric.entities import Resource, Dataset -from bfabric.experimental.app_interface.workunit.definition import WorkunitDefinition +from bfabric.experimental.workunit_definition import WorkunitDefinition class ConfigResourceFlow(BaseModel): diff --git a/src/bfabric/experimental/app_interface/input_preparation/__init__.py b/app_runner/src/app_runner/input_preparation/__init__.py similarity index 100% rename from src/bfabric/experimental/app_interface/input_preparation/__init__.py rename to app_runner/src/app_runner/input_preparation/__init__.py diff --git a/src/bfabric/experimental/app_interface/input_preparation/__main__.py b/app_runner/src/app_runner/input_preparation/__main__.py similarity index 90% rename from src/bfabric/experimental/app_interface/input_preparation/__main__.py rename to app_runner/src/app_runner/input_preparation/__main__.py index ec8879b6..3b7ae657 100644 --- a/src/bfabric/experimental/app_interface/input_preparation/__main__.py +++ b/app_runner/src/app_runner/input_preparation/__main__.py @@ -5,7 +5,7 @@ from bfabric.bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging -from bfabric.experimental.app_interface.input_preparation.prepare import prepare_folder +from app_runner.input_preparation.prepare import prepare_folder def main() -> None: diff --git a/src/bfabric/experimental/app_interface/input_preparation/_spec.py b/app_runner/src/app_runner/input_preparation/_spec.py similarity index 100% rename from src/bfabric/experimental/app_interface/input_preparation/_spec.py rename to app_runner/src/app_runner/input_preparation/_spec.py diff --git a/src/bfabric/experimental/app_interface/input_preparation/integrity.py b/app_runner/src/app_runner/input_preparation/integrity.py similarity index 90% rename from 
src/bfabric/experimental/app_interface/input_preparation/integrity.py rename to app_runner/src/app_runner/input_preparation/integrity.py index cdd9d8bb..d7bac53d 100644 --- a/src/bfabric/experimental/app_interface/input_preparation/integrity.py +++ b/app_runner/src/app_runner/input_preparation/integrity.py @@ -5,8 +5,8 @@ from bfabric.bfabric import Bfabric from bfabric.entities import Resource, Dataset -from bfabric.experimental.app_interface.input_preparation._spec import InputSpecType, ResourceSpec, DatasetSpec -from bfabric.experimental.app_interface.util.checksums import md5sum +from app_runner.input_preparation._spec import InputSpecType, ResourceSpec, DatasetSpec +from app_runner.util.checksums import md5sum class IntegrityState(Enum): diff --git a/src/bfabric/experimental/app_interface/input_preparation/list_inputs.py b/app_runner/src/app_runner/input_preparation/list_inputs.py similarity index 88% rename from src/bfabric/experimental/app_interface/input_preparation/list_inputs.py rename to app_runner/src/app_runner/input_preparation/list_inputs.py index a48be526..4034a4d0 100644 --- a/src/bfabric/experimental/app_interface/input_preparation/list_inputs.py +++ b/app_runner/src/app_runner/input_preparation/list_inputs.py @@ -7,8 +7,8 @@ from rich.table import Table, Column from bfabric.bfabric import Bfabric -from bfabric.experimental.app_interface.input_preparation._spec import InputSpecType -from bfabric.experimental.app_interface.input_preparation.integrity import check_integrity, IntegrityState +from app_runner.input_preparation._spec import InputSpecType +from app_runner.input_preparation.integrity import check_integrity, IntegrityState @dataclass diff --git a/src/bfabric/experimental/app_interface/input_preparation/prepare.py b/app_runner/src/app_runner/input_preparation/prepare.py similarity index 91% rename from src/bfabric/experimental/app_interface/input_preparation/prepare.py rename to app_runner/src/app_runner/input_preparation/prepare.py index 
2c35a05a..83a764af 100644 --- a/src/bfabric/experimental/app_interface/input_preparation/prepare.py +++ b/app_runner/src/app_runner/input_preparation/prepare.py @@ -6,16 +6,16 @@ from bfabric.bfabric import Bfabric from bfabric.entities import Resource, Dataset -from bfabric.experimental.app_interface.input_preparation._spec import ( +from app_runner.input_preparation._spec import ( ResourceSpec, DatasetSpec, InputSpecType, InputsSpec, ) -from bfabric.experimental.app_interface.input_preparation.integrity import IntegrityState -from bfabric.experimental.app_interface.input_preparation.list_inputs import list_input_states -from bfabric.experimental.app_interface.util.checksums import md5sum -from bfabric.experimental.app_interface.util.scp import scp +from app_runner.input_preparation.integrity import IntegrityState +from app_runner.input_preparation.list_inputs import list_input_states +from app_runner.util.checksums import md5sum +from app_runner.util.scp import scp class PrepareInputs: diff --git a/src/bfabric/experimental/app_interface/output_registration/__init__.py b/app_runner/src/app_runner/output_registration/__init__.py similarity index 100% rename from src/bfabric/experimental/app_interface/output_registration/__init__.py rename to app_runner/src/app_runner/output_registration/__init__.py diff --git a/src/bfabric/experimental/app_interface/output_registration/__main__.py b/app_runner/src/app_runner/output_registration/__main__.py similarity index 89% rename from src/bfabric/experimental/app_interface/output_registration/__main__.py rename to app_runner/src/app_runner/output_registration/__main__.py index ee0938d0..288f8900 100644 --- a/src/bfabric/experimental/app_interface/output_registration/__main__.py +++ b/app_runner/src/app_runner/output_registration/__main__.py @@ -3,7 +3,7 @@ from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging -from bfabric.experimental.app_interface.output_registration import register_outputs +from 
app_runner.output_registration import register_outputs def main() -> None: diff --git a/src/bfabric/experimental/app_interface/output_registration/_spec.py b/app_runner/src/app_runner/output_registration/_spec.py similarity index 100% rename from src/bfabric/experimental/app_interface/output_registration/_spec.py rename to app_runner/src/app_runner/output_registration/_spec.py diff --git a/src/bfabric/experimental/app_interface/output_registration/register.py b/app_runner/src/app_runner/output_registration/register.py similarity index 95% rename from src/bfabric/experimental/app_interface/output_registration/register.py rename to app_runner/src/app_runner/output_registration/register.py index 65c0e9ae..f2619925 100644 --- a/src/bfabric/experimental/app_interface/output_registration/register.py +++ b/app_runner/src/app_runner/output_registration/register.py @@ -6,15 +6,15 @@ from bfabric import Bfabric from bfabric.entities import Storage, Workunit -from bfabric.experimental.app_interface.output_registration._spec import ( +from app_runner.output_registration._spec import ( CopyResourceSpec, UpdateExisting, OutputsSpec, SpecType, SaveDatasetSpec, ) -from bfabric.experimental.app_interface.util.checksums import md5sum -from bfabric.experimental.app_interface.util.scp import scp +from app_runner.util.checksums import md5sum +from app_runner.util.scp import scp from bfabric_scripts.bfabric_save_csv2dataset import bfabric_save_csv2dataset diff --git a/src/bfabric/experimental/app_interface/workunit/__init__.py b/app_runner/src/app_runner/util/__init__.py similarity index 100% rename from src/bfabric/experimental/app_interface/workunit/__init__.py rename to app_runner/src/app_runner/util/__init__.py diff --git a/src/bfabric/experimental/app_interface/util/checksums.py b/app_runner/src/app_runner/util/checksums.py similarity index 100% rename from src/bfabric/experimental/app_interface/util/checksums.py rename to app_runner/src/app_runner/util/checksums.py diff --git 
a/src/bfabric/experimental/app_interface/util/scp.py b/app_runner/src/app_runner/util/scp.py similarity index 100% rename from src/bfabric/experimental/app_interface/util/scp.py rename to app_runner/src/app_runner/util/scp.py diff --git a/src/bfabric/experimental/app_interface/cli/__main__.py b/src/bfabric/experimental/app_interface/cli/__main__.py deleted file mode 100644 index 56868382..00000000 --- a/src/bfabric/experimental/app_interface/cli/__main__.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import annotations - -import cyclopts - -from bfabric.experimental.app_interface.cli.app import app_app -from bfabric.experimental.app_interface.cli.chunk import app_chunk -from bfabric.experimental.app_interface.cli.inputs import app_inputs -from bfabric.experimental.app_interface.cli.outputs import app_outputs -from bfabric.experimental.app_interface.cli.validate import app_validate - -app = cyclopts.App(help="Provides an entrypoint to app execution.\n\nFunctionality/API under active development!") -app.command(app_inputs) -app.command(app_outputs) -app.command(app_app) -app.command(app_chunk) -app.command(app_validate) - -if __name__ == "__main__": - app() diff --git a/src/bfabric/experimental/app_interface/workunit/definition.py b/src/bfabric/experimental/workunit_definition.py similarity index 100% rename from src/bfabric/experimental/app_interface/workunit/definition.py rename to src/bfabric/experimental/workunit_definition.py diff --git a/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py b/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py index 07087d62..e0e59959 100644 --- a/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py +++ b/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py @@ -10,7 +10,7 @@ from bfabric import Bfabric from bfabric.entities import Workunit, ExternalJob, Application, Resource, Storage, Order, Project -from bfabric.experimental.app_interface.workunit.definition import WorkunitDefinition +from 
bfabric.experimental.workunit_definition import WorkunitDefinition class BfabricWrapperCreator: From 9bb8c3b32af9d007c0ae17aeb0d8d1554f01de91 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 18 Oct 2024 12:06:09 +0200 Subject: [PATCH 044/144] name the artifact --- .github/workflows/build_app_runner.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_app_runner.yml b/.github/workflows/build_app_runner.yml index 2cb15ce6..f90079d9 100644 --- a/.github/workflows/build_app_runner.yml +++ b/.github/workflows/build_app_runner.yml @@ -14,4 +14,5 @@ jobs: - name: Upload uses: actions/upload-artifact@v4 with: + name: app_runner_linux_x86_64 path: build-output From 6d75c82debf53f83adfd04b77cb0ac18a090483f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 18 Oct 2024 13:36:49 +0200 Subject: [PATCH 045/144] verify build before upload --- .github/workflows/build_app_runner.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build_app_runner.yml b/.github/workflows/build_app_runner.yml index f90079d9..2a07b84b 100644 --- a/.github/workflows/build_app_runner.yml +++ b/.github/workflows/build_app_runner.yml @@ -11,6 +11,9 @@ jobs: run: | bash app_runner/deploy/build.sh build-output app_runner ls -l build-output + - name: Verify + run: | + build-output/app_runner/app_runner --help - name: Upload uses: actions/upload-artifact@v4 with: From 71cb216809faba889002b6196e62919aa5e26a11 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 18 Oct 2024 13:52:13 +0200 Subject: [PATCH 046/144] add initial readme --- app_runner/README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 app_runner/README.md diff --git a/app_runner/README.md b/app_runner/README.md new file mode 100644 index 00000000..410770c6 --- /dev/null +++ b/app_runner/README.md @@ -0,0 +1,15 @@ +The app runner is an experimental tool that standardizes the integration of one-off apps into B-Fabric. 
+ +**The API is subject to drastic changes in the next time.** + +The main idea is that an app provides a specification of the following steps: + +- dispatch -> create `inputs.yml` files and 1 `chunks.yml` file +- process -> process a particular chunk (after inputs have been prepared) +- collect -> collect the results of a chunk and create `outputs.yml` files + +The individual app can be in a container environment or a script running in the same environment as the app runner. + +To make this possible input and output staging is abstracted and communicated through `inputs.yml` and `outputs.yml` +specification files. +A command is available to stage the inputs or register the outputs respectively then. From 5cc9c32a7fb5f2c87a71a30d57eefa296533968f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 21 Oct 2024 12:50:24 +0200 Subject: [PATCH 047/144] remove the old script --- docs/changelog.md | 2 +- pyproject.toml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 95eb32cd..1c1c16a9 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -13,9 +13,9 @@ Versioning currently follows `X.Y.Z` where ### Added - Relationship: `ExternalJob.executable` -- (experimental) Provide a script `bfabric-app-runner` to allow testing with pipx. - (experimental) EntityLookupCache that allows to cache entity lookups in a script to avoid redundant requests. - (experimental) app_interface cli "inputs check" to validate the local files. +- (experimental) Create a new package `app-runner` to decouple this a bit more. 
### Fixed diff --git a/pyproject.toml b/pyproject.toml index fb9cb48c..935e7405 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,6 @@ Repository = "https://github.com/fgcz/bfabricPy" "bfabric_save_workunit_attribute.py"="bfabric_scripts.bfabric_save_workunit_attribute:main" "bfabric_save_workflowstep.py"="bfabric_scripts.bfabric_save_workflowstep:main" "bfabric_slurm_queue_status.py"="bfabric_scripts.bfabric_slurm_queue_status:main" -"bfabric-app-runner"="bfabric.experimental.app_interface.cli.__main__:app" [tool.black] line-length = 120 From 4ba851ab22a520edc74a2322900cf521370ce4e6 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 21 Oct 2024 12:54:01 +0200 Subject: [PATCH 048/144] add install command --- docs/app_runner/index.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/app_runner/index.md diff --git a/docs/app_runner/index.md b/docs/app_runner/index.md new file mode 100644 index 00000000..c163ba62 --- /dev/null +++ b/docs/app_runner/index.md @@ -0,0 +1,5 @@ +## Install App Runner + +```bash +pipx install app_runner@git+https://github.com/fgcz/bfabricPy.git@main#egg=app_runner&subdirectory=app_runner +``` From c41247e7eb1f41185cbb32ee8606df5e5553372b Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 21 Oct 2024 12:58:26 +0200 Subject: [PATCH 049/144] enable caching --- app_runner/src/app_runner/cli/app.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app_runner/src/app_runner/cli/app.py b/app_runner/src/app_runner/cli/app.py index e173a20e..e2297ed3 100644 --- a/app_runner/src/app_runner/cli/app.py +++ b/app_runner/src/app_runner/cli/app.py @@ -55,5 +55,6 @@ def dispatch( work_dir = work_dir.resolve() # TODO set workunit to processing? (i.e. 
add read-only option here) client = Bfabric.from_config() - runner = Runner(spec=AppSpec.model_validate(yaml.safe_load(app_spec.read_text())), client=client, ssh_user=None) - runner.run_dispatch(workunit_ref=workunit_ref, work_dir=work_dir) + with EntityLookupCache.enable(): + runner = Runner(spec=AppSpec.model_validate(yaml.safe_load(app_spec.read_text())), client=client, ssh_user=None) + runner.run_dispatch(workunit_ref=workunit_ref, work_dir=work_dir) From e5f8cd852ca9ae002fc174045c1722047a17fa41 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 21 Oct 2024 13:08:43 +0200 Subject: [PATCH 050/144] only reconfigure this once --- src/bfabric/cli_formatting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/bfabric/cli_formatting.py b/src/bfabric/cli_formatting.py index f558b86a..ea3dfc8f 100644 --- a/src/bfabric/cli_formatting.py +++ b/src/bfabric/cli_formatting.py @@ -18,6 +18,9 @@ class HostnameHighlighter(RegexHighlighter): def setup_script_logging(debug: bool = False) -> None: """Sets up the logging for the command line scripts.""" + setup_flag_key = "BFABRICPY_SCRIPT_LOGGING_SETUP" + if os.environ.get(setup_flag_key, "0") == "1": + return logger.remove() if not (debug or os.environ.get("BFABRICPY_DEBUG")): logger.add(sys.stderr, filter="bfabric", level="INFO", format="{level} {message}") @@ -25,3 +28,4 @@ def setup_script_logging(debug: bool = False) -> None: else: logger.add(sys.stderr, filter="bfabric", level="DEBUG") logger.add(sys.stderr, filter="__main__", level="DEBUG") + os.environ[setup_flag_key] = "1" From f28f844713a55891ec7a91c495f3ca0a7f7597fb Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 23 Oct 2024 15:59:45 +0200 Subject: [PATCH 051/144] split changelog --- app_runner/docs/changelog.md | 14 ++++++++++++++ docs/changelog.md | 6 +++--- 2 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 app_runner/docs/changelog.md diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md new file mode 
100644 index 00000000..f536fa7e --- /dev/null +++ b/app_runner/docs/changelog.md @@ -0,0 +1,14 @@ +# Changelog + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). + +## \[0.0.2\] - 2024-10-23 + +### Added + +- App spec supports changing docker entrypoint. +- `bfabric-app-runner inputs check` to validate the local files + +### Fixed + +- `bfabric-app-runner inputs list` does not fail anymore if resources have no "name" field value. diff --git a/docs/changelog.md b/docs/changelog.md index 1c1c16a9..4785569a 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -10,17 +10,17 @@ Versioning currently follows `X.Y.Z` where ## \[Unreleased\] +From this release onwards, the experimental app runner is not part of the main bfabric package and +instead a separate Python package with its individual changelog. + ### Added - Relationship: `ExternalJob.executable` - (experimental) EntityLookupCache that allows to cache entity lookups in a script to avoid redundant requests. -- (experimental) app_interface cli "inputs check" to validate the local files. -- (experimental) Create a new package `app-runner` to decouple this a bit more. ### Fixed - `Entity.find_all` returns no values when an empty list is passed as an argument. -- (experimental) app_interface cli "inputs list" does not fail anymore if resources have no "name" field value. 
### Changed From 59fa4ed53e12df1700b48cf48c4f5655991136c8 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 23 Oct 2024 15:59:53 +0200 Subject: [PATCH 052/144] version --- app_runner/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index 2021d7bf..58904f61 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "app_runner" description = "Application runner for B-Fabric apps" -version = "0.1.0" +version = "0.0.1" license = { text = "GPL-3.0" } authors = [ {name = "Leonardo Schwarz", email = "leonardo.schwarz@fgcz.ethz.ch"}, From 6529fea7f5b08410f4fa66c6ff111232d3429fb5 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 23 Oct 2024 16:00:13 +0200 Subject: [PATCH 053/144] allow specifying entrypoint --- app_runner/src/app_runner/app_runner/_spec.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app_runner/src/app_runner/app_runner/_spec.py b/app_runner/src/app_runner/app_runner/_spec.py index 0419257e..8e02923f 100644 --- a/app_runner/src/app_runner/app_runner/_spec.py +++ b/app_runner/src/app_runner/app_runner/_spec.py @@ -42,6 +42,7 @@ class CommandDocker(BaseModel): type: Literal["docker"] = "docker" image: str command: str + entrypoint: str | None = None engine: str = "docker" mounts: MountOptions = MountOptions() @@ -54,6 +55,7 @@ def to_shell(self, work_dir: Path | None = None) -> list[str]: target = shlex.quote(str(container)) mount_args.append("--mount") mount_args.append(f"type=bind,source={source},target={target}" + (",readonly" if read_only else "")) + entrypoint_arg = ["--entrypoint", self.entrypoint] if self.entrypoint else [] return [ self.engine, "run", @@ -61,6 +63,7 @@ def to_shell(self, work_dir: Path | None = None) -> list[str]: f"{os.getuid()}:{os.getgid()}", "--rm", *mount_args, + *entrypoint_arg, self.image, *shlex.split(self.command), ] From 
cd677cbb7983f4c3d5bc75a0cec899be7d627c25 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 23 Oct 2024 16:03:30 +0200 Subject: [PATCH 054/144] supply version info --- app_runner/src/app_runner/cli/__main__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/app_runner/src/app_runner/cli/__main__.py b/app_runner/src/app_runner/cli/__main__.py index 760412be..b5fc0756 100644 --- a/app_runner/src/app_runner/cli/__main__.py +++ b/app_runner/src/app_runner/cli/__main__.py @@ -1,5 +1,7 @@ from __future__ import annotations +import importlib.metadata + import cyclopts from app_runner.cli.app import app_app @@ -8,7 +10,12 @@ from app_runner.cli.outputs import app_outputs from app_runner.cli.validate import app_validate -app = cyclopts.App(help="Provides an entrypoint to app execution.\n\nFunctionality/API under active development!") +package_version = importlib.metadata.version("app_runner") + +app = cyclopts.App( + help="Provides an entrypoint to app execution.\n\nFunctionality/API under active development!", + version=package_version, +) app.command(app_inputs) app.command(app_outputs) app.command(app_app) From 8856831c674bd95d0da8e88c5a4869d660f035bd Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 23 Oct 2024 16:03:51 +0200 Subject: [PATCH 055/144] app runner v0.0.2 --- app_runner/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index 58904f61..77b4305f 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "app_runner" description = "Application runner for B-Fabric apps" -version = "0.0.1" +version = "0.0.2" license = { text = "GPL-3.0" } authors = [ {name = "Leonardo Schwarz", email = "leonardo.schwarz@fgcz.ethz.ch"}, From a19a2c7acb91c25a7007e607cb91de37afaf21a7 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 24 Oct 2024 10:02:29 +0200 
Subject: [PATCH 056/144] allow to specify env variables --- app_runner/docs/changelog.md | 6 ++++++ app_runner/src/app_runner/app_runner/_spec.py | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md index f536fa7e..a2e3341f 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -2,6 +2,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## \[0.0.3\] - 2024-10-24 + +### Added + +- Specify environment variables for docker container in spec. + ## \[0.0.2\] - 2024-10-23 ### Added diff --git a/app_runner/src/app_runner/app_runner/_spec.py b/app_runner/src/app_runner/app_runner/_spec.py index 8e02923f..b8f5c35f 100644 --- a/app_runner/src/app_runner/app_runner/_spec.py +++ b/app_runner/src/app_runner/app_runner/_spec.py @@ -44,6 +44,7 @@ class CommandDocker(BaseModel): command: str entrypoint: str | None = None engine: str = "docker" + env: dict[str, str] = {} mounts: MountOptions = MountOptions() def to_shell(self, work_dir: Path | None = None) -> list[str]: @@ -56,6 +57,11 @@ def to_shell(self, work_dir: Path | None = None) -> list[str]: mount_args.append("--mount") mount_args.append(f"type=bind,source={source},target={target}" + (",readonly" if read_only else "")) entrypoint_arg = ["--entrypoint", self.entrypoint] if self.entrypoint else [] + env_args = [] + for key, value in self.env.items(): + env_args.append("--env") + env_args.append(f"{key}={shlex.quote(value)}") + return [ self.engine, "run", @@ -64,6 +70,7 @@ def to_shell(self, work_dir: Path | None = None) -> list[str]: "--rm", *mount_args, *entrypoint_arg, + *env_args, self.image, *shlex.split(self.command), ] From b0596199f6752ab3c9f7e5caa090ab10b61fde06 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 24 Oct 2024 10:03:43 +0200 Subject: [PATCH 057/144] add unreleased section --- app_runner/docs/changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md index a2e3341f..f37119a4 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -2,6 +2,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## \[Unreleased\] + ## \[0.0.3\] - 2024-10-24 ### Added From a3365dd9542686089bf35f06eaaec31e50581f3f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 25 Oct 2024 08:41:46 +0200 Subject: [PATCH 058/144] start document input staging --- {docs/app_runner => app_runner/docs}/index.md | 0 app_runner/docs/inputs_staging.md | 77 +++++++++++++++++++ app_runner/mkdocs.yml | 42 ++++++++++ app_runner/pyproject.toml | 8 ++ 4 files changed, 127 insertions(+) rename {docs/app_runner => app_runner/docs}/index.md (100%) create mode 100644 app_runner/docs/inputs_staging.md create mode 100644 app_runner/mkdocs.yml diff --git a/docs/app_runner/index.md b/app_runner/docs/index.md similarity index 100% rename from docs/app_runner/index.md rename to app_runner/docs/index.md diff --git a/app_runner/docs/inputs_staging.md b/app_runner/docs/inputs_staging.md new file mode 100644 index 00000000..a3f95e4c --- /dev/null +++ b/app_runner/docs/inputs_staging.md @@ -0,0 +1,77 @@ +The inputs module provides a specification schema to define the inputs required by an app. +You can also use this functionality interactively while prototyping. +The file is usually called `inputs.yml` and lists the different inputs, with information and how to retrieve them and the filename to save them as. + +## Input specification + +### General structure + +Generally the structure is a yaml file containing a key `inputs` which is a list of dictionaries, each representing an input file. +Each input has a `type` key which identifies the input type. +This will allow us to extend this logic to different sources in the future. 
+ +In general the only other input key that will be available for all types is `filename`, which is the name of the file to save the input as. +Fields like `id` might not be relevant for all types in the future, and depending on the type more specific options might exist. + +An example file could look like this: + +```yaml +# file: inputs.yml +inputs: + - type: bfabric_dataset + id: 53706 + filename: test.csv + - type: bfabric_resource + id: 2700958 + filename: test.zip +``` + +## Commands + +### Validation + +The input file can be validated with the command: + +```bash +bfabric-app-runner validate inputs-spec inputs.yml +``` + +Which on success will output a pretty-printed version of the inputs file. +Validation will also be performed by all other commands, so this is not strictly necessary. + +For instance, in the above case this would print: + +``` +InputsSpec( +│ inputs=[ +│ │ DatasetSpec(type='bfabric_dataset', id=53706, filename='test.csv', separator=','), +│ │ ResourceSpec(type='bfabric_resource', id=2700958, filename='test.zip', check_checksum=True) +│ ] +) +``` + +### Prepare files + +The prepare command downloads your files and requires two arguments. +The first is the input file, and the second is the directory to save the files to. +In general to download to the current directory simply use `.` as the second argument: + +```bash +bfabric-app-runner inputs prepare inputs.yml . +``` + +If your files already exist and are up-to-date, it will not download them again. + +### List files + +You can list the files that are present or will be downloaded: + +```bash +bfabric-app-runner inputs list inputs.yml . +``` + +If you also want to check whether the files are up-to-date, you can pass the `--check` flag: + +```bash +bfabric-app-runner inputs list --check inputs.yml . 
+``` diff --git a/app_runner/mkdocs.yml b/app_runner/mkdocs.yml new file mode 100644 index 00000000..0d3d4013 --- /dev/null +++ b/app_runner/mkdocs.yml @@ -0,0 +1,42 @@ +site_name: app_runner docs +theme: + name: material + features: + - navigation.sections + - navigation.instant + - content.code.copy + palette: + - scheme: slate + primary: black + toggle: + icon: material/brightness-7 + name: Light + - scheme: default + primary: black + toggle: + icon: material/brightness-4 + name: Dark +nav: + - '...' + - changelog.md +markdown_extensions: + - admonition + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences +plugins: + - mkdocstrings: + handlers: + python: + options: + annotations_path: source + show_category_heading: yes + show_root_heading: yes + show_symbol_type_heading: yes + show_source: no + members_order: alphabetical + - awesome-pages diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index 77b4305f..fc3f59c4 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -17,3 +17,11 @@ dependencies = [ [project.scripts] "bfabric-app-runner"="app_runner.cli.__main__:app" + +[project.optional-dependencies] +doc = [ + "mkdocs", + "mkdocs-material", + "mkdocstrings[python]", + "mkdocs-awesome-pages-plugin", +] From da15151fa1a66766512f3a4786f89b6fb7255f71 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 25 Oct 2024 09:02:32 +0200 Subject: [PATCH 059/144] try to format the docs --- app_runner/docs/inputs_staging.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/app_runner/docs/inputs_staging.md b/app_runner/docs/inputs_staging.md index a3f95e4c..e12d3c0b 100644 --- a/app_runner/docs/inputs_staging.md +++ b/app_runner/docs/inputs_staging.md @@ -50,6 +50,8 @@ InputsSpec( ) ``` +Here you can also see all the extra parameters which were implicitly set. 
+ ### Prepare files The prepare command downloads your files and requires two arguments. @@ -75,3 +77,12 @@ If you also want to check whether the files are up-to-date, you can pass the `-- ```bash bfabric-app-runner inputs list --check inputs.yml . ``` + +## Reference + +::: app_runner.input_preparation.\_spec.DatasetSpec +options: +heading_level: 3 +::: app_runner.input_preparation.\_spec.ResourceSpec +options: +heading_level: 3 From 0eecc8cd61b1db0ab60a2691f6593fd0e2073fda Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 25 Oct 2024 09:41:54 +0200 Subject: [PATCH 060/144] documentation --- app_runner/docs/inputs_staging.md | 24 ++++++----- app_runner/mkdocs.yml | 42 ------------------- app_runner/pyproject.toml | 8 ++-- app_runner/src/app_runner/cli/inputs.py | 2 +- app_runner/src/app_runner/cli/validate.py | 2 +- .../app_runner/input_preparation/integrity.py | 2 +- .../input_preparation/list_inputs.py | 2 +- .../app_runner/input_preparation/prepare.py | 2 +- .../input_preparation/{_spec.py => spec.py} | 10 +++++ 9 files changed, 33 insertions(+), 61 deletions(-) delete mode 100644 app_runner/mkdocs.yml rename app_runner/src/app_runner/input_preparation/{_spec.py => spec.py} (88%) diff --git a/app_runner/docs/inputs_staging.md b/app_runner/docs/inputs_staging.md index e12d3c0b..1e86fef4 100644 --- a/app_runner/docs/inputs_staging.md +++ b/app_runner/docs/inputs_staging.md @@ -1,17 +1,21 @@ The inputs module provides a specification schema to define the inputs required by an app. You can also use this functionality interactively while prototyping. -The file is usually called `inputs.yml` and lists the different inputs, with information and how to retrieve them and the filename to save them as. +The file is usually called `inputs.yml` and lists the different inputs, with information and how to retrieve them and +the filename to save them as. 
## Input specification ### General structure -Generally the structure is a yaml file containing a key `inputs` which is a list of dictionaries, each representing an input file. +Generally the structure is a yaml file containing a key `inputs` which is a list of dictionaries, each representing an +input file. Each input has a `type` key which identifies the input type. This will allow us to extend this logic to different sources in the future. -In general the only other input key that will be available for all types is `filename`, which is the name of the file to save the input as. -Fields like `id` might not be relevant for all types in the future, and depending on the type more specific options might exist. +In general the only other input key that will be available for all types is `filename`, which is the name of the file to +save the input as. +Fields like `id` might not be relevant for all types in the future, and depending on the type more specific options +might exist. An example file could look like this: @@ -80,9 +84,9 @@ bfabric-app-runner inputs list --check inputs.yml . ## Reference -::: app_runner.input_preparation.\_spec.DatasetSpec -options: -heading_level: 3 -::: app_runner.input_preparation.\_spec.ResourceSpec -options: -heading_level: 3 +```{eval-rst} +.. automodule:: app_runner.input_preparation.spec + :members: + :undoc-members: + :show-inheritance: +``` diff --git a/app_runner/mkdocs.yml b/app_runner/mkdocs.yml deleted file mode 100644 index 0d3d4013..00000000 --- a/app_runner/mkdocs.yml +++ /dev/null @@ -1,42 +0,0 @@ -site_name: app_runner docs -theme: - name: material - features: - - navigation.sections - - navigation.instant - - content.code.copy - palette: - - scheme: slate - primary: black - toggle: - icon: material/brightness-7 - name: Light - - scheme: default - primary: black - toggle: - icon: material/brightness-4 - name: Dark -nav: - - '...' 
- - changelog.md -markdown_extensions: - - admonition - - pymdownx.highlight: - anchor_linenums: true - line_spans: __span - pygments_lang_class: true - - pymdownx.inlinehilite - - pymdownx.snippets - - pymdownx.superfences -plugins: - - mkdocstrings: - handlers: - python: - options: - annotations_path: source - show_category_heading: yes - show_root_heading: yes - show_symbol_type_heading: yes - show_source: no - members_order: alphabetical - - awesome-pages diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index fc3f59c4..a06cfefc 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -20,8 +20,8 @@ dependencies = [ [project.optional-dependencies] doc = [ - "mkdocs", - "mkdocs-material", - "mkdocstrings[python]", - "mkdocs-awesome-pages-plugin", + "sphinx", + "myst-parser", + "autodoc_pydantic", + "sphinx-book-theme", ] diff --git a/app_runner/src/app_runner/cli/inputs.py b/app_runner/src/app_runner/cli/inputs.py index b64ee81a..50327d49 100644 --- a/app_runner/src/app_runner/cli/inputs.py +++ b/app_runner/src/app_runner/cli/inputs.py @@ -7,7 +7,7 @@ from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging from app_runner.input_preparation import prepare_folder -from app_runner.input_preparation._spec import InputsSpec +from app_runner.input_preparation.spec import InputsSpec from app_runner.input_preparation.integrity import IntegrityState from app_runner.input_preparation.list_inputs import ( list_input_states, diff --git a/app_runner/src/app_runner/cli/validate.py b/app_runner/src/app_runner/cli/validate.py index 75709093..3caf0cad 100644 --- a/app_runner/src/app_runner/cli/validate.py +++ b/app_runner/src/app_runner/cli/validate.py @@ -8,7 +8,7 @@ import yaml from app_runner.app_runner._spec import AppSpec -from app_runner.input_preparation._spec import InputsSpec +from app_runner.input_preparation.spec import InputsSpec from app_runner.output_registration._spec import OutputsSpec app_validate = 
cyclopts.App("validate", help="Validate yaml files.") diff --git a/app_runner/src/app_runner/input_preparation/integrity.py b/app_runner/src/app_runner/input_preparation/integrity.py index d7bac53d..ee8e1a09 100644 --- a/app_runner/src/app_runner/input_preparation/integrity.py +++ b/app_runner/src/app_runner/input_preparation/integrity.py @@ -5,7 +5,7 @@ from bfabric.bfabric import Bfabric from bfabric.entities import Resource, Dataset -from app_runner.input_preparation._spec import InputSpecType, ResourceSpec, DatasetSpec +from app_runner.input_preparation.spec import InputSpecType, ResourceSpec, DatasetSpec from app_runner.util.checksums import md5sum diff --git a/app_runner/src/app_runner/input_preparation/list_inputs.py b/app_runner/src/app_runner/input_preparation/list_inputs.py index 4034a4d0..6f4f73dc 100644 --- a/app_runner/src/app_runner/input_preparation/list_inputs.py +++ b/app_runner/src/app_runner/input_preparation/list_inputs.py @@ -7,7 +7,7 @@ from rich.table import Table, Column from bfabric.bfabric import Bfabric -from app_runner.input_preparation._spec import InputSpecType +from app_runner.input_preparation.spec import InputSpecType from app_runner.input_preparation.integrity import check_integrity, IntegrityState diff --git a/app_runner/src/app_runner/input_preparation/prepare.py b/app_runner/src/app_runner/input_preparation/prepare.py index 83a764af..5c9a7646 100644 --- a/app_runner/src/app_runner/input_preparation/prepare.py +++ b/app_runner/src/app_runner/input_preparation/prepare.py @@ -6,7 +6,7 @@ from bfabric.bfabric import Bfabric from bfabric.entities import Resource, Dataset -from app_runner.input_preparation._spec import ( +from app_runner.input_preparation.spec import ( ResourceSpec, DatasetSpec, InputSpecType, diff --git a/app_runner/src/app_runner/input_preparation/_spec.py b/app_runner/src/app_runner/input_preparation/spec.py similarity index 88% rename from app_runner/src/app_runner/input_preparation/_spec.py rename to 
app_runner/src/app_runner/input_preparation/spec.py index df6b75df..e76f24de 100644 --- a/app_runner/src/app_runner/input_preparation/_spec.py +++ b/app_runner/src/app_runner/input_preparation/spec.py @@ -20,8 +20,13 @@ class ResourceSpec(BaseModel): type: Literal["bfabric_resource"] = "bfabric_resource" id: int + """B-Fabric resource ID""" + filename: RelativeFilePath | None = None + """Target filename to save to""" + check_checksum: bool = True + """Whether to check the checksum of the file, after downloading""" def resolve_filename(self, client: Bfabric) -> str: if self.filename: @@ -36,8 +41,13 @@ class DatasetSpec(BaseModel): type: Literal["bfabric_dataset"] = "bfabric_dataset" id: int + """B-Fabric dataset ID""" + filename: RelativeFilePath + """Target filename to save to""" + separator: Literal[",", "\t"] = "," + """Separator for the CSV file""" # has_header: bool # invalid_characters: str = "" From d09166e496b511d38fa52e48f1eefb8075ed2617 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 25 Oct 2024 10:38:42 +0200 Subject: [PATCH 061/144] docs --- app_runner/docs/Makefile | 20 +++++++++++ app_runner/docs/conf.py | 25 +++++++++++++ app_runner/docs/index.md | 7 ++++ ...puts_staging.md => input_specification.md} | 0 app_runner/docs/make.bat | 35 +++++++++++++++++++ 5 files changed, 87 insertions(+) create mode 100644 app_runner/docs/Makefile create mode 100644 app_runner/docs/conf.py rename app_runner/docs/{inputs_staging.md => input_specification.md} (100%) create mode 100644 app_runner/docs/make.bat diff --git a/app_runner/docs/Makefile b/app_runner/docs/Makefile new file mode 100644 index 00000000..d4bb2cbb --- /dev/null +++ b/app_runner/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . 
+BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/app_runner/docs/conf.py b/app_runner/docs/conf.py new file mode 100644 index 00000000..2531748e --- /dev/null +++ b/app_runner/docs/conf.py @@ -0,0 +1,25 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "App Runner" +copyright = "2024 ETH Zurich" +author = "Leonardo Schwarz" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["myst_parser", "sphinx.ext.autodoc", "sphinxcontrib.autodoc_pydantic"] + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_book_theme" +html_static_path = ["_static"] diff --git a/app_runner/docs/index.md b/app_runner/docs/index.md index c163ba62..d6cc881a 100644 --- a/app_runner/docs/index.md +++ b/app_runner/docs/index.md @@ -1,3 +1,10 @@ +```{toctree} +:glob: +input_specification +changelog +* +``` + ## Install App Runner ```bash diff --git a/app_runner/docs/inputs_staging.md b/app_runner/docs/input_specification.md 
similarity index 100% rename from app_runner/docs/inputs_staging.md rename to app_runner/docs/input_specification.md diff --git a/app_runner/docs/make.bat b/app_runner/docs/make.bat new file mode 100644 index 00000000..32bb2452 --- /dev/null +++ b/app_runner/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd From 1abed511c5ac8dc8a7582756df2cd04747560b73 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 25 Oct 2024 11:45:50 +0200 Subject: [PATCH 062/144] refactor --- app_runner/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index a06cfefc..2b619475 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -24,4 +24,5 @@ doc = [ "myst-parser", "autodoc_pydantic", "sphinx-book-theme", + "sphinx-autobuild", ] From a340a9706d061a8abaa046edf661f4745b2364cc Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 29 Oct 2024 11:08:49 +0100 Subject: [PATCH 063/144] docs --- app_runner/docs/index.md | 4 +++- app_runner/docs/specs/app_specification.md | 4 ++++ app_runner/docs/{ => specs}/input_specification.md | 4 ++-- app_runner/docs/specs/output_specification.md | 8 ++++++++ 4 files changed, 17 insertions(+), 3 
deletions(-) create mode 100644 app_runner/docs/specs/app_specification.md rename app_runner/docs/{ => specs}/input_specification.md (100%) create mode 100644 app_runner/docs/specs/output_specification.md diff --git a/app_runner/docs/index.md b/app_runner/docs/index.md index d6cc881a..038cc646 100644 --- a/app_runner/docs/index.md +++ b/app_runner/docs/index.md @@ -1,6 +1,8 @@ ```{toctree} :glob: -input_specification +specs/input_specification +specs/output_specification +specs/app_specification changelog * ``` diff --git a/app_runner/docs/specs/app_specification.md b/app_runner/docs/specs/app_specification.md new file mode 100644 index 00000000..b4620f6f --- /dev/null +++ b/app_runner/docs/specs/app_specification.md @@ -0,0 +1,4 @@ +## App specification + +TODO: not clear if this same document should also explain the individual steps, or if it would make sense to first +describe the app anatomy in a separate document with figures etc. and then list how to specify it diff --git a/app_runner/docs/input_specification.md b/app_runner/docs/specs/input_specification.md similarity index 100% rename from app_runner/docs/input_specification.md rename to app_runner/docs/specs/input_specification.md index 1e86fef4..44d48702 100644 --- a/app_runner/docs/input_specification.md +++ b/app_runner/docs/specs/input_specification.md @@ -1,10 +1,10 @@ +## Input specification + The inputs module provides a specification schema to define the inputs required by an app. You can also use this functionality interactively while prototyping. The file is usually called `inputs.yml` and lists the different inputs, with information and how to retrieve them and the filename to save them as. 
-## Input specification - ### General structure Generally the structure is a yaml file containing a key `inputs` which is a list of dictionaries, each representing an diff --git a/app_runner/docs/specs/output_specification.md b/app_runner/docs/specs/output_specification.md new file mode 100644 index 00000000..db7f5592 --- /dev/null +++ b/app_runner/docs/specs/output_specification.md @@ -0,0 +1,8 @@ +## Output specification + +The outputs module provides a specification schema to define the outputs that were created by an app and should be registered. +The file is usually called `outputs.yml` and lists the different output files, with information how to register them. + +### General structure + +To be described. From 39681348d968ab4a5fd8db5ffd4527db06193551 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 29 Oct 2024 12:16:56 +0100 Subject: [PATCH 064/144] delete old scripts --- .../app_runner/input_preparation/__main__.py | 30 ------------------- .../output_registration/__main__.py | 23 -------------- 2 files changed, 53 deletions(-) delete mode 100644 app_runner/src/app_runner/input_preparation/__main__.py delete mode 100644 app_runner/src/app_runner/output_registration/__main__.py diff --git a/app_runner/src/app_runner/input_preparation/__main__.py b/app_runner/src/app_runner/input_preparation/__main__.py deleted file mode 100644 index 3b7ae657..00000000 --- a/app_runner/src/app_runner/input_preparation/__main__.py +++ /dev/null @@ -1,30 +0,0 @@ -from __future__ import annotations - -import argparse -from pathlib import Path - -from bfabric.bfabric import Bfabric -from bfabric.cli_formatting import setup_script_logging -from app_runner.input_preparation.prepare import prepare_folder - - -def main() -> None: - setup_script_logging() - client = Bfabric.from_config() - parser = argparse.ArgumentParser() - parser.add_argument("action", default="prepare", choices=["prepare", "clean"]) - parser.add_argument("--inputs-yaml", type=Path, required=True) - 
parser.add_argument("--target-folder", type=Path, required=False) - parser.add_argument("--ssh-user", type=str, required=False) - args = parser.parse_args() - prepare_folder( - inputs_yaml=args.inputs_yaml, - target_folder=args.target_folder, - ssh_user=args.ssh_user, - client=client, - action=args.action, - ) - - -if __name__ == "__main__": - main() diff --git a/app_runner/src/app_runner/output_registration/__main__.py b/app_runner/src/app_runner/output_registration/__main__.py deleted file mode 100644 index 288f8900..00000000 --- a/app_runner/src/app_runner/output_registration/__main__.py +++ /dev/null @@ -1,23 +0,0 @@ -import argparse -from pathlib import Path - -from bfabric import Bfabric -from bfabric.cli_formatting import setup_script_logging -from app_runner.output_registration import register_outputs - - -def main() -> None: - setup_script_logging() - client = Bfabric.from_config() - parser = argparse.ArgumentParser() - parser.add_argument("action", default="register", choices=["register"]) - parser.add_argument("--outputs-yaml", type=Path, required=True) - parser.add_argument("--workunit-id", type=int, required=True) - parser.add_argument("--ssh-user", type=str, required=False) - args = parser.parse_args() - register_outputs( - outputs_yaml=args.outputs_yaml, - workunit_id=args.workunit_id, - client=client, - ssh_user=args.ssh_user, - ) From 97929f59576ebcb5452afa1ccc71001ff3a2b00a Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 29 Oct 2024 12:17:13 +0100 Subject: [PATCH 065/144] fix incorrect type in model --- app_runner/src/app_runner/output_registration/_spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app_runner/src/app_runner/output_registration/_spec.py b/app_runner/src/app_runner/output_registration/_spec.py index bffb1f36..0c7276d2 100644 --- a/app_runner/src/app_runner/output_registration/_spec.py +++ b/app_runner/src/app_runner/output_registration/_spec.py @@ -33,7 +33,7 @@ class CopyResourceSpec(BaseModel): 
class SaveDatasetSpec(BaseModel): model_config = ConfigDict(extra="forbid") - type: Literal["bfabric_save_dataset"] = "bfabric_dataset" + type: Literal["bfabric_dataset"] = "bfabric_dataset" local_path: Path separator: str From 99bd1fe1eefef7376785faee90e8d1c25187be19 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 29 Oct 2024 12:19:04 +0100 Subject: [PATCH 066/144] pass pytype checks --- .../dispatch/dispatch_individual_resources.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py index 69e5d6c6..ef2917be 100644 --- a/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py +++ b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py @@ -28,7 +28,7 @@ class ConfigDispatchIndividualResources(BaseModel): dataset_flow: ConfigDatasetFlow | None @model_validator(mode="after") - def check_at_least_one_flow(self) -> Self: + def check_at_least_one_flow(self) -> ConfigDispatchIndividualResources: if self.resource_flow is None and self.dataset_flow is None: raise ValueError("either resource_flow or dataset_flow must be provided") return self @@ -80,14 +80,13 @@ def _write_chunks(self, chunks: list[Path]) -> None: yaml.safe_dump(data, f) def _dispatch_jobs_resource_flow(self, definition: WorkunitDefinition, params: dict[str, Any]) -> list[Path]: - if self._config.resource_flow is None: + config = self._config.resource_flow + if config is None: raise ValueError("resource_flow is not configured") resources = Resource.find_all(ids=definition.execution.resources, client=self._client) paths = [] for resource in sorted(resources.values()): - if self._config.resource_flow.filter_suffix is not None and not resource["relativepath"].endswith( - self._config.resource_flow.filter_suffix - ): + if config.filter_suffix is not None and not 
resource["relativepath"].endswith(config.filter_suffix): logger.info( f"Skipping resource {resource['relativepath']!r} as it does not match the extension filter." ) @@ -96,16 +95,15 @@ def _dispatch_jobs_resource_flow(self, definition: WorkunitDefinition, params: d return paths def _dispatch_jobs_dataset_flow(self, definition: WorkunitDefinition, params: dict[str, Any]) -> list[Path]: - if self._config.dataset_flow is None: + config = self._config.dataset_flow + if config is None: raise ValueError("dataset_flow is not configured") dataset = Dataset.find(id=definition.execution.dataset, client=self._client) dataset_df = dataset.to_polars() - resources = Resource.find_all( - ids=dataset_df[self._config.dataset_flow.resource_column].unique().to_list(), client=self._client - ) + resources = Resource.find_all(ids=dataset_df[config.resource_column].unique().to_list(), client=self._client) paths = [] for row in dataset_df.iter_rows(named=True): - resource_id = int(row[self._config.dataset_flow.resource_column]) - row_params = {name: row[dataset_name] for dataset_name, name in self._config.dataset_flow.param_columns} + resource_id = int(row[config.resource_column]) + row_params = {name: row[dataset_name] for dataset_name, name in config.param_columns} paths.append(self.dispatch_job(resource=resources[resource_id], params=params | row_params)) return paths From 56fd9c818ff0e4caaea6e26d4b84df09e004340f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 29 Oct 2024 12:25:46 +0100 Subject: [PATCH 067/144] fix a few wrong type annotations --- src/bfabric/bfabric_config.py | 2 +- src/bfabric/results/response_format_dict.py | 4 ++-- src/bfabric/utils/paginator.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/bfabric/bfabric_config.py b/src/bfabric/bfabric_config.py index 0ccf2933..cf4ca7e9 100644 --- a/src/bfabric/bfabric_config.py +++ b/src/bfabric/bfabric_config.py @@ -12,7 +12,7 @@ def read_config( config_path: str | Path, - config_env: str 
= None, + config_env: str | None = None, ) -> tuple[BfabricClientConfig, BfabricAuth | None]: """ Reads bfabricpy.yml file, parses it, extracting authentication and configuration data diff --git a/src/bfabric/results/response_format_dict.py b/src/bfabric/results/response_format_dict.py index fb7e0cc5..e05bb5e9 100644 --- a/src/bfabric/results/response_format_dict.py +++ b/src/bfabric/results/response_format_dict.py @@ -33,14 +33,14 @@ def _recursive_drop_empty(response_elem: list | dict) -> None: del response_elem[k] -def drop_empty_elements(response: list | dict, inplace: bool = True) -> list | dict | None: +def drop_empty_elements(response: list | dict, inplace: bool = True) -> list | dict: """ Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary value is encountered, that is either an empty list or None, the key-value pair gets deleted from the dictionary :param response: A parsed query response, consisting of nested lists, dicts and basic types (int, str) :param inplace: If true, will return nothing and edit the argument. 
Otherwise, will preserve the argument and return an edited copy - :return: Nothing, or an edited response, depending on `inplace` + :return: An edited response, depending on `inplace` """ response_filtered = deepcopy(response) if not inplace else response _recursive_drop_empty(response_filtered) diff --git a/src/bfabric/utils/paginator.py b/src/bfabric/utils/paginator.py index f20312b3..5bdddb20 100644 --- a/src/bfabric/utils/paginator.py +++ b/src/bfabric/utils/paginator.py @@ -1,12 +1,13 @@ from __future__ import annotations import math +from collections.abc import Generator # Single page query limit for BFabric API (as of time of writing, adapt if it changes) BFABRIC_QUERY_LIMIT = 100 -def page_iter(objs: list, page_size: int = BFABRIC_QUERY_LIMIT) -> list: +def page_iter(objs: list, page_size: int = BFABRIC_QUERY_LIMIT) -> Generator[list, None, None]: """ :param objs: A list of objects to provide to bfabric as part of a query :param page_size: Number of objects per page From 9717bf0a4f5d3d0dfb24b250f62f883521cdafcf Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 08:49:40 +0100 Subject: [PATCH 068/144] update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ee8be9c9..39d119ec 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ bfabric/scripts/query_result.txt build/ dist/ site/ +_build/ From ea7ecd03c63d8027b09160740edc919cc77867e2 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 08:46:55 +0100 Subject: [PATCH 069/144] type the errors --- src/bfabric/errors.py | 2 +- src/bfabric/results/result_container.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/bfabric/errors.py b/src/bfabric/errors.py index 28545748..a8d2a634 100644 --- a/src/bfabric/errors.py +++ b/src/bfabric/errors.py @@ -3,7 +3,7 @@ from typing import Any -class BfabricRequestError(Exception): +class BfabricRequestError(RuntimeError): """An error that is 
returned by the server in response to a full request.""" def __init__(self, message: str) -> None: diff --git a/src/bfabric/results/result_container.py b/src/bfabric/results/result_container.py index 9ac30185..2988b3c7 100644 --- a/src/bfabric/results/result_container.py +++ b/src/bfabric/results/result_container.py @@ -1,20 +1,24 @@ from __future__ import annotations import logging -from typing import Any, TYPE_CHECKING from collections.abc import Iterable +from typing import Any, TYPE_CHECKING import bfabric.results.response_format_dict as formatter if TYPE_CHECKING: import polars + from bfabric.errors import BfabricRequestError class ResultContainer: """Container structure for query results.""" def __init__( - self, results: list[dict[str, Any]], total_pages_api: int | None = None, errors: list | None = None + self, + results: list[dict[str, Any]], + total_pages_api: int | None = None, + errors: list[BfabricRequestError] | None = None, ) -> None: """ :param results: List of BFabric query results @@ -61,8 +65,8 @@ def is_success(self) -> bool: return len(self._errors) == 0 @property - def errors(self) -> list: - """List of errors that occurred during the query. An empty list means the query was successful.""" + def errors(self) -> list[BfabricRequestError]: + """List of errors that occurred during the query. 
An empty list indicates success.""" return self._errors def extend(self, other: ResultContainer, reset_total_pages_api: bool = False) -> None: From 3dbc8f7538a72d5566c093b0f7aa3f61bfdc0e7a Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 08:46:39 +0100 Subject: [PATCH 070/144] fix logging for new scripts package --- src/bfabric/cli_formatting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/bfabric/cli_formatting.py b/src/bfabric/cli_formatting.py index ea3dfc8f..35e6595c 100644 --- a/src/bfabric/cli_formatting.py +++ b/src/bfabric/cli_formatting.py @@ -24,8 +24,10 @@ def setup_script_logging(debug: bool = False) -> None: logger.remove() if not (debug or os.environ.get("BFABRICPY_DEBUG")): logger.add(sys.stderr, filter="bfabric", level="INFO", format="{level} {message}") + logger.add(sys.stderr, filter="bfabric_scripts", level="INFO", format="{level} {message}") logger.add(sys.stderr, filter="__main__", level="INFO", format="{level} {message}") else: logger.add(sys.stderr, filter="bfabric", level="DEBUG") + logger.add(sys.stderr, filter="bfabric_scripts", level="DEBUG") logger.add(sys.stderr, filter="__main__", level="DEBUG") os.environ[setup_flag_key] = "1" From a0a5f596e0fcae81bf57d2f789b8639236c57d8f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 09:04:22 +0100 Subject: [PATCH 071/144] update changelog --- docs/changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changelog.md b/docs/changelog.md index 4785569a..6758924f 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -25,6 +25,7 @@ instead a separate Python package with its individual changelog. ### Changed - Except for macOS x86_64 (which we assume is Rosetta emulation nowadays), we use the faster `polars` instead of `polars-lts-cpu`. +- `BfabricRequestError` is now a `RuntimeError` subclass. 
### Removed From 0e8807181306423b3b2b0a7b7bb8921c3ac81b68 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 09:22:28 +0100 Subject: [PATCH 072/144] fix mypy errors --- src/bfabric/bfabric.py | 34 ++++++++++----------- src/bfabric/results/response_format_dict.py | 9 ++++++ src/bfabric/results/result_container.py | 10 ++++-- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/bfabric/bfabric.py b/src/bfabric/bfabric.py index 72693604..1c3ba97a 100644 --- a/src/bfabric/bfabric.py +++ b/src/bfabric/bfabric.py @@ -16,7 +16,6 @@ import base64 import importlib.metadata import sys -from contextlib import AbstractContextManager from contextlib import contextmanager from datetime import datetime from enum import Enum @@ -24,6 +23,7 @@ from pathlib import Path from pprint import pprint from typing import Literal, Any +from collections.abc import Generator from loguru import logger from rich.console import Console @@ -112,7 +112,7 @@ def auth(self) -> BfabricAuth: return self._auth @contextmanager - def with_auth(self, auth: BfabricAuth) -> AbstractContextManager[Bfabric]: + def with_auth(self, auth: BfabricAuth) -> Generator[None, None, None]: """Context manager that temporarily (within the scope of the context) sets the authentication for the Bfabric object to the provided value. This is useful when authenticating multiple users, to avoid accidental use of the wrong credentials. 
@@ -167,7 +167,7 @@ def read( logger.debug(f"Requested pages: {requested_pages}") # NOTE: Page numbering starts at 1 - response_items = [] + response_items: list[dict[str, Any]] = [] errors = results.errors page_offset = initial_offset for i_iter, i_page in enumerate(requested_pages): @@ -293,14 +293,14 @@ def __setstate__(self, state): def get_system_auth( - login: str = None, - password: str = None, - base_url: str = None, - config_path: str = None, - config_env: str = None, + login: str | None = None, + password: str | None = None, + base_url: str | None = None, + config_path: str | None = None, + config_env: str | None = None, optional_auth: bool = True, verbose: bool = False, -) -> tuple[BfabricClientConfig, BfabricAuth]: +) -> tuple[BfabricClientConfig, BfabricAuth | None]: """ :param login: Login string for overriding config file :param password: Password for overriding config file @@ -312,23 +312,21 @@ def get_system_auth( otherwise an exception will be raised :param verbose: Verbosity (TODO: resolve potential redundancy with logger) """ - - have_config_path = config_path is not None - config_path = Path(config_path or "~/.bfabricpy.yml").expanduser() + resolved_path = Path(config_path or "~/.bfabricpy.yml").expanduser() # Use the provided config data from arguments instead of the file - if not config_path.is_file(): - if have_config_path: + if not resolved_path.is_file(): + if config_path: # NOTE: If user explicitly specifies a path to a wrong config file, this has to be an exception - raise OSError(f"Explicitly specified config file does not exist: {config_path}") + raise OSError(f"Explicitly specified config file does not exist: {resolved_path}") # TODO: Convert to log - print(f"Warning: could not find the config file in the default location: {config_path}") + print(f"Warning: could not find the config file in the default location: {resolved_path}") config = BfabricClientConfig(base_url=base_url) - auth = None if login is None and password is None else 
BfabricAuth(login=login, password=password) + auth = None if login is None or password is None else BfabricAuth(login=login, password=password) # Load config from file, override some of the fields with the provided ones else: - config, auth = read_config(config_path, config_env=config_env) + config, auth = read_config(resolved_path, config_env=config_env) config = config.copy_with(base_url=base_url) if (login is not None) and (password is not None): auth = BfabricAuth(login=login, password=password) diff --git a/src/bfabric/results/response_format_dict.py b/src/bfabric/results/response_format_dict.py index e05bb5e9..18bc8d3f 100644 --- a/src/bfabric/results/response_format_dict.py +++ b/src/bfabric/results/response_format_dict.py @@ -1,6 +1,7 @@ from __future__ import annotations from copy import deepcopy +from typing import Any, overload def sort_dict(d: dict) -> dict: @@ -33,6 +34,14 @@ def _recursive_drop_empty(response_elem: list | dict) -> None: del response_elem[k] +@overload +def drop_empty_elements(response: list[dict[str, Any]], inplace: bool) -> list[dict[str, Any]]: ... + + +@overload +def drop_empty_elements(response: dict[str, Any], inplace: bool) -> dict[str, Any]: ... + + def drop_empty_elements(response: list | dict, inplace: bool = True) -> list | dict: """ Iterates over all nested lists, dictionaries and basic values. 
Whenever a dictionary value is encountered, that is diff --git a/src/bfabric/results/result_container.py b/src/bfabric/results/result_container.py index 2988b3c7..94ebb7c3 100644 --- a/src/bfabric/results/result_container.py +++ b/src/bfabric/results/result_container.py @@ -2,7 +2,7 @@ import logging from collections.abc import Iterable -from typing import Any, TYPE_CHECKING +from typing import Any, TYPE_CHECKING, overload import bfabric.results.response_format_dict as formatter @@ -32,7 +32,13 @@ def __init__( self._total_pages_api = total_pages_api self._errors = errors or [] - def __getitem__(self, idx: int) -> dict[str, Any]: + @overload + def __getitem__(self, idx: int) -> dict[str, Any]: ... + + @overload + def __getitem__(self, idx: slice) -> list[dict[str, Any]]: ... + + def __getitem__(self, idx: int | slice) -> dict[str, Any] | list[dict[str, Any]]: return self.results[idx] def __iter__(self) -> Iterable[dict[str, Any]]: From 5ab0b0ad067b631fd7c420cda7612b036a6864d2 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 09:29:55 +0100 Subject: [PATCH 073/144] add mypy packages in optional env --- pyproject.toml | 59 +++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 935e7405..866e8e74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,12 +8,12 @@ description = "Python client for the B-Fabric WSDL API" version = "1.13.8" license = { text = "GPL-3.0" } authors = [ - {name = "Christian Panse", email = "cp@fgcz.ethz.ch"}, - {name = "Leonardo Schwarz", email = "leonardo.schwarz@fgcz.ethz.ch"}, - {name = "Aleksejs Fomins"}, - {name = "Marco Schmidt"}, - {name = "Maria d'Errico"}, - {name = "Witold Eryk Wolski"} + { name = "Christian Panse", email = "cp@fgcz.ethz.ch" }, + { name = "Leonardo Schwarz", email = "leonardo.schwarz@fgcz.ethz.ch" }, + { name = "Aleksejs Fomins" }, + { name = "Marco Schmidt" }, + { name = "Maria d'Errico" }, + { name 
= "Witold Eryk Wolski" } ] requires-python = ">=3.9" dependencies = [ @@ -44,37 +44,38 @@ dev = [ ] doc = ["mkdocs", "mkdocs-material", "mkdocstrings[python]"] test = ["pytest", "pytest-mock", "logot"] +typing = ["mypy", "types-requests", "lxml-stubs", "pandas-stubs", "types-python-dateutil"] [project.urls] Homepage = "https://github.com/fgcz/bfabricPy" Repository = "https://github.com/fgcz/bfabricPy" [project.scripts] -"bfabric_flask.py"="bfabric_scripts.bfabric_flask:main" +"bfabric_flask.py" = "bfabric_scripts.bfabric_flask:main" #bfabric_feeder_resource_autoQC="bfabric_scripts.bfabric_feeder_resource_autoQC:main" -"bfabric_list_not_existing_storage_directories.py"="bfabric_scripts.bfabric_list_not_existing_storage_directories:main" -"bfabric_list_not_available_proteomics_workunits.py"="bfabric_scripts.bfabric_list_not_available_proteomics_workunits:main" -"bfabric_list_workunit_parameters.py"="bfabric_scripts.bfabric_list_workunit_parameters:main" -"bfabric_upload_resource.py"="bfabric_scripts.bfabric_upload_resource:main" -"bfabric_logthis.py"="bfabric_scripts.bfabric_logthis:main" -"bfabric_setResourceStatus_available.py"="bfabric_scripts.bfabric_setResourceStatus_available:main" -"bfabric_setExternalJobStatus_done.py"="bfabric_scripts.bfabric_setExternalJobStatus_done:main" -"bfabric_setWorkunitStatus_available.py"="bfabric_scripts.bfabric_setWorkunitStatus:main_available" -"bfabric_setWorkunitStatus_processing.py"="bfabric_scripts.bfabric_setWorkunitStatus:main_processing" -"bfabric_setWorkunitStatus_failed.py"="bfabric_scripts.bfabric_setWorkunitStatus:main_failed" -"bfabric_delete.py"="bfabric_scripts.bfabric_delete:main" -"bfabric_read.py"="bfabric_scripts.bfabric_read:main" -"bfabric_read_samples_of_workunit.py"="bfabric_scripts.bfabric_read_samples_of_workunit:main" -"bfabric_read_samples_from_dataset.py"="bfabric_scripts.bfabric_read_samples_from_dataset:main" -"bfabric_save_csv2dataset.py"="bfabric_scripts.bfabric_save_csv2dataset:main" 
-"bfabric_save_dataset2csv.py"="bfabric_scripts.bfabric_save_dataset2csv:main" -"bfabric_save_fasta.py"="bfabric_scripts.bfabric_save_fasta:main" -"bfabric_save_importresource_sample.py"="bfabric_scripts.bfabric_save_importresource_sample:main" -"bfabric_save_link_to_workunit.py"="bfabric_scripts.bfabric_save_link_to_workunit:main" +"bfabric_list_not_existing_storage_directories.py" = "bfabric_scripts.bfabric_list_not_existing_storage_directories:main" +"bfabric_list_not_available_proteomics_workunits.py" = "bfabric_scripts.bfabric_list_not_available_proteomics_workunits:main" +"bfabric_list_workunit_parameters.py" = "bfabric_scripts.bfabric_list_workunit_parameters:main" +"bfabric_upload_resource.py" = "bfabric_scripts.bfabric_upload_resource:main" +"bfabric_logthis.py" = "bfabric_scripts.bfabric_logthis:main" +"bfabric_setResourceStatus_available.py" = "bfabric_scripts.bfabric_setResourceStatus_available:main" +"bfabric_setExternalJobStatus_done.py" = "bfabric_scripts.bfabric_setExternalJobStatus_done:main" +"bfabric_setWorkunitStatus_available.py" = "bfabric_scripts.bfabric_setWorkunitStatus:main_available" +"bfabric_setWorkunitStatus_processing.py" = "bfabric_scripts.bfabric_setWorkunitStatus:main_processing" +"bfabric_setWorkunitStatus_failed.py" = "bfabric_scripts.bfabric_setWorkunitStatus:main_failed" +"bfabric_delete.py" = "bfabric_scripts.bfabric_delete:main" +"bfabric_read.py" = "bfabric_scripts.bfabric_read:main" +"bfabric_read_samples_of_workunit.py" = "bfabric_scripts.bfabric_read_samples_of_workunit:main" +"bfabric_read_samples_from_dataset.py" = "bfabric_scripts.bfabric_read_samples_from_dataset:main" +"bfabric_save_csv2dataset.py" = "bfabric_scripts.bfabric_save_csv2dataset:main" +"bfabric_save_dataset2csv.py" = "bfabric_scripts.bfabric_save_dataset2csv:main" +"bfabric_save_fasta.py" = "bfabric_scripts.bfabric_save_fasta:main" +"bfabric_save_importresource_sample.py" = "bfabric_scripts.bfabric_save_importresource_sample:main" 
+"bfabric_save_link_to_workunit.py" = "bfabric_scripts.bfabric_save_link_to_workunit:main" #bfabric_save_resource="bfabric_scripts.bfabric_save_resource:main" -"bfabric_save_workunit_attribute.py"="bfabric_scripts.bfabric_save_workunit_attribute:main" -"bfabric_save_workflowstep.py"="bfabric_scripts.bfabric_save_workflowstep:main" -"bfabric_slurm_queue_status.py"="bfabric_scripts.bfabric_slurm_queue_status:main" +"bfabric_save_workunit_attribute.py" = "bfabric_scripts.bfabric_save_workunit_attribute:main" +"bfabric_save_workflowstep.py" = "bfabric_scripts.bfabric_save_workflowstep:main" +"bfabric_slurm_queue_status.py" = "bfabric_scripts.bfabric_slurm_queue_status:main" [tool.black] line-length = 120 From ced8a8d818243a9110ae5baa15dd5f99a13ebdad Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 10:07:52 +0100 Subject: [PATCH 074/144] fix several mypy errors --- src/bfabric/engine/response_format_suds.py | 20 +++++++---- src/bfabric/entities/application.py | 10 ++++-- src/bfabric/entities/core/entity.py | 3 ++ src/bfabric/entities/core/has_many.py | 16 +++++---- src/bfabric/entities/core/has_one.py | 11 +++--- src/bfabric/entities/externaljob.py | 6 +++- src/bfabric/entities/multiplexkit.py | 8 +++-- src/bfabric/entities/order.py | 7 ++-- src/bfabric/entities/resource.py | 10 ++++-- src/bfabric/entities/workunit.py | 36 ++++++++++++++----- .../experimental/entity_lookup_cache.py | 2 +- 11 files changed, 90 insertions(+), 39 deletions(-) diff --git a/src/bfabric/engine/response_format_suds.py b/src/bfabric/engine/response_format_suds.py index aaf64d54..78748a23 100644 --- a/src/bfabric/engine/response_format_suds.py +++ b/src/bfabric/engine/response_format_suds.py @@ -1,10 +1,15 @@ from __future__ import annotations -from typing import Any + +from typing import Any, TYPE_CHECKING + from suds.sax.text import Text from suds.sudsobject import asdict +if TYPE_CHECKING: + Value = list["Value"] | dict[str, "Value"] | str | int | float | bool | None | 
Any + -def convert_suds_type(item: Any) -> int | str: +def convert_suds_type(item: Any) -> int | str | Any: """ Converts the suds type to an equivalent python type. There is, to my knowledge, only a single suds type which is currently ever return, namely 'Text'. Integers and doubles are already cast to their python equivalents and @@ -17,24 +22,25 @@ def convert_suds_type(item: Any) -> int | str: return item -def suds_asdict_recursive(d, convert_types: bool = False) -> dict: +def suds_asdict_recursive(d, convert_types: bool = False) -> dict[str, Value]: """Convert Suds object into serializable format. https://stackoverflow.com/a/15678861 :param d: The input suds object :param convert_types: A boolean to determine if the simple types return should be cast to python types :return: The suds object converted to an OrderedDict """ - out = {} + out: dict[str, Value] = {} for k, v in asdict(d).items(): if hasattr(v, "__keylist__"): out[k] = suds_asdict_recursive(v, convert_types=convert_types) elif isinstance(v, list): - out[k] = [] + items: list[Value] = [] for item in v: if hasattr(item, "__keylist__"): - out[k].append(suds_asdict_recursive(item, convert_types=convert_types)) + items.append(suds_asdict_recursive(item, convert_types=convert_types)) else: - out[k].append(convert_suds_type(item) if convert_types else item) + items.append(convert_suds_type(item) if convert_types else item) + out[k] = items else: out[k] = convert_suds_type(v) if convert_types else v return out diff --git a/src/bfabric/entities/application.py b/src/bfabric/entities/application.py index 30d74a27..cb4ccceb 100644 --- a/src/bfabric/entities/application.py +++ b/src/bfabric/entities/application.py @@ -1,11 +1,15 @@ from __future__ import annotations -from typing import Any +from typing import Any, TYPE_CHECKING from bfabric import Bfabric from bfabric.entities.core.entity import Entity from bfabric.entities.core.has_one import HasOne +if TYPE_CHECKING: + from bfabric.entities.executable import 
Executable + from bfabric.entities.storage import Storage + class Application(Entity): ENDPOINT = "application" @@ -13,5 +17,5 @@ class Application(Entity): def __init__(self, data_dict: dict[str, Any], client: Bfabric | None) -> None: super().__init__(data_dict=data_dict, client=client) - storage = HasOne("Storage", bfabric_field="storage") - executable = HasOne("Executable", bfabric_field="executable") + storage: HasOne[Storage] = HasOne("Storage", bfabric_field="storage") + executable: HasOne[Executable] = HasOne("Executable", bfabric_field="executable") diff --git a/src/bfabric/entities/core/entity.py b/src/bfabric/entities/core/entity.py index 507fecfe..4012a782 100644 --- a/src/bfabric/entities/core/entity.py +++ b/src/bfabric/entities/core/entity.py @@ -27,6 +27,9 @@ def id(self) -> int: @property def web_url(self) -> str: + if self._client is None: + msg = "Cannot generate a web URL without a client's config information." + raise ValueError(msg) return f"{self._client.config.base_url}/{self.ENDPOINT}/show.html?id={self.id}" @property diff --git a/src/bfabric/entities/core/has_many.py b/src/bfabric/entities/core/has_many.py index aa98d658..ab5ca0b2 100644 --- a/src/bfabric/entities/core/has_many.py +++ b/src/bfabric/entities/core/has_many.py @@ -1,16 +1,18 @@ from __future__ import annotations from collections.abc import Iterable +from typing import Generic, TypeVar from polars import DataFrame from bfabric import Bfabric from bfabric.entities.core.entity import Entity - from bfabric.entities.core.relationship import Relationship +E = TypeVar("E", bound=Entity) + -class HasMany(Relationship): +class HasMany(Relationship, Generic[E]): def __init__( self, entity: str, @@ -47,8 +49,8 @@ def _get_ids(self, obj) -> list[int]: return getattr(obj, self._ids_property) -class _HasManyProxy: - def __init__(self, entity_type: type[Entity], ids: list[int], client: Bfabric) -> None: +class _HasManyProxy(Generic[E]): + def __init__(self, entity_type: type[E], ids: 
list[int], client: Bfabric) -> None: self._entity_type = entity_type self._ids = ids self._client = client @@ -59,7 +61,7 @@ def ids(self) -> list[int]: return self._ids @property - def list(self) -> list[Entity]: + def list(self) -> list[E]: self._load_all() return sorted(self._items.values(), key=lambda x: self._items.keys()) @@ -68,11 +70,11 @@ def polars(self) -> DataFrame: self._load_all() return DataFrame([x.data_dict for x in self._items.values()]) - def __getitem__(self, key: int) -> Entity: + def __getitem__(self, key: int) -> E: self._load_all() return self._items[key] - def __iter__(self) -> Iterable[Entity]: + def __iter__(self) -> Iterable[E]: self._load_all() return iter(sorted(self._items.values(), key=lambda x: self._items.keys())) diff --git a/src/bfabric/entities/core/has_one.py b/src/bfabric/entities/core/has_one.py index b2957999..0bc6fb00 100644 --- a/src/bfabric/entities/core/has_one.py +++ b/src/bfabric/entities/core/has_one.py @@ -1,22 +1,25 @@ from __future__ import annotations -from bfabric.entities.core.entity import Entity +from typing import TypeVar, Generic + from bfabric.entities.core.relationship import Relationship +E = TypeVar("E") + -class HasOne(Relationship): +class HasOne(Relationship, Generic[E]): def __init__(self, entity: str, *, bfabric_field: str, optional: bool = False) -> None: super().__init__(entity) self._bfabric_field = bfabric_field self._optional = optional - def __get__(self, obj, objtype=None) -> Entity | None: + def __get__(self, obj, objtype=None) -> E | None: cache_attr = f"_HasOne__{self._bfabric_field}_cache" if not hasattr(obj, cache_attr): setattr(obj, cache_attr, self._load_entity(obj=obj)) return getattr(obj, cache_attr) - def _load_entity(self, obj) -> Entity | None: + def _load_entity(self, obj) -> E | None: client = obj._client entity_data = obj.data_dict.get(self._bfabric_field) if self._optional and entity_data is None: diff --git a/src/bfabric/entities/externaljob.py 
b/src/bfabric/entities/externaljob.py index 05f46595..cbcfca38 100644 --- a/src/bfabric/entities/externaljob.py +++ b/src/bfabric/entities/externaljob.py @@ -9,6 +9,7 @@ if TYPE_CHECKING: from bfabric.entities.workunit import Workunit + from bfabric.entities.executable import Executable class ExternalJob(Entity): @@ -17,13 +18,16 @@ class ExternalJob(Entity): def __init__(self, data_dict: dict[str, Any], client: Bfabric | None) -> None: super().__init__(data_dict=data_dict, client=client) - executable = HasOne(entity="Executable", bfabric_field="executable") + executable: HasOne[Executable] = HasOne(entity="Executable", bfabric_field="executable") @cached_property def workunit(self) -> Workunit | None: from bfabric.entities.workunit import Workunit if self.data_dict["cliententityclassname"] == "Workunit": + if self._client is None: + raise ValueError("Client must be set to resolve Workunit") + return Workunit.find(id=self.data_dict["cliententityid"], client=self._client) else: return None diff --git a/src/bfabric/entities/multiplexkit.py b/src/bfabric/entities/multiplexkit.py index ed27fd04..6ff4130d 100644 --- a/src/bfabric/entities/multiplexkit.py +++ b/src/bfabric/entities/multiplexkit.py @@ -1,20 +1,24 @@ from __future__ import annotations import polars as pl from functools import cached_property -from typing import Any +from typing import Any, TYPE_CHECKING from bfabric import Bfabric from bfabric.entities.core.entity import Entity from bfabric.entities.core.has_many import HasMany +if TYPE_CHECKING: + from bfabric.entities.multiplexid import MultiplexId + + class MultiplexKit(Entity): ENDPOINT = "multiplexkit" def __init__(self, data_dict: dict[str, Any], client: Bfabric | None) -> None: super().__init__(data_dict=data_dict, client=client) - multiplex_ids = HasMany("MultiplexId", bfabric_field="multiplexid") + multiplex_ids: HasMany[MultiplexId] = HasMany("MultiplexId", bfabric_field="multiplexid") @cached_property def ids(self) -> pl.DataFrame: diff --git 
a/src/bfabric/entities/order.py b/src/bfabric/entities/order.py index 43d01534..e5f005d8 100644 --- a/src/bfabric/entities/order.py +++ b/src/bfabric/entities/order.py @@ -1,11 +1,14 @@ from __future__ import annotations -from typing import Any +from typing import Any, TYPE_CHECKING from bfabric import Bfabric from bfabric.entities.core.entity import Entity from bfabric.entities.core.has_one import HasOne +if TYPE_CHECKING: + from bfabric.entities.project import Project + class Order(Entity): ENDPOINT = "order" @@ -13,4 +16,4 @@ class Order(Entity): def __init__(self, data_dict: dict[str, Any], client: Bfabric | None) -> None: super().__init__(data_dict=data_dict, client=client) - project = HasOne("Project", bfabric_field="project") + project: HasOne[Project] = HasOne("Project", bfabric_field="project") diff --git a/src/bfabric/entities/resource.py b/src/bfabric/entities/resource.py index 27b938d7..b43b4d5e 100644 --- a/src/bfabric/entities/resource.py +++ b/src/bfabric/entities/resource.py @@ -1,11 +1,15 @@ from __future__ import annotations -from typing import Any +from typing import Any, TYPE_CHECKING from bfabric import Bfabric from bfabric.entities.core.entity import Entity from bfabric.entities.core.has_one import HasOne +if TYPE_CHECKING: + from bfabric.entities.storage import Storage + from bfabric.entities.workunit import Workunit + class Resource(Entity): ENDPOINT = "resource" @@ -13,5 +17,5 @@ class Resource(Entity): def __init__(self, data_dict: dict[str, Any], client: Bfabric | None = None) -> None: super().__init__(data_dict=data_dict, client=client) - storage = HasOne("Storage", bfabric_field="storage") - workunit = HasOne("Workunit", bfabric_field="workunit") + storage: HasOne[Storage] = HasOne("Storage", bfabric_field="storage") + workunit: HasOne[Workunit] = HasOne("Workunit", bfabric_field="workunit") diff --git a/src/bfabric/entities/workunit.py b/src/bfabric/entities/workunit.py index e18e6a7c..451005c4 100644 --- 
a/src/bfabric/entities/workunit.py +++ b/src/bfabric/entities/workunit.py @@ -11,8 +11,13 @@ from bfabric.entities.core.has_one import HasOne if TYPE_CHECKING: - from bfabric.entities.project import Project + from bfabric.entities.application import Application + from bfabric.entities.dataset import Dataset + from bfabric.entities.externaljob import ExternalJob from bfabric.entities.order import Order + from bfabric.entities.parameter import Parameter + from bfabric.entities.project import Project + from bfabric.entities.resource import Resource class Workunit(Entity): @@ -25,12 +30,12 @@ class Workunit(Entity): def __init__(self, data_dict: dict[str, Any], client: Bfabric | None = None) -> None: super().__init__(data_dict=data_dict, client=client) - application = HasOne(entity="Application", bfabric_field="application") - parameters = HasMany(entity="Parameter", bfabric_field="parameter") - resources = HasMany(entity="Resource", bfabric_field="resource") - input_resources = HasMany(entity="Resource", bfabric_field="inputresource", optional=True) - input_dataset = HasOne(entity="Dataset", bfabric_field="inputdataset", optional=True) - external_jobs = HasMany(entity="ExternalJob", bfabric_field="externaljob", optional=True) + application: HasOne[Application] = HasOne(entity="Application", bfabric_field="application") + parameters: HasMany[Parameter] = HasMany(entity="Parameter", bfabric_field="parameter") + resources: HasMany[Resource] = HasMany(entity="Resource", bfabric_field="resource") + input_resources: HasMany[Resource] = HasMany(entity="Resource", bfabric_field="inputresource", optional=True) + input_dataset: HasOne[Dataset] = HasOne(entity="Dataset", bfabric_field="inputdataset", optional=True) + external_jobs: HasMany[ExternalJob] = HasMany(entity="ExternalJob", bfabric_field="externaljob", optional=True) @cached_property def parameter_values(self) -> dict[str, Any]: @@ -41,16 +46,29 @@ def container(self) -> Project | Order: from bfabric.entities.project 
import Project from bfabric.entities.order import Order + if self._client is None: + raise ValueError("Cannot determine the container without a client.") + + result: Project | Order | None if self.data_dict["container"]["classname"] == Project.ENDPOINT: - return Project.find(id=self.data_dict["container"]["id"], client=self._client) + result = Project.find(id=self.data_dict["container"]["id"], client=self._client) elif self.data_dict["container"]["classname"] == Order.ENDPOINT: - return Order.find(id=self.data_dict["container"]["id"], client=self._client) + result = Order.find(id=self.data_dict["container"]["id"], client=self._client) else: raise ValueError(f"Unknown container classname: {self.data_dict['container']['classname']}") + if result is None: + raise ValueError(f"Could not find container with ID {self.data_dict['container']['id']}") + + return result + @cached_property def store_output_folder(self) -> Path: """Relative path in the storage for the workunit output.""" + if self.application is None: + raise ValueError("Cannot determine the storage path without an application.") + if self.application.storage is None: + raise ValueError("Cannot determine the storage path without an application storage configuration.") date = dateutil.parser.parse(self.data_dict["created"]) return Path( f"{self.application.storage['projectfolderprefix']}{self.container.id}", diff --git a/src/bfabric/experimental/entity_lookup_cache.py b/src/bfabric/experimental/entity_lookup_cache.py index ed96aeb1..7429f1ca 100644 --- a/src/bfabric/experimental/entity_lookup_cache.py +++ b/src/bfabric/experimental/entity_lookup_cache.py @@ -71,7 +71,7 @@ def get_all(self, entity_type: type[Entity], entity_ids: list[int]) -> dict[int, if self.contains(entity_type, entity_id) } - def put(self, entity_type: type[Entity], entity_id: int, entity: Entity) -> None: + def put(self, entity_type: type[Entity], entity_id: int, entity: Entity | None) -> None: """Puts an entity with the given type and ID 
into the cache.""" logger.debug(f"Caching entity {entity_type} with ID {entity_id}") self._caches[entity_type].put(entity_id, entity) From b7637492a2dcead13328368d27e2c2d20e8a917e Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 10:46:19 +0100 Subject: [PATCH 075/144] fix more mypy errors --- src/bfabric/entities/core/entity.py | 4 ++-- src/bfabric/entities/core/has_many.py | 12 +++++++----- src/bfabric/entities/core/has_one.py | 7 ++++--- src/bfabric/entities/core/relationship.py | 7 +++++-- src/bfabric/experimental/entity_lookup_cache.py | 7 +++++-- src/bfabric/results/result_container.py | 4 ++-- 6 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/bfabric/entities/core/entity.py b/src/bfabric/entities/core/entity.py index 4012a782..543274c8 100644 --- a/src/bfabric/entities/core/entity.py +++ b/src/bfabric/entities/core/entity.py @@ -47,13 +47,13 @@ def find(cls, id: int, client: Bfabric) -> Self | None: """Finds an entity by its ID, if it does not exist `None` is returned.""" cache = EntityLookupCache.instance() if cache and cache.contains(entity_type=cls, entity_id=id): - entity = cache.get(entity_type=cls, entity_id=id) + return cache.get(entity_type=cls, entity_id=id) else: result = client.read(cls.ENDPOINT, obj={"id": int(id)}) entity = cls(result[0], client=client) if len(result) == 1 else None if cache: cache.put(entity_type=cls, entity_id=id, entity=entity) - return entity + return entity @classmethod def find_all(cls, ids: list[int], client: Bfabric) -> dict[int, Self]: diff --git a/src/bfabric/entities/core/has_many.py b/src/bfabric/entities/core/has_many.py index ab5ca0b2..a044e29c 100644 --- a/src/bfabric/entities/core/has_many.py +++ b/src/bfabric/entities/core/has_many.py @@ -12,7 +12,7 @@ E = TypeVar("E", bound=Entity) -class HasMany(Relationship, Generic[E]): +class HasMany(Relationship[E]): def __init__( self, entity: str, @@ -37,16 +37,18 @@ def __get__(self, obj, objtype=None) -> _HasManyProxy: return 
getattr(obj, cache_attr) def _get_ids(self, obj) -> list[int]: - if (self._bfabric_field is None) == (self._ids_property is None): - raise ValueError("Exactly one of bfabric_field and ids_property must be set") if self._bfabric_field is not None: + if self._ids_property is not None: + raise ValueError("Exactly one of bfabric_field and ids_property must be set, but both are set") if self._optional and self._bfabric_field not in obj.data_dict: return [] return [x["id"] for x in obj.data_dict[self._bfabric_field]] - else: + elif self._ids_property is not None: if self._optional and not hasattr(obj, self._ids_property): return [] return getattr(obj, self._ids_property) + else: + raise ValueError("Exactly one of bfabric_field and ids_property must be set, but neither is set") class _HasManyProxy(Generic[E]): @@ -54,7 +56,7 @@ def __init__(self, entity_type: type[E], ids: list[int], client: Bfabric) -> Non self._entity_type = entity_type self._ids = ids self._client = client - self._items = {} + self._items: dict[int, E] = {} @property def ids(self) -> list[int]: diff --git a/src/bfabric/entities/core/has_one.py b/src/bfabric/entities/core/has_one.py index 0bc6fb00..88366e57 100644 --- a/src/bfabric/entities/core/has_one.py +++ b/src/bfabric/entities/core/has_one.py @@ -1,13 +1,14 @@ from __future__ import annotations -from typing import TypeVar, Generic +from typing import TypeVar +from bfabric.entities.core.entity import Entity from bfabric.entities.core.relationship import Relationship -E = TypeVar("E") +E = TypeVar("E", bound=Entity) -class HasOne(Relationship, Generic[E]): +class HasOne(Relationship[E]): def __init__(self, entity: str, *, bfabric_field: str, optional: bool = False) -> None: super().__init__(entity) self._bfabric_field = bfabric_field diff --git a/src/bfabric/entities/core/relationship.py b/src/bfabric/entities/core/relationship.py index ada1e734..05219cac 100644 --- a/src/bfabric/entities/core/relationship.py +++ 
b/src/bfabric/entities/core/relationship.py @@ -2,16 +2,19 @@ import importlib from functools import cached_property +from typing import TypeVar, Generic from bfabric.entities.core.entity import Entity +E = TypeVar("E", bound=Entity) -class Relationship: + +class Relationship(Generic[E]): def __init__(self, entity: str) -> None: self._entity_type_name = entity @cached_property - def _entity_type(self) -> type[Entity]: + def _entity_type(self) -> type[E]: return importlib.import_module(f"bfabric.entities.{self._entity_type_name.lower()}").__dict__[ self._entity_type_name ] diff --git a/src/bfabric/experimental/entity_lookup_cache.py b/src/bfabric/experimental/entity_lookup_cache.py index 7429f1ca..93341a49 100644 --- a/src/bfabric/experimental/entity_lookup_cache.py +++ b/src/bfabric/experimental/entity_lookup_cache.py @@ -3,7 +3,7 @@ from collections import defaultdict, OrderedDict from collections.abc import Hashable from contextlib import contextmanager -from typing import Any, TYPE_CHECKING +from typing import Any, TYPE_CHECKING, TypeVar from loguru import logger @@ -38,6 +38,9 @@ def __contains__(self, key: Hashable) -> bool: return key in self._entries +E = TypeVar("E", bound=Entity) + + class EntityLookupCache: """Implements the logic for caching entity lookup. 
@@ -53,7 +56,7 @@ def contains(self, entity_type: type[Entity], entity_id: int) -> bool: """Returns whether the cache contains an entity with the given type and ID.""" return entity_id in self._caches[entity_type] - def get(self, entity_type: type[Entity], entity_id: int) -> Entity | None: + def get(self, entity_type: type[E], entity_id: int) -> E | None: """Returns the entity with the given type and ID, if it exists in the cache.""" if self._caches[entity_type].get(entity_id): logger.debug(f"Cache hit for entity {entity_type} with ID {entity_id}") diff --git a/src/bfabric/results/result_container.py b/src/bfabric/results/result_container.py index 94ebb7c3..ee25b55a 100644 --- a/src/bfabric/results/result_container.py +++ b/src/bfabric/results/result_container.py @@ -1,8 +1,8 @@ from __future__ import annotations import logging -from collections.abc import Iterable from typing import Any, TYPE_CHECKING, overload +from collections.abc import Iterator import bfabric.results.response_format_dict as formatter @@ -41,7 +41,7 @@ def __getitem__(self, idx: slice) -> list[dict[str, Any]]: ... 
def __getitem__(self, idx: int | slice) -> dict[str, Any] | list[dict[str, Any]]: return self.results[idx] - def __iter__(self) -> Iterable[dict[str, Any]]: + def __iter__(self) -> Iterator[dict[str, Any]]: return iter(self.results) def __repr__(self) -> str: From 24d44978f539d70659475b513acdff7a6884c86b Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 10:55:28 +0100 Subject: [PATCH 076/144] fix more mypy errors --- src/bfabric/entities/core/has_many.py | 4 ++-- .../experimental/entity_lookup_cache.py | 22 +++++++++---------- .../experimental/workunit_definition.py | 10 +++++++-- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/bfabric/entities/core/has_many.py b/src/bfabric/entities/core/has_many.py index a044e29c..5be7c086 100644 --- a/src/bfabric/entities/core/has_many.py +++ b/src/bfabric/entities/core/has_many.py @@ -1,7 +1,7 @@ from __future__ import annotations -from collections.abc import Iterable from typing import Generic, TypeVar +from collections.abc import Iterator from polars import DataFrame @@ -76,7 +76,7 @@ def __getitem__(self, key: int) -> E: self._load_all() return self._items[key] - def __iter__(self) -> Iterable[E]: + def __iter__(self) -> Iterator[E]: self._load_all() return iter(sorted(self._items.values(), key=lambda x: self._items.keys())) diff --git a/src/bfabric/experimental/entity_lookup_cache.py b/src/bfabric/experimental/entity_lookup_cache.py index 93341a49..cc82e495 100644 --- a/src/bfabric/experimental/entity_lookup_cache.py +++ b/src/bfabric/experimental/entity_lookup_cache.py @@ -3,22 +3,24 @@ from collections import defaultdict, OrderedDict from collections.abc import Hashable from contextlib import contextmanager -from typing import Any, TYPE_CHECKING, TypeVar +from typing import Any, TYPE_CHECKING, TypeVar, Generic from loguru import logger if TYPE_CHECKING: from bfabric.entities.core.entity import Entity +T = TypeVar("T") -class Cache: + +class Cache(Generic[T]): """A FIFO cache with a 
maximum size, implemented by an OrderedDict.""" def __init__(self, max_size: int) -> None: - self._entries = OrderedDict() + self._entries: OrderedDict[Hashable, T] = OrderedDict() self._max_size = max_size - def get(self, key: Hashable) -> Any | None: + def get(self, key: Hashable) -> T | None: """Returns the value with the given key, if it exists, and marks it as used. If the key does not exist, returns None. @@ -27,7 +29,7 @@ def get(self, key: Hashable) -> Any | None: self._entries.move_to_end(key) return self._entries[key] - def put(self, key: Hashable, value: Any) -> None: + def put(self, key: Hashable, value: T) -> None: """Puts a key-value pair into the cache, marking it as used.""" if self._max_size != 0 and len(self._entries) >= self._max_size: self._entries.popitem(last=False) @@ -50,7 +52,7 @@ class EntityLookupCache: __class_instance = None def __init__(self, max_size: int = 0) -> None: - self._caches = defaultdict(lambda: Cache(max_size=max_size)) + self._caches: dict[type[Entity], Cache[Entity | None]] = defaultdict(lambda: Cache(max_size=max_size)) def contains(self, entity_type: type[Entity], entity_id: int) -> bool: """Returns whether the cache contains an entity with the given type and ID.""" @@ -63,16 +65,14 @@ def get(self, entity_type: type[E], entity_id: int) -> E | None: return self._caches[entity_type].get(entity_id) else: logger.debug(f"Cache miss for entity {entity_type} with ID {entity_id}") + return None def get_all(self, entity_type: type[Entity], entity_ids: list[int]) -> dict[int, Entity]: """Returns a dictionary of entities with the given type and IDs, containing only the entities that exist in the cache. 
""" - return { - entity_id: self.get(entity_type, entity_id) - for entity_id in entity_ids - if self.contains(entity_type, entity_id) - } + results = {entity_id: self.get(entity_type, entity_id) for entity_id in entity_ids} + return {entity_id: result for entity_id, result in results.items() if result is not None} def put(self, entity_type: type[Entity], entity_id: int, entity: Entity | None) -> None: """Puts an entity with the given type and ID into the cache.""" diff --git a/src/bfabric/experimental/workunit_definition.py b/src/bfabric/experimental/workunit_definition.py index 5daeceda..54bbc76a 100644 --- a/src/bfabric/experimental/workunit_definition.py +++ b/src/bfabric/experimental/workunit_definition.py @@ -32,6 +32,10 @@ def mutually_exclusive_dataset_resources(self) -> WorkunitExecutionDefinition: @classmethod def from_workunit(cls, workunit: Workunit) -> WorkunitExecutionDefinition: """Loads the workunit execution definition from the provided B-Fabric workunit.""" + if workunit.application is None: + raise ValueError("Workunit does not have an application") + if workunit.application.executable is None: + raise ValueError("Workunit application does not have an executable") data = { "raw_parameters": workunit.parameter_values, "executable": workunit.application.executable["program"], @@ -86,8 +90,10 @@ def from_ref(cls, workunit: Path | int, client: Bfabric, cache_file: Path | None if isinstance(workunit, Path): result = cls.from_yaml(workunit) else: - workunit = Workunit.find(id=workunit, client=client) - result = cls.from_workunit(workunit) + workunit_instance = Workunit.find(id=workunit, client=client) + if workunit_instance is None: + raise ValueError(f"Workunit with ID {workunit} does not exist") + result = cls.from_workunit(workunit=workunit_instance) if cache_file is not None: cache_file.parent.mkdir(exist_ok=True, parents=True) result.to_yaml(cache_file) From 17bb07fff65277826be534d97ee71d2f2689b187 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz 
Date: Thu, 31 Oct 2024 11:18:12 +0100 Subject: [PATCH 077/144] py3.9 compat --- src/bfabric/entities/core/has_many.py | 4 ++-- src/bfabric/entities/core/has_one.py | 4 ++-- src/bfabric/entities/core/relationship.py | 4 ++-- src/bfabric/experimental/entity_lookup_cache.py | 8 +++----- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/bfabric/entities/core/has_many.py b/src/bfabric/entities/core/has_many.py index 5be7c086..e0f4456c 100644 --- a/src/bfabric/entities/core/has_many.py +++ b/src/bfabric/entities/core/has_many.py @@ -6,10 +6,10 @@ from polars import DataFrame from bfabric import Bfabric -from bfabric.entities.core.entity import Entity +from bfabric.entities.core.entity import Entity # type: ignore from bfabric.entities.core.relationship import Relationship -E = TypeVar("E", bound=Entity) +E = TypeVar("E", bound="Entity") class HasMany(Relationship[E]): diff --git a/src/bfabric/entities/core/has_one.py b/src/bfabric/entities/core/has_one.py index 88366e57..5211876b 100644 --- a/src/bfabric/entities/core/has_one.py +++ b/src/bfabric/entities/core/has_one.py @@ -2,10 +2,10 @@ from typing import TypeVar -from bfabric.entities.core.entity import Entity +from bfabric.entities.core.entity import Entity # type: ignore from bfabric.entities.core.relationship import Relationship -E = TypeVar("E", bound=Entity) +E = TypeVar("E", bound="Entity") class HasOne(Relationship[E]): diff --git a/src/bfabric/entities/core/relationship.py b/src/bfabric/entities/core/relationship.py index 05219cac..0938ad30 100644 --- a/src/bfabric/entities/core/relationship.py +++ b/src/bfabric/entities/core/relationship.py @@ -4,9 +4,9 @@ from functools import cached_property from typing import TypeVar, Generic -from bfabric.entities.core.entity import Entity +from bfabric.entities.core.entity import Entity # type: ignore -E = TypeVar("E", bound=Entity) +E = TypeVar("E", bound="Entity") class Relationship(Generic[E]): diff --git 
a/src/bfabric/experimental/entity_lookup_cache.py b/src/bfabric/experimental/entity_lookup_cache.py index cc82e495..dbbb2612 100644 --- a/src/bfabric/experimental/entity_lookup_cache.py +++ b/src/bfabric/experimental/entity_lookup_cache.py @@ -3,14 +3,15 @@ from collections import defaultdict, OrderedDict from collections.abc import Hashable from contextlib import contextmanager -from typing import Any, TYPE_CHECKING, TypeVar, Generic +from typing import TypeVar, Generic, TYPE_CHECKING from loguru import logger if TYPE_CHECKING: - from bfabric.entities.core.entity import Entity + from bfabric.entities.core.entity import Entity # type: ignore T = TypeVar("T") +E = TypeVar("E", bound="Entity") class Cache(Generic[T]): @@ -40,9 +41,6 @@ def __contains__(self, key: Hashable) -> bool: return key in self._entries -E = TypeVar("E", bound=Entity) - - class EntityLookupCache: """Implements the logic for caching entity lookup. From 91345a1b3c7aed436c4f301f7ecf957b565a3710 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 11:43:55 +0100 Subject: [PATCH 078/144] fix mocking problem --- tests/bfabric/entities/test_workunit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/bfabric/entities/test_workunit.py b/tests/bfabric/entities/test_workunit.py index 0e09d46a..8798cd6f 100644 --- a/tests/bfabric/entities/test_workunit.py +++ b/tests/bfabric/entities/test_workunit.py @@ -98,7 +98,7 @@ def test_store_output_folder(mocker, mock_workunit) -> None: mock_application = mocker.MagicMock(storage={"projectfolderprefix": "xyz"}) mock_application.__getitem__.side_effect = {"technology": "tech", "name": "my app"}.__getitem__ mocker.patch.object(mock_workunit, "application", mock_application) - mocker.patch.object(mock_workunit, "container").id = 12 + mocker.patch.object(Workunit, "container", mocker.PropertyMock(return_value=mocker.MagicMock(id=12))) assert Path("xyz12/bfabric/tech/my_app/2024/2024-01/2024-01-02/workunit_30000") == 
mock_workunit.store_output_folder From d315c9bfca2bd32930d6ecd45c83b6f7ce4c8501 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 17:00:45 +0100 Subject: [PATCH 079/144] set uv reinstall to avoid confusion --- app_runner/pyproject.toml | 10 ++++++++++ pyproject.toml | 3 +++ 2 files changed, 13 insertions(+) diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index 2b619475..96963882 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -26,3 +26,13 @@ doc = [ "sphinx-book-theme", "sphinx-autobuild", ] + +[tool.uv] +reinstall-package = ["app_runner"] + +[tool.black] +line-length = 120 + +[tool.ruff] +line-length = 120 +indent-width = 4 diff --git a/pyproject.toml b/pyproject.toml index 866e8e74..670fe1fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,9 @@ Repository = "https://github.com/fgcz/bfabricPy" "bfabric_save_workflowstep.py" = "bfabric_scripts.bfabric_save_workflowstep:main" "bfabric_slurm_queue_status.py" = "bfabric_scripts.bfabric_slurm_queue_status:main" +[tool.uv] +reinstall-package = ["bfabric"] + [tool.black] line-length = 120 target-version = ["py39"] From 8ae0cbaa5997e1e8d4da0e3d1e6a6cb8044f8bc2 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 17:22:40 +0100 Subject: [PATCH 080/144] add py.typed --- docs/changelog.md | 1 + pyproject.toml | 4 ++-- src/bfabric/py.typed | 0 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 src/bfabric/py.typed diff --git a/docs/changelog.md b/docs/changelog.md index 6758924f..0b16c4e1 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -26,6 +26,7 @@ instead a separate Python package with its individual changelog. - Except for macOS x86_64 (which we assume is Rosetta emulation nowadays), we use the faster `polars` instead of `polars-lts-cpu`. - `BfabricRequestError` is now a `RuntimeError` subclass. +- Use flit as build backend and add `py.typed` marker. 
### Removed diff --git a/pyproject.toml b/pyproject.toml index 670fe1fc..52df0745 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] -requires = ["setuptools >= 61.0"] -build-backend = "setuptools.build_meta" +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" [project] name = "bfabric" diff --git a/src/bfabric/py.typed b/src/bfabric/py.typed new file mode 100644 index 00000000..e69de29b From d1b63c4b0332a60d43d5b0fd92aead0b0b893944 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 31 Oct 2024 17:24:16 +0100 Subject: [PATCH 081/144] add py.typed marker --- app_runner/src/app_runner/py.typed | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 app_runner/src/app_runner/py.typed diff --git a/app_runner/src/app_runner/py.typed b/app_runner/src/app_runner/py.typed new file mode 100644 index 00000000..e69de29b From 4cde06252a394eb65324eab808f3037fc8ee9000 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 13:40:53 +0100 Subject: [PATCH 082/144] add session to build documentation --- noxfile.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/noxfile.py b/noxfile.py index 01abf6f1..100ee323 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,4 +1,8 @@ import nox +import shutil +from tempfile import TemporaryDirectory +from pathlib import Path + nox.options.default_venv_backend = "uv" @@ -8,3 +12,21 @@ def tests(session): session.install(".[test]") session.run("uv", "pip", "list") session.run("pytest") + + +@nox.session(default=False) +def docs(session): + """Builds documentation for bfabricPy and app-runner and writes to site directory.""" + with TemporaryDirectory() as tmpdir: + session.install(".[doc]") + session.run("mkdocs", "build", "-d", Path(tmpdir) / "build_bfabricpy") + + session.install("./app_runner[doc]") + session.run("sphinx-build", "-M", "html", "app_runner/docs", Path(tmpdir) / "build_app_runner") + + target_dir = Path("site") + if 
target_dir.exists(): + shutil.rmtree(target_dir) + + shutil.copytree(Path(tmpdir) / "build_bfabricpy", target_dir) + shutil.copytree(Path(tmpdir) / "build_app_runner" / "html", target_dir / "app_runner") From a61bacf2acc6a69e58ac0c9081b5103c791744b2 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 13:52:03 +0100 Subject: [PATCH 083/144] add command to publish the docs --- noxfile.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/noxfile.py b/noxfile.py index 100ee323..e0027ac4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -30,3 +30,14 @@ def docs(session): shutil.copytree(Path(tmpdir) / "build_bfabricpy", target_dir) shutil.copytree(Path(tmpdir) / "build_app_runner" / "html", target_dir / "app_runner") + + +@nox.session(default=False) +def publish_docs(session): + """Publish documentation to GitHub Pages by updating gh-pages branch.""" + site_dir = Path("site") + if not site_dir.exists(): + session.error("Site directory does not exist. Run 'nox -s docs' first.") + + session.install("ghp-import") + session.run("ghp-import", "--force", "--no-jekyll", "--push", "site") From 660c2186ecf2dc5baf88b1ba0b53664f41f388cb Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 13:54:36 +0100 Subject: [PATCH 084/144] link the docs --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f3670939..7c901644 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,10 @@ ## Documentation -You can find the up-to-date documentation at [https://fgcz.github.io/bfabricPy](https://fgcz.github.io/bfabricPy). 
+| Package | Link | +| ---------- | -------------------------------------------------------------------- | +| bfabricPy | [https://fgcz.github.io/bfabricPy](https://fgcz.github.io/bfabricPy) | +| app-runner | https://fgcz.github.io/bfabricPy/app_runner/ | ## Introduction From 15950abfba62a4df5329f047cca0be5fc12c4e0b Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 14:10:29 +0100 Subject: [PATCH 085/144] revert to setuptools for namespace package support --- docs/changelog.md | 2 +- pyproject.toml | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 0b16c4e1..2f36dc8c 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -26,7 +26,7 @@ instead a separate Python package with its individual changelog. - Except for macOS x86_64 (which we assume is Rosetta emulation nowadays), we use the faster `polars` instead of `polars-lts-cpu`. - `BfabricRequestError` is now a `RuntimeError` subclass. -- Use flit as build backend and add `py.typed` marker. +- Add `py.typed` marker. 
### Removed diff --git a/pyproject.toml b/pyproject.toml index 52df0745..59588b99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] -requires = ["flit_core >=3.2,<4"] -build-backend = "flit_core.buildapi" +requires = ["setuptools >= 61.0"] +build-backend = "setuptools.build_meta" [project] name = "bfabric" @@ -77,8 +77,11 @@ Repository = "https://github.com/fgcz/bfabricPy" "bfabric_save_workflowstep.py" = "bfabric_scripts.bfabric_save_workflowstep:main" "bfabric_slurm_queue_status.py" = "bfabric_scripts.bfabric_slurm_queue_status:main" +[tool.setuptools.package-data] +"*" = ["py.typed"] + [tool.uv] -reinstall-package = ["bfabric"] +reinstall-package = ["bfabric", "bfabric_scripts"] [tool.black] line-length = 120 From ff5d18b38bbc0172129a18a5ebaf8e108bd955ef Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 14:21:30 +0100 Subject: [PATCH 086/144] test the presence of the py.typed file --- noxfile.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/noxfile.py b/noxfile.py index e0027ac4..5a2400b4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -14,6 +14,21 @@ def tests(session): session.run("pytest") +@nox.session +def test_py_typed(session): + """Verify py.typed is properly installed with the package.""" + session.install(".") + result = session.run( + "python", + "-c", + "import bfabric, pathlib; p=pathlib.Path(bfabric.__file__).parent/'py.typed'; print(p.exists())", + silent=True, + stderr=None, + ) + if not result or result.strip() != "True": + session.error("py.typed not found in installed package") + + @nox.session(default=False) def docs(session): """Builds documentation for bfabricPy and app-runner and writes to site directory.""" From d5f1504df28c3ab7aaf6d6de8d472e6c90740df6 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 14:36:15 +0100 Subject: [PATCH 087/144] rename _spec to spec --- app_runner/src/app_runner/cli/outputs.py | 2 +- app_runner/src/app_runner/cli/validate.py 
| 2 +- app_runner/src/app_runner/output_registration/register.py | 2 +- .../src/app_runner/output_registration/{_spec.py => spec.py} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename app_runner/src/app_runner/output_registration/{_spec.py => spec.py} (100%) diff --git a/app_runner/src/app_runner/cli/outputs.py b/app_runner/src/app_runner/cli/outputs.py index 43a95d9d..cf5db96c 100644 --- a/app_runner/src/app_runner/cli/outputs.py +++ b/app_runner/src/app_runner/cli/outputs.py @@ -7,7 +7,7 @@ from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging from bfabric.entities import Workunit -from app_runner.output_registration._spec import OutputsSpec +from app_runner.output_registration.spec import OutputsSpec from app_runner.output_registration.register import register_all app_outputs = cyclopts.App("outputs", help="Register output files for an app.") diff --git a/app_runner/src/app_runner/cli/validate.py b/app_runner/src/app_runner/cli/validate.py index 3caf0cad..96ca35f4 100644 --- a/app_runner/src/app_runner/cli/validate.py +++ b/app_runner/src/app_runner/cli/validate.py @@ -9,7 +9,7 @@ from app_runner.app_runner._spec import AppSpec from app_runner.input_preparation.spec import InputsSpec -from app_runner.output_registration._spec import OutputsSpec +from app_runner.output_registration.spec import OutputsSpec app_validate = cyclopts.App("validate", help="Validate yaml files.") diff --git a/app_runner/src/app_runner/output_registration/register.py b/app_runner/src/app_runner/output_registration/register.py index f2619925..428dd053 100644 --- a/app_runner/src/app_runner/output_registration/register.py +++ b/app_runner/src/app_runner/output_registration/register.py @@ -6,7 +6,7 @@ from bfabric import Bfabric from bfabric.entities import Storage, Workunit -from app_runner.output_registration._spec import ( +from app_runner.output_registration.spec import ( CopyResourceSpec, UpdateExisting, OutputsSpec, diff --git 
a/app_runner/src/app_runner/output_registration/_spec.py b/app_runner/src/app_runner/output_registration/spec.py similarity index 100% rename from app_runner/src/app_runner/output_registration/_spec.py rename to app_runner/src/app_runner/output_registration/spec.py From 86b00532228315225c8d6009e5869a17c5a8aac2 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 14:36:28 +0100 Subject: [PATCH 088/144] add initial docs on output_specification.md --- app_runner/docs/specs/output_specification.md | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/app_runner/docs/specs/output_specification.md b/app_runner/docs/specs/output_specification.md index db7f5592..b6afd723 100644 --- a/app_runner/docs/specs/output_specification.md +++ b/app_runner/docs/specs/output_specification.md @@ -5,4 +5,56 @@ The file is usually called `outputs.yml` and lists the different output files, w ### General structure -To be described. +Generally the structure is a yaml file containing a key `outputs` which is a list of dictionaries, each representing an +output file. +Each output has a `type` key which identifies the output type. +This will allow us to extend this logic to different sources in the future. + +An example file could look like: + +```yaml +outputs: +- type: bfabric_copy_resource + local_path: /tmp/work/hello.txt + store_entry_path: WU123456_hello.txt +- type: bfabric_dataset + local_path: /tmp/work/hello.csv + separator: "," + name: Hello Dataset +``` + +## Commands + +### Validation + +The output file can be validated with the command: + +```bash +bfabric-app-runner validate outputs-spec outputs.yml +``` + +Which on success will output a pretty-printed version of the outputs file. +Validation will also be performed by all other commands, so this is not strictly necessary. 
+ +### Register files + +To perform the registration to B-Fabric the following can be used: + +```bash +bfabric-app-runner outputs register outputs.yml --workunit-id 1234 +``` + +Please note: + +- The workunit ID needs to be specified, so the correct information can be retrieved. (TODO but instead of the workunit id it should also be possible to pass the ref) +- Several actions might require a particular user to be possible, e.g. the `bfabric_copy_resource` will require a user + with permission to create the particular file over SSH. + +## Reference + +```{eval-rst} +.. automodule:: app_runner.output_registration.spec + :members: + :undoc-members: + :show-inheritance: +``` From b1ba252c06ed717189682ed61a3926f81b294482 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 14:49:27 +0100 Subject: [PATCH 089/144] add a few notes on the workunit_definition --- app_runner/docs/index.md | 1 + app_runner/docs/workunit_definition.md | 20 +++++++++++++++++++ .../experimental/workunit_definition.py | 1 + 3 files changed, 22 insertions(+) create mode 100644 app_runner/docs/workunit_definition.md diff --git a/app_runner/docs/index.md b/app_runner/docs/index.md index 038cc646..18b73216 100644 --- a/app_runner/docs/index.md +++ b/app_runner/docs/index.md @@ -1,5 +1,6 @@ ```{toctree} :glob: +workunit_definition specs/input_specification specs/output_specification specs/app_specification diff --git a/app_runner/docs/workunit_definition.md b/app_runner/docs/workunit_definition.md new file mode 100644 index 00000000..7f449074 --- /dev/null +++ b/app_runner/docs/workunit_definition.md @@ -0,0 +1,20 @@ +## Workunit Definition + +The idea of the workunit definition is to provide a persistable and comprehensive description of a workunit. +To keep the logic even more modular it is separated into two components, the `execution` and the `registration` +information. 
+ +### Creating WorkunitDefinition instances + +The `WorkunitDefinition` class is a Pydantic model and can be created by passing a dictionary to the constructor. +However, for convenience and easier integration into command line tools there is a constructor for both creating an +instance from a Bfabric entity, and parsing a YAML file which contains a persisted version of the workunit + +### Reference + +```{eval-rst} +.. automodule:: bfabric.experimental.workunit_definition + :members: + :undoc-members: + :show-inheritance: +``` diff --git a/src/bfabric/experimental/workunit_definition.py b/src/bfabric/experimental/workunit_definition.py index 54bbc76a..4b97a903 100644 --- a/src/bfabric/experimental/workunit_definition.py +++ b/src/bfabric/experimental/workunit_definition.py @@ -16,6 +16,7 @@ class WorkunitExecutionDefinition(BaseModel): model_config = ConfigDict(extra="forbid") raw_parameters: dict[str, str | None] + # TODO drop the execuctable executable: Path dataset: int | None = None resources: list[int] = [] From e82c7da0ed3536648b642b537df58717cd112408 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 15:09:00 +0100 Subject: [PATCH 090/144] add forgotten, but required flags --- app_runner/src/app_runner/output_registration/spec.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app_runner/src/app_runner/output_registration/spec.py b/app_runner/src/app_runner/output_registration/spec.py index 0c7276d2..01f7ccbc 100644 --- a/app_runner/src/app_runner/output_registration/spec.py +++ b/app_runner/src/app_runner/output_registration/spec.py @@ -38,6 +38,8 @@ class SaveDatasetSpec(BaseModel): local_path: Path separator: str name: str | None = None + has_header: bool = True + invalid_characters: str = "" SpecType = Union[CopyResourceSpec, SaveDatasetSpec] From cd45e90419f04ff8ea90600f31cda205d396692f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 15:09:12 +0100 Subject: [PATCH 091/144] add py.typed file --- 
src/bfabric_scripts/py.typed | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/bfabric_scripts/py.typed diff --git a/src/bfabric_scripts/py.typed b/src/bfabric_scripts/py.typed new file mode 100644 index 00000000..e69de29b From 46b764b7443ec5c1904f887e95fa32744afb2bfd Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 15:09:43 +0100 Subject: [PATCH 092/144] add mypy --- app_runner/pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index 96963882..5a141d87 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -26,6 +26,11 @@ doc = [ "sphinx-book-theme", "sphinx-autobuild", ] +dev = [ + "app_runner[doc]", + "mypy", + "types-PyYAML", +] [tool.uv] reinstall-package = ["app_runner"] From c2a54747694fa8fc76c6c37e4696aa186a2ae64e Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 15:35:44 +0100 Subject: [PATCH 093/144] deps --- app_runner/pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index 5a141d87..86a19417 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -13,6 +13,8 @@ authors = [ requires-python = ">=3.12" dependencies = [ "bfabric @ git+https://github.com/fgcz/bfabricPy.git@main", + "pydantic", + "glom", ] [project.scripts] From d8bf11d8b0a4a5208ea9706ca3297a74afc90bf8 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 15:36:49 +0100 Subject: [PATCH 094/144] initial use of glom --- app_runner/src/app_runner/output_registration/register.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app_runner/src/app_runner/output_registration/register.py b/app_runner/src/app_runner/output_registration/register.py index 428dd053..6898ff46 100644 --- a/app_runner/src/app_runner/output_registration/register.py +++ b/app_runner/src/app_runner/output_registration/register.py @@ -16,6 +16,7 @@ 
from app_runner.util.checksums import md5sum from app_runner.util.scp import scp from bfabric_scripts.bfabric_save_csv2dataset import bfabric_save_csv2dataset +from glom import glom def _get_output_folder(spec: CopyResourceSpec, workunit: Workunit) -> Path: @@ -90,7 +91,7 @@ def register_all( for spec in specs_list: logger.debug(f"Registering {spec}") if isinstance(spec, CopyResourceSpec): - storage = workunit.application.storage + storage = glom(workunit, "application.storage") copy_file_to_storage(spec, workunit=workunit, storage=storage, ssh_user=ssh_user) if not default_resource_was_reused: resource_id = find_default_resource_id(workunit=workunit) From fd2686acec2c54d85c48daa9cf2791afff1995cf Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 15:36:53 +0100 Subject: [PATCH 095/144] mypy --- app_runner/src/app_runner/util/scp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app_runner/src/app_runner/util/scp.py b/app_runner/src/app_runner/util/scp.py index 34624239..117d8a5d 100644 --- a/app_runner/src/app_runner/util/scp.py +++ b/app_runner/src/app_runner/util/scp.py @@ -31,7 +31,7 @@ def scp(source: str | Path, target: str | Path, *, user: str | None = None, mkdi if mkdir: if target_remote: host, path = target.split(":", 1) - parent_path = str(Path(path).parent) + parent_path = Path(path).parent logger.debug(f"ssh {host} mkdir -p {parent_path}") subprocess.run(["ssh", host, "mkdir", "-p", parent_path], check=True) else: From fe20ab88748c0beb80a5f78d654bbe61521e47e0 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 15:42:58 +0100 Subject: [PATCH 096/144] add a short section on the workunit reference --- app_runner/docs/workunit_definition.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/app_runner/docs/workunit_definition.md b/app_runner/docs/workunit_definition.md index 7f449074..06e08e1b 100644 --- a/app_runner/docs/workunit_definition.md +++ b/app_runner/docs/workunit_definition.md 
@@ -10,6 +10,18 @@ The `WorkunitDefinition` class is a Pydantic model and can be created by passing However, for convenience and easier integration into command line tools there is a constructor for both creating an instance from a Bfabric entity, and parsing a YAML file which contains a persisted version of the workunit +### Workunit references + +Several functions and command line tools allow providing a "workunit reference". This means, that either the ID or a +path to a local YAML file can be passed to this function. +If the input is a path, then the persisted information will be retrieved to instantiate a `WorkunitDefinition` instance, +whereas if it is an integer, the information will be obtained by querying the B-Fabric API. + +Since in some workflows the workunit will be used several times, and in particular not necessarily in the same process, +the usual entity caching mechanism might not be able to cache the requests. +Therefore, in many cases passing a reference to a YAML file is the preferred way to provide the workunit information, +as it will reduce the number of requests to the B-Fabric API (sometimes even to zero). + ### Reference ```{eval-rst} From 0857c688e5a19e091a9ac5bb2ef5a6fc158631d4 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 16:48:51 +0100 Subject: [PATCH 097/144] add architecture section --- app_runner/docs/architecture/overview.md | 68 ++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 app_runner/docs/architecture/overview.md diff --git a/app_runner/docs/architecture/overview.md b/app_runner/docs/architecture/overview.md new file mode 100644 index 00000000..ffc9eade --- /dev/null +++ b/app_runner/docs/architecture/overview.md @@ -0,0 +1,68 @@ +## Architecture Overview + +### App runner activity diagram + +```{eval-rst} +.. 
uml:: + + title + App Runner Activity Diagram + end title + + start + :workunit_ref] + partition App Runner { + :Retrieve workunit and app information; + split + :workunit_definition.yml] + split again + :app_definition.yml] + note right + These are maintained in a + centralized repository. + end note + end split + + :Set workunit processing status; + :""app-runner app dispatch""; + note right + This step is supposed to be deterministic! + To allow distributing the tasks in the future. + end note + split + :tasks.yml] + split again + :task1/inputs.yml + task1/params.yml] + split again + :task2/inputs.yml + task2/params.yml] + end split + ' Unclear + ' :Precondition check; + :""app-runner chunk process-all""] + } + + note right + The actual ordering will be decided here. + tasks.yml declares task dependencies. + end note + fork + :Stage inputs 1; + partition App { + :Run task 1; + :outputs.yml] + } + :Register outputs; + fork again + :Stage inputs 2; + partition App { + :Run task 2; + :outputs.yml] + } + :Register outputs; + end fork + + :Set workunit available status; + stop +``` From cd040e01432f675c078cab3111a8ea7680fe4300 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 16:49:27 +0100 Subject: [PATCH 098/144] build plantuml in container --- app_runner/docs/conf.py | 7 ++++++- app_runner/docs/plantuml_wrapper.sh | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100755 app_runner/docs/plantuml_wrapper.sh diff --git a/app_runner/docs/conf.py b/app_runner/docs/conf.py index 2531748e..718cc811 100644 --- a/app_runner/docs/conf.py +++ b/app_runner/docs/conf.py @@ -3,6 +3,8 @@ # For the full list of built-in configuration values, see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html +from pathlib import Path + # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information @@ 
-13,11 +15,14 @@ # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ["myst_parser", "sphinx.ext.autodoc", "sphinxcontrib.autodoc_pydantic"] +extensions = ["myst_parser", "sphinx.ext.autodoc", "sphinxcontrib.autodoc_pydantic", "sphinxcontrib.plantuml"] templates_path = ["_templates"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +plantuml = str((Path(__file__).parent / "plantuml_wrapper.sh").absolute()) +plantuml_output_format = "svg" + # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output diff --git a/app_runner/docs/plantuml_wrapper.sh b/app_runner/docs/plantuml_wrapper.sh new file mode 100755 index 00000000..5f70cda1 --- /dev/null +++ b/app_runner/docs/plantuml_wrapper.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -e + +# Configuration +DOCKER="docker" +IMAGE="plantuml/plantuml:1.2024.7" + +# Ensure container exists +$DOCKER pull $IMAGE >/dev/null 2>&1 + +# Read PlantUML input +input=$(cat) + +# Check if input is empty +if [ -z "$input" ]; then + echo "Error: No PlantUML diagram provided via stdin" >&2 + exit 1 +fi + +# Run PlantUML in pipe mode +echo "$input" | $DOCKER run --rm -i \ + --user "$(id -u):$(id -g)" \ + $IMAGE \ + -pipe -tsvg -charset utf-8 From c9ef8b3ee63b5d91db1048f63310363c54f39d3b Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 16:50:14 +0100 Subject: [PATCH 099/144] link the new page --- app_runner/docs/index.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/app_runner/docs/index.md b/app_runner/docs/index.md index 18b73216..fc94d635 100644 --- a/app_runner/docs/index.md +++ b/app_runner/docs/index.md @@ -1,15 +1,18 @@ +## Install App Runner + +```bash +pipx install 
app_runner@git+https://github.com/fgcz/bfabricPy.git@main#egg=app_runner&subdirectory=app_runner +``` + +## Contents + ```{toctree} :glob: workunit_definition +architecture/overview specs/input_specification specs/output_specification specs/app_specification changelog * ``` - -## Install App Runner - -```bash -pipx install app_runner@git+https://github.com/fgcz/bfabricPy.git@main#egg=app_runner&subdirectory=app_runner -``` From 61f07ccae66783e11747fcc4e2e87c1b118b5fc3 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 16:53:23 +0100 Subject: [PATCH 100/144] more generic solution --- app_runner/docs/plantuml_wrapper.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app_runner/docs/plantuml_wrapper.sh b/app_runner/docs/plantuml_wrapper.sh index 5f70cda1..f21e1b93 100755 --- a/app_runner/docs/plantuml_wrapper.sh +++ b/app_runner/docs/plantuml_wrapper.sh @@ -21,4 +21,4 @@ fi echo "$input" | $DOCKER run --rm -i \ --user "$(id -u):$(id -g)" \ $IMAGE \ - -pipe -tsvg -charset utf-8 + "$@" From 52c26ef0a6208a0d6ec75ddbb7617aafd56ee0be Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 16:55:15 +0100 Subject: [PATCH 101/144] add dependency for plantuml --- app_runner/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index 86a19417..c7257688 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -27,6 +27,7 @@ doc = [ "autodoc_pydantic", "sphinx-book-theme", "sphinx-autobuild", + "sphinxcontrib-plantuml", ] dev = [ "app_runner[doc]", From 215c2c6f8b929b77320bcf543f42a1f18aa5b01b Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 17:20:47 +0100 Subject: [PATCH 102/144] add app model docs --- app_runner/docs/architecture/overview.md | 6 +++ .../docs/architecture/uml/app_model.plantuml | 41 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 app_runner/docs/architecture/uml/app_model.plantuml diff 
--git a/app_runner/docs/architecture/overview.md b/app_runner/docs/architecture/overview.md index ffc9eade..fd9bab96 100644 --- a/app_runner/docs/architecture/overview.md +++ b/app_runner/docs/architecture/overview.md @@ -1,5 +1,11 @@ ## Architecture Overview +### App model + +```{eval-rst} +.. uml:: uml/app_model.plantuml +``` + ### App runner activity diagram ```{eval-rst} diff --git a/app_runner/docs/architecture/uml/app_model.plantuml b/app_runner/docs/architecture/uml/app_model.plantuml new file mode 100644 index 00000000..75a9d1de --- /dev/null +++ b/app_runner/docs/architecture/uml/app_model.plantuml @@ -0,0 +1,41 @@ +@startuml + +participant BFabric +participant Storage +participant AppRunner +participant App + +BFabric -> AppRunner: run(app) + +group dispatch app + AppRunner -> App: dispatch(app) + App -> BFabric: query + BFabric -> App: response + App -> AppRunner: "chunks.yml", "chunk1/input.yml", ... +end + +loop for each chunk + group prepare inputs + AppRunner -> BFabric: query + BFabric -> AppRunner: response + Storage -> AppRunner: Copy Files + end + + group process chunk + AppRunner -> App: process("chunk1") + App -> AppRunner: notify + end + + group collect outputs + AppRunner -> App: collect("chunk1") + App -> BFabric: query + BFabric -> App: response + App -> AppRunner: "outputs.yml" + end + + AppRunner -> Storage: Copy Files + AppRunner -> BFabric: Save + +end + +@enduml From 8ca6a0b2ae66024197544ea98c8aab7f5d516ff0 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 6 Nov 2024 17:22:19 +0100 Subject: [PATCH 103/144] move uml to file --- app_runner/docs/architecture/overview.md | 63 +------------------ .../uml/app_runner_activity.plantuml | 62 ++++++++++++++++++ 2 files changed, 63 insertions(+), 62 deletions(-) create mode 100644 app_runner/docs/architecture/uml/app_runner_activity.plantuml diff --git a/app_runner/docs/architecture/overview.md b/app_runner/docs/architecture/overview.md index fd9bab96..dbb59975 100644 --- 
a/app_runner/docs/architecture/overview.md +++ b/app_runner/docs/architecture/overview.md @@ -9,66 +9,5 @@ ### App runner activity diagram ```{eval-rst} -.. uml:: - - title - App Runner Activity Diagram - end title - - start - :workunit_ref] - partition App Runner { - :Retrieve workunit and app information; - split - :workunit_definition.yml] - split again - :app_definition.yml] - note right - These are maintained in a - centralized repository. - end note - end split - - :Set workunit processing status; - :""app-runner app dispatch""; - note right - This step is supposed to be deterministic! - To allow distributing the tasks in the future. - end note - split - :tasks.yml] - split again - :task1/inputs.yml - task1/params.yml] - split again - :task2/inputs.yml - task2/params.yml] - end split - ' Unclear - ' :Precondition check; - :""app-runner chunk process-all""] - } - - note right - The actual ordering will be decided here. - tasks.yml declares task dependencies. - end note - fork - :Stage inputs 1; - partition App { - :Run task 1; - :outputs.yml] - } - :Register outputs; - fork again - :Stage inputs 2; - partition App { - :Run task 2; - :outputs.yml] - } - :Register outputs; - end fork - - :Set workunit available status; - stop +.. uml:: uml/app_runner_activity.plantuml ``` diff --git a/app_runner/docs/architecture/uml/app_runner_activity.plantuml b/app_runner/docs/architecture/uml/app_runner_activity.plantuml new file mode 100644 index 00000000..c880c126 --- /dev/null +++ b/app_runner/docs/architecture/uml/app_runner_activity.plantuml @@ -0,0 +1,62 @@ +@startuml + title + App Runner Activity Diagram + end title + + start + :workunit_ref] + partition App Runner { + :Retrieve workunit and app information; + split + :workunit_definition.yml] + split again + :app_definition.yml] + note right + These are maintained in a + centralized repository. 
+ end note + end split + + :Set workunit processing status; + :""app-runner app dispatch""; + note right + This step is supposed to be deterministic! + To allow distributing the tasks in the future. + end note + split + :tasks.yml] + split again + :task1/inputs.yml + task1/params.yml] + split again + :task2/inputs.yml + task2/params.yml] + end split + ' Unclear + ' :Precondition check; + :""app-runner chunk process-all""] + } + + note right + The actual ordering will be decided here. + tasks.yml declares task dependencies. + end note + fork + :Stage inputs 1; + partition App { + :Run task 1; + :outputs.yml] + } + :Register outputs; + fork again + :Stage inputs 2; + partition App { + :Run task 2; + :outputs.yml] + } + :Register outputs; + end fork + + :Set workunit available status; + stop +@enduml From b5107be65a1da6aa3f59936dad4c967ad80ad9e0 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 7 Nov 2024 09:03:36 +0100 Subject: [PATCH 104/144] delete old main script --- .../src/app_runner/app_runner/__main__.py | 39 ------------------- 1 file changed, 39 deletions(-) delete mode 100644 app_runner/src/app_runner/app_runner/__main__.py diff --git a/app_runner/src/app_runner/app_runner/__main__.py b/app_runner/src/app_runner/app_runner/__main__.py deleted file mode 100644 index 18dbc674..00000000 --- a/app_runner/src/app_runner/app_runner/__main__.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import annotations - -import argparse -from pathlib import Path -from typing import Union - -import yaml -from pydantic import TypeAdapter - -from bfabric.bfabric import Bfabric -from bfabric.cli_formatting import setup_script_logging -from app_runner.app_runner._spec import AppSpec -from app_runner.app_runner.runner import run_app - - -def main() -> None: - setup_script_logging() - client = Bfabric.from_config() - parser = argparse.ArgumentParser() - parser.add_argument("action", default="run", choices=["run"]) - parser.add_argument("--app-spec", type=Path, 
required=True) - parser.add_argument("--workunit-ref", type=TypeAdapter(Union[int, Path]).validate_strings, required=True) - parser.add_argument("--work-dir", type=Path, required=True) - parser.add_argument("--ssh-user", type=str, required=False) - parser.add_argument("--read-only", action="store_true") - args = parser.parse_args() - app_spec = AppSpec.model_validate(yaml.safe_load(args.app_spec.read_text())) - run_app( - app_spec=app_spec, - workunit_ref=args.workunit_ref, - work_dir=args.work_dir, - client=client, - ssh_user=args.ssh_user, - read_only=args.read_only, - ) - - -if __name__ == "__main__": - main() From f533c9e1b3a0729efa2d00cf99c4d350af5506c3 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 7 Nov 2024 09:17:15 +0100 Subject: [PATCH 105/144] improve code quality --- app_runner/pyproject.toml | 4 ++++ app_runner/src/app_runner/app_runner/_spec.py | 8 +++---- .../src/app_runner/app_runner/runner.py | 11 ++++++--- app_runner/src/app_runner/cli/app.py | 5 +++- app_runner/src/app_runner/cli/chunk.py | 5 +++- app_runner/src/app_runner/cli/inputs.py | 3 ++- app_runner/src/app_runner/cli/outputs.py | 5 +++- app_runner/src/app_runner/cli/validate.py | 5 +++- .../dispatch/dispatch_individual_resources.py | 18 ++++++++++---- .../app_runner/input_preparation/integrity.py | 7 ++++-- .../input_preparation/list_inputs.py | 11 ++++++--- .../app_runner/input_preparation/prepare.py | 21 +++++++++++++--- .../src/app_runner/input_preparation/spec.py | 6 ++--- .../output_registration/__init__.py | 2 +- .../output_registration/register.py | 24 +++++++++++++------ .../app_runner/output_registration/spec.py | 8 ++++--- app_runner/src/app_runner/util/checksums.py | 5 +++- 17 files changed, 108 insertions(+), 40 deletions(-) diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index c7257688..57ba85c4 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -44,3 +44,7 @@ line-length = 120 [tool.ruff] line-length = 120 indent-width = 4 
+ +[tool.ruff.lint] +select = ["ANN", "BLE", "D103", "E", "EXE", "F", "N", "PLW", "PTH", "SIM", "TCH", "UP", "W191"] +ignore = ["ANN101", "ANN102"] diff --git a/app_runner/src/app_runner/app_runner/_spec.py b/app_runner/src/app_runner/app_runner/_spec.py index b8f5c35f..5c6cdae1 100644 --- a/app_runner/src/app_runner/app_runner/_spec.py +++ b/app_runner/src/app_runner/app_runner/_spec.py @@ -3,7 +3,7 @@ import os import shlex from pathlib import Path -from typing import Literal, Annotated, Union +from typing import Literal, Annotated from pydantic import BaseModel, Discriminator @@ -24,7 +24,7 @@ class MountOptions(BaseModel): read_only: list[tuple[Path, Path]] = [] share_bfabric_config: bool = True - def collect(self, work_dir: Path): + def collect(self, work_dir: Path) -> list[tuple[Path, Path, bool]]: mounts = [] if self.share_bfabric_config: mounts.append((Path("~/.bfabricpy.yml"), Path("/home/user/.bfabricpy.yml"), True)) @@ -48,7 +48,7 @@ class CommandDocker(BaseModel): mounts: MountOptions = MountOptions() def to_shell(self, work_dir: Path | None = None) -> list[str]: - work_dir = (work_dir or Path("")).expanduser().absolute() + work_dir = (work_dir or Path()).expanduser().absolute() mounts = self.mounts.collect(work_dir=work_dir) mount_args = [] for host, container, read_only in mounts: @@ -76,7 +76,7 @@ def to_shell(self, work_dir: Path | None = None) -> list[str]: ] -Command = Annotated[Union[CommandShell, CommandDocker], Discriminator("type")] +Command = Annotated[CommandShell | CommandDocker, Discriminator("type")] class CommandsSpec(BaseModel): diff --git a/app_runner/src/app_runner/app_runner/runner.py b/app_runner/src/app_runner/app_runner/runner.py index 03ee5fe3..7cd8c596 100644 --- a/app_runner/src/app_runner/app_runner/runner.py +++ b/app_runner/src/app_runner/app_runner/runner.py @@ -3,16 +3,19 @@ import shlex import subprocess from pathlib import Path +from typing import TYPE_CHECKING import yaml +from bfabric.experimental.workunit_definition 
import WorkunitDefinition from loguru import logger from pydantic import BaseModel -from bfabric import Bfabric -from app_runner.app_runner._spec import AppSpec from app_runner.input_preparation import prepare_folder from app_runner.output_registration import register_outputs -from bfabric.experimental.workunit_definition import WorkunitDefinition + +if TYPE_CHECKING: + from app_runner.app_runner._spec import AppSpec + from bfabric import Bfabric class Runner: @@ -66,6 +69,8 @@ def run_app( read_only: bool = False, dispatch_active: bool = True, ) -> None: + """Executes all steps of the provided app.""" + # TODO would it be possible, to reuse the individual steps commands so there is certainly only one definition? work_dir = work_dir.resolve() workunit_ref = workunit_ref.resolve() if isinstance(workunit_ref, Path) else workunit_ref diff --git a/app_runner/src/app_runner/cli/app.py b/app_runner/src/app_runner/cli/app.py index e2297ed3..3531b5be 100644 --- a/app_runner/src/app_runner/cli/app.py +++ b/app_runner/src/app_runner/cli/app.py @@ -1,6 +1,5 @@ from __future__ import annotations -from pathlib import Path import cyclopts import yaml @@ -10,6 +9,10 @@ from app_runner.app_runner._spec import AppSpec from app_runner.app_runner.runner import run_app, Runner from bfabric.experimental.entity_lookup_cache import EntityLookupCache +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path app_app = cyclopts.App("app", help="Run an app.") diff --git a/app_runner/src/app_runner/cli/chunk.py b/app_runner/src/app_runner/cli/chunk.py index d04dfa09..f8829a2b 100644 --- a/app_runner/src/app_runner/cli/chunk.py +++ b/app_runner/src/app_runner/cli/chunk.py @@ -1,6 +1,5 @@ from __future__ import annotations -from pathlib import Path import cyclopts import yaml @@ -10,6 +9,10 @@ from app_runner.app_runner._spec import AppSpec from app_runner.app_runner.runner import run_app, Runner from bfabric.experimental.entity_lookup_cache import EntityLookupCache 
+from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path app_chunk = cyclopts.App("chunk", help="Run an app on a chunk. You can create the chunks with `app dispatch`.") diff --git a/app_runner/src/app_runner/cli/inputs.py b/app_runner/src/app_runner/cli/inputs.py index 50327d49..08987b32 100644 --- a/app_runner/src/app_runner/cli/inputs.py +++ b/app_runner/src/app_runner/cli/inputs.py @@ -69,10 +69,11 @@ def get_inputs_and_print( target_folder: Path | None, check: bool, ) -> list[FileState]: + """Reads the input files, performing integrity checks if requested, and prints the results.""" client = Bfabric.from_config() input_states = list_input_states( specs=InputsSpec.read_yaml(inputs_yaml), - target_folder=target_folder or Path(""), + target_folder=target_folder or Path(), client=client, check_files=check, ) diff --git a/app_runner/src/app_runner/cli/outputs.py b/app_runner/src/app_runner/cli/outputs.py index cf5db96c..0acd91e8 100644 --- a/app_runner/src/app_runner/cli/outputs.py +++ b/app_runner/src/app_runner/cli/outputs.py @@ -1,6 +1,5 @@ from __future__ import annotations -from pathlib import Path import cyclopts @@ -9,6 +8,10 @@ from bfabric.entities import Workunit from app_runner.output_registration.spec import OutputsSpec from app_runner.output_registration.register import register_all +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path app_outputs = cyclopts.App("outputs", help="Register output files for an app.") diff --git a/app_runner/src/app_runner/cli/validate.py b/app_runner/src/app_runner/cli/validate.py index 96ca35f4..76d5c511 100644 --- a/app_runner/src/app_runner/cli/validate.py +++ b/app_runner/src/app_runner/cli/validate.py @@ -1,6 +1,5 @@ from __future__ import annotations -from pathlib import Path import cyclopts import rich @@ -10,6 +9,10 @@ from app_runner.app_runner._spec import AppSpec from app_runner.input_preparation.spec import InputsSpec from app_runner.output_registration.spec 
import OutputsSpec +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path app_validate = cyclopts.App("validate", help="Validate yaml files.") diff --git a/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py index ef2917be..f2ad0d28 100644 --- a/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py +++ b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py @@ -1,15 +1,16 @@ from __future__ import annotations -from pathlib import Path -from typing import Any, Self +from typing import Any, TYPE_CHECKING import yaml +from bfabric.entities import Resource, Dataset from loguru import logger from pydantic import BaseModel, ConfigDict, model_validator -from bfabric import Bfabric -from bfabric.entities import Resource, Dataset -from bfabric.experimental.workunit_definition import WorkunitDefinition +if TYPE_CHECKING: + from pathlib import Path + from bfabric import Bfabric + from bfabric.experimental.workunit_definition import WorkunitDefinition class ConfigResourceFlow(BaseModel): @@ -35,6 +36,13 @@ def check_at_least_one_flow(self) -> ConfigDispatchIndividualResources: def config_msi_imzml() -> ConfigDispatchIndividualResources: + """Returns the configuration for dispatching MSI imzML datasets to chunks. + + These apps allow both being run with a list of input `.imzML` resource files, or a dataset which contains a column + `Imzml` with the resource IDs and a column `PanelDataset` with the dataset IDs. + + Note: In the future the specifics of this might be adapted to allow e.g. `.imzML.7z` files or similar. 
+ """ return ConfigDispatchIndividualResources( resource_flow=ConfigResourceFlow(filter_suffix=".imzML"), dataset_flow=ConfigDatasetFlow(resource_column="Imzml", param_columns=[("PanelDataset", "mass_list_id")]), diff --git a/app_runner/src/app_runner/input_preparation/integrity.py b/app_runner/src/app_runner/input_preparation/integrity.py index ee8e1a09..222e4665 100644 --- a/app_runner/src/app_runner/input_preparation/integrity.py +++ b/app_runner/src/app_runner/input_preparation/integrity.py @@ -1,12 +1,15 @@ from __future__ import annotations from enum import Enum -from pathlib import Path -from bfabric.bfabric import Bfabric from bfabric.entities import Resource, Dataset from app_runner.input_preparation.spec import InputSpecType, ResourceSpec, DatasetSpec from app_runner.util.checksums import md5sum +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path + from bfabric.bfabric import Bfabric class IntegrityState(Enum): diff --git a/app_runner/src/app_runner/input_preparation/list_inputs.py b/app_runner/src/app_runner/input_preparation/list_inputs.py index 6f4f73dc..2f853e98 100644 --- a/app_runner/src/app_runner/input_preparation/list_inputs.py +++ b/app_runner/src/app_runner/input_preparation/list_inputs.py @@ -1,14 +1,17 @@ from __future__ import annotations from dataclasses import dataclass -from pathlib import Path from rich.console import Console from rich.table import Table, Column -from bfabric.bfabric import Bfabric -from app_runner.input_preparation.spec import InputSpecType from app_runner.input_preparation.integrity import check_integrity, IntegrityState +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from app_runner.input_preparation.spec import InputSpecType + from pathlib import Path + from bfabric.bfabric import Bfabric @dataclass @@ -26,6 +29,7 @@ def list_input_states( client: Bfabric, check_files: bool, ) -> list[FileState]: + """Returns the states of the input files, performing integrity checks if 
requested.""" input_states = [] for spec in specs: filename = spec.resolve_filename(client=client) @@ -40,6 +44,7 @@ def list_input_states( def print_input_states(input_states: list[FileState]) -> None: + """Prints the states of the input files to the command line.""" table = Table( Column("File"), Column("Input Type"), diff --git a/app_runner/src/app_runner/input_preparation/prepare.py b/app_runner/src/app_runner/input_preparation/prepare.py index 5c9a7646..b8499ffd 100644 --- a/app_runner/src/app_runner/input_preparation/prepare.py +++ b/app_runner/src/app_runner/input_preparation/prepare.py @@ -1,10 +1,8 @@ from __future__ import annotations -from pathlib import Path from loguru import logger -from bfabric.bfabric import Bfabric from bfabric.entities import Resource, Dataset from app_runner.input_preparation.spec import ( ResourceSpec, @@ -16,6 +14,11 @@ from app_runner.input_preparation.list_inputs import list_input_states from app_runner.util.checksums import md5sum from app_runner.util.scp import scp +from typing import TYPE_CHECKING, Literal + +if TYPE_CHECKING: + from pathlib import Path + from bfabric.bfabric import Bfabric class PrepareInputs: @@ -96,8 +99,20 @@ def clean_dataset(self, spec: DatasetSpec) -> None: def prepare_folder( - inputs_yaml: Path, target_folder: Path | None, client: Bfabric, ssh_user: str | None, action: str = "prepare" + inputs_yaml: Path, + target_folder: Path | None, + client: Bfabric, + ssh_user: str | None, + action: Literal["prepare", "clean"] = "prepare", ) -> None: + """Prepares the input files of a chunk folder according to the provided specs. + + :param inputs_yaml: Path to the inputs.yml file. + :param target_folder: Path to the target folder where the input files should be downloaded. + :param client: Bfabric client to use for obtaining metadata about the input files. + :param ssh_user: SSH user to use for downloading the input files, should it be different from the current user. + :param action: Action to perform. 
+ """ # set defaults inputs_yaml = inputs_yaml.absolute() if target_folder is None: diff --git a/app_runner/src/app_runner/input_preparation/spec.py b/app_runner/src/app_runner/input_preparation/spec.py index e76f24de..d5e1c0f4 100644 --- a/app_runner/src/app_runner/input_preparation/spec.py +++ b/app_runner/src/app_runner/input_preparation/spec.py @@ -1,7 +1,6 @@ from __future__ import annotations -from pathlib import Path -from typing import Annotated, Literal, Union, TYPE_CHECKING +from typing import Annotated, Literal, TYPE_CHECKING import yaml from pydantic import BaseModel, ConfigDict, Field, Discriminator @@ -12,6 +11,7 @@ RelativeFilePath = Annotated[str, Field(pattern=r"^[^/][^:]*$")] if TYPE_CHECKING: + from pathlib import Path from bfabric.bfabric import Bfabric @@ -56,7 +56,7 @@ def resolve_filename(self, client: Bfabric) -> str: return self.filename -InputSpecType = Annotated[Union[ResourceSpec, DatasetSpec], Discriminator("type")] +InputSpecType = Annotated[ResourceSpec | DatasetSpec, Discriminator("type")] class InputsSpec(BaseModel): diff --git a/app_runner/src/app_runner/output_registration/__init__.py b/app_runner/src/app_runner/output_registration/__init__.py index 3cbba34e..1962bf10 100644 --- a/app_runner/src/app_runner/output_registration/__init__.py +++ b/app_runner/src/app_runner/output_registration/__init__.py @@ -1,3 +1,3 @@ from .register import register_outputs -__ALL__ = ["register_outputs"] +__all__ = ["register_outputs"] diff --git a/app_runner/src/app_runner/output_registration/register.py b/app_runner/src/app_runner/output_registration/register.py index 6898ff46..4a6c4d09 100644 --- a/app_runner/src/app_runner/output_registration/register.py +++ b/app_runner/src/app_runner/output_registration/register.py @@ -1,10 +1,8 @@ from __future__ import annotations -from pathlib import Path from loguru import logger -from bfabric import Bfabric from bfabric.entities import Storage, Workunit from app_runner.output_registration.spec import ( 
CopyResourceSpec, @@ -17,6 +15,11 @@ from app_runner.util.scp import scp from bfabric_scripts.bfabric_save_csv2dataset import bfabric_save_csv2dataset from glom import glom +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path + from bfabric import Bfabric def _get_output_folder(spec: CopyResourceSpec, workunit: Workunit) -> Path: @@ -32,7 +35,8 @@ def register_file_in_workunit( workunit: Workunit, storage: Storage, resource_id: int | None = None, -): +) -> None: + """Registers a file in the workunit.""" if spec.update_existing != UpdateExisting.NO: # TODO implement this functionality raise NotImplementedError("Update existing not implemented") @@ -52,15 +56,17 @@ def register_file_in_workunit( client.save("resource", resource_data) -def copy_file_to_storage(spec: CopyResourceSpec, workunit: Workunit, storage: Storage, ssh_user: str | None): +def copy_file_to_storage(spec: CopyResourceSpec, workunit: Workunit, storage: Storage, ssh_user: str | None) -> None: + """Copies a file to the storage, according to the spec.""" output_folder = _get_output_folder(spec, workunit=workunit) output_uri = f"{storage.scp_prefix}{output_folder / spec.store_entry_path}" scp(spec.local_path, output_uri, user=ssh_user) -def _save_dataset(spec: SaveDatasetSpec, client: Bfabric, workunit: Workunit): +def _save_dataset(spec: SaveDatasetSpec, client: Bfabric, workunit: Workunit) -> None: + """Saves a dataset to the bfabric.""" # TODO should not print to stdout in the future - # TODO also it should not be imported from bfabric_scripts, but rather the generic functioanlity should be available + # TODO also it should not be imported from bfabric_scripts, but rather the generic functionality should be available # in the main package bfabric_save_csv2dataset( client=client, @@ -75,6 +81,7 @@ def _save_dataset(spec: SaveDatasetSpec, client: Bfabric, workunit: Workunit): def find_default_resource_id(workunit: Workunit) -> int | None: + """Finds the default resource's 
id for the workunit. Maybe in the future, this will be always `None`.""" candidate_resources = [ resource for resource in workunit.resources if resource["name"] not in ["slurm_stdout", "slurm_stderr"] ] @@ -86,7 +93,8 @@ def find_default_resource_id(workunit: Workunit) -> int | None: def register_all( client: Bfabric, workunit: Workunit, specs_list: list[SpecType], ssh_user: str | None, reuse_default_resource: bool -): +) -> None: + """Registers all the output specs to the workunit.""" default_resource_was_reused = not reuse_default_resource for spec in specs_list: logger.debug(f"Registering {spec}") @@ -112,6 +120,8 @@ def register_outputs( ssh_user: str | None, reuse_default_resource: bool, ) -> None: + """Registers outputs to the workunit.""" + # TODO it seems there is some redundancy here (i.e. there is also the implementation in runner) # parse the specs specs_list = OutputsSpec.read_yaml(outputs_yaml) diff --git a/app_runner/src/app_runner/output_registration/spec.py b/app_runner/src/app_runner/output_registration/spec.py index 01f7ccbc..98a8a4f9 100644 --- a/app_runner/src/app_runner/output_registration/spec.py +++ b/app_runner/src/app_runner/output_registration/spec.py @@ -1,12 +1,14 @@ from __future__ import annotations import enum -from pathlib import Path -from typing import Literal, Union, Annotated +from typing import Literal, Annotated, TYPE_CHECKING import yaml from pydantic import BaseModel, ConfigDict, Field +if TYPE_CHECKING: + from pathlib import Path + class UpdateExisting(enum.Enum): NO = "no" @@ -42,7 +44,7 @@ class SaveDatasetSpec(BaseModel): invalid_characters: str = "" -SpecType = Union[CopyResourceSpec, SaveDatasetSpec] +SpecType = CopyResourceSpec | SaveDatasetSpec class OutputsSpec(BaseModel): diff --git a/app_runner/src/app_runner/util/checksums.py b/app_runner/src/app_runner/util/checksums.py index e830f41f..e683af0a 100644 --- a/app_runner/src/app_runner/util/checksums.py +++ b/app_runner/src/app_runner/util/checksums.py @@ -1,7 +1,10 
@@ from __future__ import annotations import hashlib -from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path def md5sum(file: Path) -> str: From 0ba25bd5ae5d7ccced4fa070b7abceb78d397125 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 7 Nov 2024 09:20:40 +0100 Subject: [PATCH 106/144] add type checking to dev environment --- app_runner/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index 57ba85c4..21ca1392 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -31,6 +31,7 @@ doc = [ ] dev = [ "app_runner[doc]", + "bfabric[dev, typing]", "mypy", "types-PyYAML", ] From 7ee1dd64081d7cc4fe8052cf4483d73a8bf78f22 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 7 Nov 2024 09:31:48 +0100 Subject: [PATCH 107/144] fix a few typing errors --- app_runner/src/app_runner/app_runner/runner.py | 6 +++++- app_runner/src/app_runner/cli/outputs.py | 5 ++++- .../dispatch/dispatch_individual_resources.py | 3 +++ .../src/app_runner/input_preparation/prepare.py | 13 +++++++++---- .../src/app_runner/output_registration/register.py | 3 +++ 5 files changed, 24 insertions(+), 6 deletions(-) diff --git a/app_runner/src/app_runner/app_runner/runner.py b/app_runner/src/app_runner/app_runner/runner.py index 7cd8c596..1eaabbd0 100644 --- a/app_runner/src/app_runner/app_runner/runner.py +++ b/app_runner/src/app_runner/app_runner/runner.py @@ -46,9 +46,13 @@ def run_process(self, chunk_dir: Path) -> None: def run_register_outputs(self, chunk_dir: Path, workunit_ref: int | Path, reuse_default_resource: bool) -> None: workunit_definition = WorkunitDefinition.from_ref(workunit_ref, client=self._client) + registration = workunit_definition.registration + if registration is None: + msg = "Workunit definition does not provide registration information" + raise ValueError(msg) register_outputs( outputs_yaml=chunk_dir / "outputs.yml", - 
workunit_id=workunit_definition.registration.workunit_id, + workunit_id=registration.workunit_id, client=self._client, ssh_user=self._ssh_user, reuse_default_resource=reuse_default_resource, diff --git a/app_runner/src/app_runner/cli/outputs.py b/app_runner/src/app_runner/cli/outputs.py index 0acd91e8..2fe45a53 100644 --- a/app_runner/src/app_runner/cli/outputs.py +++ b/app_runner/src/app_runner/cli/outputs.py @@ -1,6 +1,5 @@ from __future__ import annotations - import cyclopts from bfabric import Bfabric @@ -32,6 +31,10 @@ def register( specs_list = OutputsSpec.read_yaml(outputs_yaml) workunit = Workunit.find(id=workunit_id, client=client) + if workunit is None: + msg = f"Workunit with id {workunit_id} not found" + raise ValueError(msg) + register_all( client=client, workunit=workunit, diff --git a/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py index f2ad0d28..2344cf6c 100644 --- a/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py +++ b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py @@ -107,6 +107,9 @@ def _dispatch_jobs_dataset_flow(self, definition: WorkunitDefinition, params: di if config is None: raise ValueError("dataset_flow is not configured") dataset = Dataset.find(id=definition.execution.dataset, client=self._client) + if dataset is None: + msg = f"Dataset with id {definition.execution.dataset} not found" + raise ValueError(msg) dataset_df = dataset.to_polars() resources = Resource.find_all(ids=dataset_df[config.resource_column].unique().to_list(), client=self._client) paths = [] diff --git a/app_runner/src/app_runner/input_preparation/prepare.py b/app_runner/src/app_runner/input_preparation/prepare.py index b8499ffd..1bca285c 100644 --- a/app_runner/src/app_runner/input_preparation/prepare.py +++ b/app_runner/src/app_runner/input_preparation/prepare.py @@ -1,20 +1,20 @@ from __future__ import annotations +from typing import 
TYPE_CHECKING, Literal +from bfabric.entities import Resource, Dataset from loguru import logger -from bfabric.entities import Resource, Dataset +from app_runner.input_preparation.integrity import IntegrityState +from app_runner.input_preparation.list_inputs import list_input_states from app_runner.input_preparation.spec import ( ResourceSpec, DatasetSpec, InputSpecType, InputsSpec, ) -from app_runner.input_preparation.integrity import IntegrityState -from app_runner.input_preparation.list_inputs import list_input_states from app_runner.util.checksums import md5sum from app_runner.util.scp import scp -from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: from pathlib import Path @@ -55,8 +55,13 @@ def clean_all(self, specs: list[InputSpecType]) -> None: def prepare_resource(self, spec: ResourceSpec) -> None: resource = Resource.find(id=spec.id, client=self._client) + if resource is None: + msg = f"Resource with id {spec.id} not found" + raise ValueError(msg) # determine path to copy from + # TODO as we have seen sometimes a faster approach would be to copy from the NFS mount, but this needs to be + # configured or recognized somehow scp_uri = f"{resource.storage.scp_prefix}{resource['relativepath']}" # determine path to copy to diff --git a/app_runner/src/app_runner/output_registration/register.py b/app_runner/src/app_runner/output_registration/register.py index 4a6c4d09..3e8ae7cd 100644 --- a/app_runner/src/app_runner/output_registration/register.py +++ b/app_runner/src/app_runner/output_registration/register.py @@ -127,6 +127,9 @@ def register_outputs( # register all specs workunit = Workunit.find(id=workunit_id, client=client) + if workunit is None: + msg = f"Workunit with id {workunit_id} not found" + raise ValueError(msg) register_all( client=client, workunit=workunit, From bb5cf478c7f84cfc2d94e2ec7adde2b4e82bfcec Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 7 Nov 2024 09:53:52 +0100 Subject: [PATCH 108/144] fix several lints --- 
pyproject.toml | 12 ++++++++--- src/bfabric/__init__.py | 3 +-- src/bfabric/bfabric.py | 10 +++++---- src/bfabric/bfabric2.py | 1 - src/bfabric/config/bfabric_client_config.py | 5 +++-- src/bfabric/config/config_file.py | 8 +++---- src/bfabric/engine/response_format_suds.py | 2 +- src/bfabric/entities/application.py | 2 +- src/bfabric/entities/core/entity.py | 6 +++--- src/bfabric/entities/core/has_many.py | 21 ++++++++++++------- src/bfabric/entities/core/has_one.py | 12 +++++++---- src/bfabric/entities/core/relationship.py | 6 ++++-- src/bfabric/entities/dataset.py | 6 ++++-- src/bfabric/entities/executable.py | 6 ++++-- src/bfabric/entities/externaljob.py | 2 +- src/bfabric/entities/multiplexid.py | 6 ++++-- src/bfabric/entities/multiplexkit.py | 2 +- src/bfabric/entities/order.py | 2 +- src/bfabric/entities/parameter.py | 6 ++++-- src/bfabric/entities/project.py | 6 ++++-- src/bfabric/entities/resource.py | 2 +- src/bfabric/entities/storage.py | 6 ++++-- src/bfabric/entities/workunit.py | 2 +- .../experimental/entity_lookup_cache.py | 5 +++-- .../experimental/workunit_definition.py | 6 ++++-- src/bfabric/results/result_container.py | 2 +- src/bfabric/utils/paginator.py | 5 ++++- 27 files changed, 94 insertions(+), 58 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 59588b99..52497a7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,9 +93,15 @@ indent-width = 4 target-version = "py39" [tool.ruff.lint] -#select = ["ANN", "BLE", "D103", "E", "F", "PLW", "PTH", "SIM", "UP", "TCH", "N"] -select = ["PLW", "SIM", "UP", "EXE", "E701", "E702", "E703", "E711", "E713", "E714", "FA100", "FA102", "W191"] -ignore = ["ANN101", "ANN102"] +select = ["ANN", "BLE", "D103", "E", "EXE", "F", "N", "PLW", "PTH", "SIM", "TCH", "UP", "W191"] +ignore = ["ANN101", "ANN102", "ANN401"] + +[tool.ruff.lint.per-file-ignores] +"**/bfabric_scripts/**" = ["ALL"] +"**/wrapper_creator/**" = ["ALL"] +"**/examples/**" = ["ALL"] +"**/tests/**" = ["ALL"] +"noxfile.py" = ["ALL"] 
[tool.licensecheck] using = "PEP631" diff --git a/src/bfabric/__init__.py b/src/bfabric/__init__.py index 8970b7fb..fee7836f 100644 --- a/src/bfabric/__init__.py +++ b/src/bfabric/__init__.py @@ -1,7 +1,7 @@ import importlib.metadata from bfabric.bfabric import Bfabric, BfabricAPIEngineType -from bfabric.config import BfabricAuth, BfabricClientConfig +from bfabric.config.bfabric_auth import BfabricAuth from bfabric.config.bfabric_client_config import BfabricClientConfig __all__ = [ @@ -11,5 +11,4 @@ "BfabricClientConfig", ] - __version__ = importlib.metadata.version("bfabric") diff --git a/src/bfabric/bfabric.py b/src/bfabric/bfabric.py index 1c3ba97a..6aa2ac8e 100644 --- a/src/bfabric/bfabric.py +++ b/src/bfabric/bfabric.py @@ -22,8 +22,7 @@ from functools import cached_property from pathlib import Path from pprint import pprint -from typing import Literal, Any -from collections.abc import Generator +from typing import Literal, Any, TYPE_CHECKING from loguru import logger from rich.console import Console @@ -37,6 +36,9 @@ from bfabric.results.result_container import ResultContainer from bfabric.utils.paginator import compute_requested_pages, BFABRIC_QUERY_LIMIT +if TYPE_CHECKING: + from collections.abc import Generator + class BfabricAPIEngineType(Enum): """Choice of engine to use.""" @@ -277,7 +279,7 @@ def __repr__(self) -> str: __str__ = __repr__ - def __getstate__(self): + def __getstate__(self) -> dict[str, Any]: return { "config": self._config, "auth": self._auth, @@ -285,7 +287,7 @@ def __getstate__(self): "query_counter": self.query_counter, } - def __setstate__(self, state): + def __setstate__(self, state: dict[str, Any]) -> None: self._config = state["config"] self._auth = state["auth"] self._engine_type = state["engine_type"] diff --git a/src/bfabric/bfabric2.py b/src/bfabric/bfabric2.py index 869842f7..ac138da5 100755 --- a/src/bfabric/bfabric2.py +++ b/src/bfabric/bfabric2.py @@ -3,4 +3,3 @@ warnings.warn("bfabric.bfabric2 module is deprecated, use 
bfabric instead", DeprecationWarning) # TODO deprecated - import from bfabric instead -from bfabric.bfabric import Bfabric, BfabricAPIEngineType, get_system_auth diff --git a/src/bfabric/config/bfabric_client_config.py b/src/bfabric/config/bfabric_client_config.py index 083219a8..e7bfe72b 100644 --- a/src/bfabric/config/bfabric_client_config.py +++ b/src/bfabric/config/bfabric_client_config.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Annotated +from typing import Annotated, Any from pydantic import BaseModel, BeforeValidator, Field, TypeAdapter, AnyHttpUrl @@ -25,7 +25,8 @@ class BfabricClientConfig(BaseModel): application_ids: Annotated[dict[str, int], Field(default_factory=dict)] job_notification_emails: Annotated[str, Field(default="")] - def __init__(self, **kwargs) -> None: + def __init__(self, **kwargs: Any) -> None: + # TODO remove this custom constructor (note that this is currently used in some places when "None" is passed) super().__init__(**{key: value for key, value in kwargs.items() if value is not None}) def copy_with( diff --git a/src/bfabric/config/config_file.py b/src/bfabric/config/config_file.py index c8e86a5e..1983ba9a 100644 --- a/src/bfabric/config/config_file.py +++ b/src/bfabric/config/config_file.py @@ -1,7 +1,7 @@ from __future__ import annotations import os -from typing import Annotated +from typing import Annotated, Any from loguru import logger from pydantic import BaseModel, Field, model_validator @@ -21,14 +21,14 @@ class EnvironmentConfig(BaseModel): @model_validator(mode="before") @classmethod - def gather_config(cls, values): + def gather_config(cls, values: dict[str, Any]) -> dict[str, Any]: """Gathers all configs into the config field.""" values["config"] = {key: value for key, value in values.items() if key not in ["login", "password"]} return values @model_validator(mode="before") @classmethod - def gather_auth(cls, values): + def gather_auth(cls, values: dict[str, Any]) -> dict[str, Any]: if 
"login" in values: values["auth"] = BfabricAuth.model_validate(values) return values @@ -40,7 +40,7 @@ class ConfigFile(BaseModel): @model_validator(mode="before") @classmethod - def gather_configs(cls, values): + def gather_configs(cls, values: dict[str, Any]) -> dict[str, Any]: """Gathers all configs into the configs field.""" configs = {} for key, value in values.items(): diff --git a/src/bfabric/engine/response_format_suds.py b/src/bfabric/engine/response_format_suds.py index 78748a23..eda2988b 100644 --- a/src/bfabric/engine/response_format_suds.py +++ b/src/bfabric/engine/response_format_suds.py @@ -22,7 +22,7 @@ def convert_suds_type(item: Any) -> int | str | Any: return item -def suds_asdict_recursive(d, convert_types: bool = False) -> dict[str, Value]: +def suds_asdict_recursive(d: Any, convert_types: bool = False) -> dict[str, Value]: """Convert Suds object into serializable format. https://stackoverflow.com/a/15678861 :param d: The input suds object diff --git a/src/bfabric/entities/application.py b/src/bfabric/entities/application.py index cb4ccceb..2296f3e3 100644 --- a/src/bfabric/entities/application.py +++ b/src/bfabric/entities/application.py @@ -2,11 +2,11 @@ from typing import Any, TYPE_CHECKING -from bfabric import Bfabric from bfabric.entities.core.entity import Entity from bfabric.entities.core.has_one import HasOne if TYPE_CHECKING: + from bfabric import Bfabric from bfabric.entities.executable import Executable from bfabric.entities.storage import Storage diff --git a/src/bfabric/entities/core/entity.py b/src/bfabric/entities/core/entity.py index 543274c8..abd39793 100644 --- a/src/bfabric/entities/core/entity.py +++ b/src/bfabric/entities/core/entity.py @@ -1,15 +1,15 @@ from __future__ import annotations -from collections.abc import Iterable from typing import TYPE_CHECKING from loguru import logger -from bfabric import Bfabric from bfabric.experimental import MultiQuery from bfabric.experimental.entity_lookup_cache import 
EntityLookupCache if TYPE_CHECKING: + from collections.abc import Iterable + from bfabric import Bfabric from typing import Any, Self @@ -101,7 +101,7 @@ def __repr__(self) -> str: __str__ = __repr__ @classmethod - def __check_ids_list(cls, ids) -> list[int]: + def __check_ids_list(cls, ids: list[int]) -> list[int]: """Converts the ids to a list of integers (if they are not already) and raises an error if this fails or there are duplicates.""" ids_requested = [int(id) for id in ids] diff --git a/src/bfabric/entities/core/has_many.py b/src/bfabric/entities/core/has_many.py index e0f4456c..22f815fa 100644 --- a/src/bfabric/entities/core/has_many.py +++ b/src/bfabric/entities/core/has_many.py @@ -1,15 +1,18 @@ from __future__ import annotations -from typing import Generic, TypeVar -from collections.abc import Iterator - +from typing import Generic, TypeVar, TYPE_CHECKING +from bfabric.entities.core.relationship import Relationship from polars import DataFrame -from bfabric import Bfabric -from bfabric.entities.core.entity import Entity # type: ignore -from bfabric.entities.core.relationship import Relationship +if TYPE_CHECKING: + from collections.abc import Iterator + from bfabric import Bfabric + + # noinspection PyUnresolvedReferences + from bfabric.entities.core.entity import Entity E = TypeVar("E", bound="Entity") +T = TypeVar("T") class HasMany(Relationship[E]): @@ -28,15 +31,17 @@ def __init__( self._client_property = client_property self._optional = optional - def __get__(self, obj, objtype=None) -> _HasManyProxy: + def __get__(self, obj: T | None, objtype: type[T] | None = None) -> _HasManyProxy: cache_attr = f"_HasMany__{self._ids_property or self._bfabric_field}_cache" + if obj is None: + raise ValueError("Cannot access HasMany relationship on class") if not hasattr(obj, cache_attr): ids = self._get_ids(obj) client = getattr(obj, self._client_property) setattr(obj, cache_attr, _HasManyProxy(entity_type=self._entity_type, ids=ids, client=client)) return 
getattr(obj, cache_attr) - def _get_ids(self, obj) -> list[int]: + def _get_ids(self, obj: T) -> list[int]: if self._bfabric_field is not None: if self._ids_property is not None: raise ValueError("Exactly one of bfabric_field and ids_property must be set, but both are set") diff --git a/src/bfabric/entities/core/has_one.py b/src/bfabric/entities/core/has_one.py index 5211876b..2fdc13ee 100644 --- a/src/bfabric/entities/core/has_one.py +++ b/src/bfabric/entities/core/has_one.py @@ -1,11 +1,15 @@ from __future__ import annotations -from typing import TypeVar +from typing import TypeVar, TYPE_CHECKING -from bfabric.entities.core.entity import Entity # type: ignore from bfabric.entities.core.relationship import Relationship +if TYPE_CHECKING: + # noinspection PyUnresolvedReferences + from bfabric.entities.core.entity import Entity + E = TypeVar("E", bound="Entity") +T = TypeVar("T") class HasOne(Relationship[E]): @@ -14,13 +18,13 @@ def __init__(self, entity: str, *, bfabric_field: str, optional: bool = False) - self._bfabric_field = bfabric_field self._optional = optional - def __get__(self, obj, objtype=None) -> E | None: + def __get__(self, obj: T | None, objtype: type[T] | None = None) -> E | None: cache_attr = f"_HasOne__{self._bfabric_field}_cache" if not hasattr(obj, cache_attr): setattr(obj, cache_attr, self._load_entity(obj=obj)) return getattr(obj, cache_attr) - def _load_entity(self, obj) -> E | None: + def _load_entity(self, obj: T) -> E | None: client = obj._client entity_data = obj.data_dict.get(self._bfabric_field) if self._optional and entity_data is None: diff --git a/src/bfabric/entities/core/relationship.py b/src/bfabric/entities/core/relationship.py index 0938ad30..f543e63c 100644 --- a/src/bfabric/entities/core/relationship.py +++ b/src/bfabric/entities/core/relationship.py @@ -2,9 +2,11 @@ import importlib from functools import cached_property -from typing import TypeVar, Generic +from typing import TypeVar, Generic, TYPE_CHECKING -from 
bfabric.entities.core.entity import Entity # type: ignore + +if TYPE_CHECKING: + from bfabric.entities.core.entity import Entity E = TypeVar("E", bound="Entity") diff --git a/src/bfabric/entities/dataset.py b/src/bfabric/entities/dataset.py index 4ca35cec..4ec1ef10 100644 --- a/src/bfabric/entities/dataset.py +++ b/src/bfabric/entities/dataset.py @@ -2,13 +2,15 @@ import tempfile from pathlib import Path -from typing import Any +from typing import Any, TYPE_CHECKING from polars import DataFrame -from bfabric import Bfabric from bfabric.entities.core.entity import Entity +if TYPE_CHECKING: + from bfabric import Bfabric + class Dataset(Entity): """Immutable representation of a single dataset in B-Fabric. diff --git a/src/bfabric/entities/executable.py b/src/bfabric/entities/executable.py index fdbca5b7..7fffc96d 100644 --- a/src/bfabric/entities/executable.py +++ b/src/bfabric/entities/executable.py @@ -1,10 +1,12 @@ from __future__ import annotations -from typing import Any +from typing import Any, TYPE_CHECKING -from bfabric import Bfabric from bfabric.entities.core.entity import Entity +if TYPE_CHECKING: + from bfabric import Bfabric + class Executable(Entity): ENDPOINT = "executable" diff --git a/src/bfabric/entities/externaljob.py b/src/bfabric/entities/externaljob.py index cbcfca38..146d9dae 100644 --- a/src/bfabric/entities/externaljob.py +++ b/src/bfabric/entities/externaljob.py @@ -3,11 +3,11 @@ from functools import cached_property from typing import Any, TYPE_CHECKING -from bfabric import Bfabric from bfabric.entities.core.entity import Entity from bfabric.entities.core.has_one import HasOne if TYPE_CHECKING: + from bfabric import Bfabric from bfabric.entities.workunit import Workunit from bfabric.entities.executable import Executable diff --git a/src/bfabric/entities/multiplexid.py b/src/bfabric/entities/multiplexid.py index e05096bb..8801c7f5 100644 --- a/src/bfabric/entities/multiplexid.py +++ b/src/bfabric/entities/multiplexid.py @@ -1,10 +1,12 @@ from 
__future__ import annotations -from typing import Any +from typing import Any, TYPE_CHECKING -from bfabric import Bfabric from bfabric.entities.core.entity import Entity +if TYPE_CHECKING: + from bfabric import Bfabric + class MultiplexId(Entity): ENDPOINT = "multiplexid" diff --git a/src/bfabric/entities/multiplexkit.py b/src/bfabric/entities/multiplexkit.py index 6ff4130d..6f78de8f 100644 --- a/src/bfabric/entities/multiplexkit.py +++ b/src/bfabric/entities/multiplexkit.py @@ -3,12 +3,12 @@ from functools import cached_property from typing import Any, TYPE_CHECKING -from bfabric import Bfabric from bfabric.entities.core.entity import Entity from bfabric.entities.core.has_many import HasMany if TYPE_CHECKING: + from bfabric import Bfabric from bfabric.entities.multiplexid import MultiplexId diff --git a/src/bfabric/entities/order.py b/src/bfabric/entities/order.py index e5f005d8..81b7b713 100644 --- a/src/bfabric/entities/order.py +++ b/src/bfabric/entities/order.py @@ -2,11 +2,11 @@ from typing import Any, TYPE_CHECKING -from bfabric import Bfabric from bfabric.entities.core.entity import Entity from bfabric.entities.core.has_one import HasOne if TYPE_CHECKING: + from bfabric import Bfabric from bfabric.entities.project import Project diff --git a/src/bfabric/entities/parameter.py b/src/bfabric/entities/parameter.py index 3dc7c9ec..beb93720 100644 --- a/src/bfabric/entities/parameter.py +++ b/src/bfabric/entities/parameter.py @@ -1,10 +1,12 @@ from __future__ import annotations -from typing import Any +from typing import Any, TYPE_CHECKING -from bfabric import Bfabric from bfabric.entities.core.entity import Entity +if TYPE_CHECKING: + from bfabric import Bfabric + class Parameter(Entity): ENDPOINT = "parameter" diff --git a/src/bfabric/entities/project.py b/src/bfabric/entities/project.py index 15f9036d..a85e3e02 100644 --- a/src/bfabric/entities/project.py +++ b/src/bfabric/entities/project.py @@ -1,10 +1,12 @@ from __future__ import annotations -from typing 
import Any +from typing import Any, TYPE_CHECKING -from bfabric import Bfabric from bfabric.entities.core.entity import Entity +if TYPE_CHECKING: + from bfabric import Bfabric + class Project(Entity): ENDPOINT = "project" diff --git a/src/bfabric/entities/resource.py b/src/bfabric/entities/resource.py index b43b4d5e..7e0a40a3 100644 --- a/src/bfabric/entities/resource.py +++ b/src/bfabric/entities/resource.py @@ -2,11 +2,11 @@ from typing import Any, TYPE_CHECKING -from bfabric import Bfabric from bfabric.entities.core.entity import Entity from bfabric.entities.core.has_one import HasOne if TYPE_CHECKING: + from bfabric import Bfabric from bfabric.entities.storage import Storage from bfabric.entities.workunit import Workunit diff --git a/src/bfabric/entities/storage.py b/src/bfabric/entities/storage.py index c13cecc2..4d73c5a8 100644 --- a/src/bfabric/entities/storage.py +++ b/src/bfabric/entities/storage.py @@ -2,11 +2,13 @@ from functools import cached_property from pathlib import Path -from typing import Any +from typing import Any, TYPE_CHECKING -from bfabric import Bfabric from bfabric.entities.core.entity import Entity +if TYPE_CHECKING: + from bfabric import Bfabric + class Storage(Entity): ENDPOINT = "storage" diff --git a/src/bfabric/entities/workunit.py b/src/bfabric/entities/workunit.py index 451005c4..2fbfdf37 100644 --- a/src/bfabric/entities/workunit.py +++ b/src/bfabric/entities/workunit.py @@ -5,12 +5,12 @@ import dateutil.parser -from bfabric import Bfabric from bfabric.entities.core.entity import Entity from bfabric.entities.core.has_many import HasMany from bfabric.entities.core.has_one import HasOne if TYPE_CHECKING: + from bfabric import Bfabric from bfabric.entities.application import Application from bfabric.entities.dataset import Dataset from bfabric.entities.externaljob import ExternalJob diff --git a/src/bfabric/experimental/entity_lookup_cache.py b/src/bfabric/experimental/entity_lookup_cache.py index dbbb2612..24c13c17 100644 --- 
a/src/bfabric/experimental/entity_lookup_cache.py +++ b/src/bfabric/experimental/entity_lookup_cache.py @@ -1,13 +1,14 @@ from __future__ import annotations from collections import defaultdict, OrderedDict -from collections.abc import Hashable from contextlib import contextmanager from typing import TypeVar, Generic, TYPE_CHECKING from loguru import logger if TYPE_CHECKING: + from collections.abc import Generator + from collections.abc import Hashable from bfabric.entities.core.entity import Entity # type: ignore T = TypeVar("T") @@ -79,7 +80,7 @@ def put(self, entity_type: type[Entity], entity_id: int, entity: Entity | None) @classmethod @contextmanager - def enable(cls, max_size: int = 0): + def enable(cls, max_size: int = 0) -> Generator[None, None, None]: """Context manager that enables the EntityLookupCache singleton instance, i.e. every entity lookup by ID within this context will be cached. The cache is cleared after the context exits. """ diff --git a/src/bfabric/experimental/workunit_definition.py b/src/bfabric/experimental/workunit_definition.py index 4b97a903..55719f75 100644 --- a/src/bfabric/experimental/workunit_definition.py +++ b/src/bfabric/experimental/workunit_definition.py @@ -1,14 +1,16 @@ from __future__ import annotations from pathlib import Path -from typing import Literal +from typing import Literal, TYPE_CHECKING import yaml from pydantic import BaseModel, ConfigDict, model_validator -from bfabric import Bfabric from bfabric.entities import Workunit +if TYPE_CHECKING: + from bfabric import Bfabric + class WorkunitExecutionDefinition(BaseModel): """Defines the execution details of a workunit.""" diff --git a/src/bfabric/results/result_container.py b/src/bfabric/results/result_container.py index ee25b55a..b113a318 100644 --- a/src/bfabric/results/result_container.py +++ b/src/bfabric/results/result_container.py @@ -2,11 +2,11 @@ import logging from typing import Any, TYPE_CHECKING, overload -from collections.abc import Iterator import 
bfabric.results.response_format_dict as formatter if TYPE_CHECKING: + from collections.abc import Iterator import polars from bfabric.errors import BfabricRequestError diff --git a/src/bfabric/utils/paginator.py b/src/bfabric/utils/paginator.py index 5bdddb20..d4004982 100644 --- a/src/bfabric/utils/paginator.py +++ b/src/bfabric/utils/paginator.py @@ -1,7 +1,10 @@ from __future__ import annotations import math -from collections.abc import Generator +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Generator # Single page query limit for BFabric API (as of time of writing, adapt if it changes) BFABRIC_QUERY_LIMIT = 100 From 7443981f33813564ace88f78b0d41afd85a6cbd3 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 7 Nov 2024 10:03:55 +0100 Subject: [PATCH 109/144] add parameterized noxfile --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 5a2400b4..1a6e8c69 100644 --- a/noxfile.py +++ b/noxfile.py @@ -7,7 +7,7 @@ nox.options.default_venv_backend = "uv" -@nox.session +@nox.session(python=["3.9", "3.13"]) def tests(session): session.install(".[test]") session.run("uv", "pip", "list") From c8853dc0b074bee4ccc4f8e5daf25d4ec0c0c378 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 7 Nov 2024 11:16:40 +0100 Subject: [PATCH 110/144] fix cyclopts cli --- app_runner/pyproject.toml | 4 ++++ app_runner/src/app_runner/cli/app.py | 9 +++------ app_runner/src/app_runner/cli/chunk.py | 9 +++------ app_runner/src/app_runner/cli/inputs.py | 6 +++--- app_runner/src/app_runner/cli/outputs.py | 10 ++++------ app_runner/src/app_runner/cli/validate.py | 5 +---- 6 files changed, 18 insertions(+), 25 deletions(-) diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index 21ca1392..0a0ce758 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -49,3 +49,7 @@ indent-width = 4 [tool.ruff.lint] select = ["ANN", "BLE", "D103", "E", "EXE", "F", "N", 
"PLW", "PTH", "SIM", "TCH", "UP", "W191"] ignore = ["ANN101", "ANN102"] + +[tool.ruff.lint.per-file-ignores] +# This is needed because of false positives in cyclopts code +"**/app_runner/cli/**" = ["TCH001", "TCH002", "TCH003"] diff --git a/app_runner/src/app_runner/cli/app.py b/app_runner/src/app_runner/cli/app.py index 3531b5be..e51deb1c 100644 --- a/app_runner/src/app_runner/cli/app.py +++ b/app_runner/src/app_runner/cli/app.py @@ -1,18 +1,15 @@ from __future__ import annotations +from pathlib import Path import cyclopts import yaml -from bfabric import Bfabric -from bfabric.cli_formatting import setup_script_logging from app_runner.app_runner._spec import AppSpec from app_runner.app_runner.runner import run_app, Runner +from bfabric import Bfabric +from bfabric.cli_formatting import setup_script_logging from bfabric.experimental.entity_lookup_cache import EntityLookupCache -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from pathlib import Path app_app = cyclopts.App("app", help="Run an app.") diff --git a/app_runner/src/app_runner/cli/chunk.py b/app_runner/src/app_runner/cli/chunk.py index f8829a2b..4240c218 100644 --- a/app_runner/src/app_runner/cli/chunk.py +++ b/app_runner/src/app_runner/cli/chunk.py @@ -1,18 +1,15 @@ from __future__ import annotations +from pathlib import Path import cyclopts import yaml -from bfabric import Bfabric -from bfabric.cli_formatting import setup_script_logging from app_runner.app_runner._spec import AppSpec from app_runner.app_runner.runner import run_app, Runner +from bfabric import Bfabric +from bfabric.cli_formatting import setup_script_logging from bfabric.experimental.entity_lookup_cache import EntityLookupCache -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from pathlib import Path app_chunk = cyclopts.App("chunk", help="Run an app on a chunk. 
You can create the chunks with `app dispatch`.") diff --git a/app_runner/src/app_runner/cli/inputs.py b/app_runner/src/app_runner/cli/inputs.py index 08987b32..b19c499b 100644 --- a/app_runner/src/app_runner/cli/inputs.py +++ b/app_runner/src/app_runner/cli/inputs.py @@ -4,16 +4,16 @@ import cyclopts -from bfabric import Bfabric -from bfabric.cli_formatting import setup_script_logging from app_runner.input_preparation import prepare_folder -from app_runner.input_preparation.spec import InputsSpec from app_runner.input_preparation.integrity import IntegrityState from app_runner.input_preparation.list_inputs import ( list_input_states, print_input_states, FileState, ) +from app_runner.input_preparation.spec import InputsSpec +from bfabric import Bfabric +from bfabric.cli_formatting import setup_script_logging app_inputs = cyclopts.App("inputs", help="Prepare input files for an app.") diff --git a/app_runner/src/app_runner/cli/outputs.py b/app_runner/src/app_runner/cli/outputs.py index 2fe45a53..f062b8cb 100644 --- a/app_runner/src/app_runner/cli/outputs.py +++ b/app_runner/src/app_runner/cli/outputs.py @@ -1,16 +1,14 @@ from __future__ import annotations +from pathlib import Path + import cyclopts +from app_runner.output_registration.register import register_all +from app_runner.output_registration.spec import OutputsSpec from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging from bfabric.entities import Workunit -from app_runner.output_registration.spec import OutputsSpec -from app_runner.output_registration.register import register_all -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from pathlib import Path app_outputs = cyclopts.App("outputs", help="Register output files for an app.") diff --git a/app_runner/src/app_runner/cli/validate.py b/app_runner/src/app_runner/cli/validate.py index 76d5c511..96ca35f4 100644 --- a/app_runner/src/app_runner/cli/validate.py +++ b/app_runner/src/app_runner/cli/validate.py @@ -1,5 +1,6 @@ 
from __future__ import annotations +from pathlib import Path import cyclopts import rich @@ -9,10 +10,6 @@ from app_runner.app_runner._spec import AppSpec from app_runner.input_preparation.spec import InputsSpec from app_runner.output_registration.spec import OutputsSpec -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from pathlib import Path app_validate = cyclopts.App("validate", help="Validate yaml files.") From 8b7ac74e6f6fdae8ce0d9716d5c33a3096ffb8c5 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 8 Nov 2024 10:17:49 +0100 Subject: [PATCH 111/144] sort imports --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 1a6e8c69..346c2bef 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,8 +1,8 @@ -import nox import shutil -from tempfile import TemporaryDirectory from pathlib import Path +from tempfile import TemporaryDirectory +import nox nox.options.default_venv_backend = "uv" From eb996c4d545194d10ac6e1d5e7fd33a1f44d98d5 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 8 Nov 2024 10:43:12 +0100 Subject: [PATCH 112/144] typo --- app_runner/docs/architecture/uml/app_model.plantuml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app_runner/docs/architecture/uml/app_model.plantuml b/app_runner/docs/architecture/uml/app_model.plantuml index 75a9d1de..b5f6c307 100644 --- a/app_runner/docs/architecture/uml/app_model.plantuml +++ b/app_runner/docs/architecture/uml/app_model.plantuml @@ -11,7 +11,7 @@ group dispatch app AppRunner -> App: dispatch(app) App -> BFabric: query BFabric -> App: response - App -> AppRunner: "chunks.yml", "chunk1/input.yml", ... + App -> AppRunner: "chunks.yml", "chunk1/inputs.yml", ... 
end loop for each chunk From 08d68f3cd6c9eba54640005ba45712190bcf2e45 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 11 Nov 2024 13:06:02 +0100 Subject: [PATCH 113/144] add writeable mount points --- app_runner/docs/changelog.md | 4 ++++ app_runner/src/app_runner/app_runner/_spec.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md index f37119a4..e5d0409d 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -4,6 +4,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## \[Unreleased\] +### Added + +- `MountOptions.writeable` list for writeable mount points. + ## \[0.0.3\] - 2024-10-24 ### Added diff --git a/app_runner/src/app_runner/app_runner/_spec.py b/app_runner/src/app_runner/app_runner/_spec.py index 5c6cdae1..88c9ed17 100644 --- a/app_runner/src/app_runner/app_runner/_spec.py +++ b/app_runner/src/app_runner/app_runner/_spec.py @@ -22,6 +22,7 @@ def to_shell(self) -> list[str]: class MountOptions(BaseModel): work_dir_target: Path | None = None read_only: list[tuple[Path, Path]] = [] + writeable: list[tuple[Path, Path]] = [] share_bfabric_config: bool = True def collect(self, work_dir: Path) -> list[tuple[Path, Path, bool]]: @@ -34,6 +35,8 @@ def collect(self, work_dir: Path) -> list[tuple[Path, Path, bool]]: mounts.append((work_dir, work_dir_target, False)) for source, target in self.read_only: mounts.append((source, target, True)) + for source, target in self.writeable: + mounts.append((source, target, False)) return [(source.expanduser().absolute(), target, read_only) for source, target, read_only in mounts] From 9f44b8b54d4061f0ffd51ba6fdab66ba1e8c08c7 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 11 Nov 2024 13:06:50 +0100 Subject: [PATCH 114/144] app-runner 0.0.4 --- app_runner/docs/changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md 
index e5d0409d..11ca87d4 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -4,6 +4,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## \[Unreleased\] +## \[0.0.4\] - 2024-11-11 + ### Added - `MountOptions.writeable` list for writeable mount points. From 28e3d7d73be14f0fe19632660eabd7b516d37803 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 11 Nov 2024 13:36:29 +0100 Subject: [PATCH 115/144] allow specifying mac address for docker container --- app_runner/docs/changelog.md | 4 ++++ app_runner/src/app_runner/app_runner/_spec.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md index 11ca87d4..68013953 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -4,6 +4,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## \[Unreleased\] +### Added + +- `CommandDocker.mac_address`: allows to specify the MAC address of the container. 
+ ## \[0.0.4\] - 2024-11-11 ### Added diff --git a/app_runner/src/app_runner/app_runner/_spec.py b/app_runner/src/app_runner/app_runner/_spec.py index 88c9ed17..8d7dcd4f 100644 --- a/app_runner/src/app_runner/app_runner/_spec.py +++ b/app_runner/src/app_runner/app_runner/_spec.py @@ -48,6 +48,7 @@ class CommandDocker(BaseModel): entrypoint: str | None = None engine: str = "docker" env: dict[str, str] = {} + mac_address: str | None = None mounts: MountOptions = MountOptions() def to_shell(self, work_dir: Path | None = None) -> list[str]: @@ -64,6 +65,7 @@ def to_shell(self, work_dir: Path | None = None) -> list[str]: for key, value in self.env.items(): env_args.append("--env") env_args.append(f"{key}={shlex.quote(value)}") + mac_address_arg = ["--mac-address", self.mac_address] if self.mac_address else [] return [ self.engine, @@ -74,6 +76,7 @@ def to_shell(self, work_dir: Path | None = None) -> list[str]: *mount_args, *entrypoint_arg, *env_args, + *mac_address_arg, self.image, *shlex.split(self.command), ] From 47d1b9487b3c92c7835d34e9be02286ead4bfcd4 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 11 Nov 2024 13:41:09 +0100 Subject: [PATCH 116/144] add custom_args field --- app_runner/docs/changelog.md | 1 + app_runner/src/app_runner/app_runner/_spec.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md index 68013953..1a4cb39b 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Added - `CommandDocker.mac_address`: allows to specify the MAC address of the container. +- `CommandDocker.custom_args`: allows to specify arbitrary additional arguments to the `docker run` command. 
## \[0.0.4\] - 2024-11-11 diff --git a/app_runner/src/app_runner/app_runner/_spec.py b/app_runner/src/app_runner/app_runner/_spec.py index 8d7dcd4f..88e93d15 100644 --- a/app_runner/src/app_runner/app_runner/_spec.py +++ b/app_runner/src/app_runner/app_runner/_spec.py @@ -50,6 +50,7 @@ class CommandDocker(BaseModel): env: dict[str, str] = {} mac_address: str | None = None mounts: MountOptions = MountOptions() + custom_args: list[str] = [] def to_shell(self, work_dir: Path | None = None) -> list[str]: work_dir = (work_dir or Path()).expanduser().absolute() @@ -77,6 +78,7 @@ def to_shell(self, work_dir: Path | None = None) -> list[str]: *entrypoint_arg, *env_args, *mac_address_arg, + *self.custom_args, self.image, *shlex.split(self.command), ] From 9ec8ac1d5a49e1484528ed2bd217e8c913015094 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 11 Nov 2024 13:42:39 +0100 Subject: [PATCH 117/144] app_runner 0.0.5 --- app_runner/docs/changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md index 1a4cb39b..b7f4a559 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -4,6 +4,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## \[Unreleased\] +## \[0.0.5\] - 2024-11-11 + ### Added - `CommandDocker.mac_address`: allows to specify the MAC address of the container. 
From a9f6be21e384b3a8b4cbe86fa077fabd6503549c Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 11 Nov 2024 15:53:32 +0100 Subject: [PATCH 118/144] make the parsing of app spec more strict --- app_runner/docs/changelog.md | 5 +++++ app_runner/src/app_runner/app_runner/_spec.py | 12 +++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md index b7f4a559..f1459f09 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -4,6 +4,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## \[Unreleased\] +### Changed + +- The app spec is now strict and will fail parsing if there are any unknown fields in the spec. It is better to find + this type of error early. + ## \[0.0.5\] - 2024-11-11 ### Added diff --git a/app_runner/src/app_runner/app_runner/_spec.py b/app_runner/src/app_runner/app_runner/_spec.py index 88e93d15..08bc56c7 100644 --- a/app_runner/src/app_runner/app_runner/_spec.py +++ b/app_runner/src/app_runner/app_runner/_spec.py @@ -5,13 +5,15 @@ from pathlib import Path from typing import Literal, Annotated -from pydantic import BaseModel, Discriminator +from pydantic import BaseModel, Discriminator, ConfigDict # TODO: This is kept very simple for now, so that it could be easily extended in the future. 
class CommandShell(BaseModel): + model_config = ConfigDict(extra="forbid") + type: Literal["shell"] = "shell" command: str @@ -20,6 +22,8 @@ def to_shell(self) -> list[str]: class MountOptions(BaseModel): + model_config = ConfigDict(extra="forbid") + work_dir_target: Path | None = None read_only: list[tuple[Path, Path]] = [] writeable: list[tuple[Path, Path]] = [] @@ -41,6 +45,8 @@ def collect(self, work_dir: Path) -> list[tuple[Path, Path, bool]]: class CommandDocker(BaseModel): + model_config = ConfigDict(extra="forbid") + # TODO not sure if to call this "docker", since "docker-compatible" would be appropriate type: Literal["docker"] = "docker" image: str @@ -88,12 +94,16 @@ def to_shell(self, work_dir: Path | None = None) -> list[str]: class CommandsSpec(BaseModel): + model_config = ConfigDict(extra="forbid") + dispatch: Command process: Command collect: Command class AppSpec(BaseModel): + model_config = ConfigDict(extra="forbid") + commands: CommandsSpec # Note: While we use the old submitter, this is still necessary reuse_default_resource: bool = True From e3a2ee74d53d300a59bb9a765bbebe5a61ed3fc6 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 11 Nov 2024 16:07:25 +0100 Subject: [PATCH 119/144] bump version --- app_runner/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index 0a0ce758..2d63a4f6 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "app_runner" description = "Application runner for B-Fabric apps" -version = "0.0.2" +version = "0.0.5" license = { text = "GPL-3.0" } authors = [ {name = "Leonardo Schwarz", email = "leonardo.schwarz@fgcz.ethz.ch"}, From b34fdb56bfcc090ee0eb07bbf30854160a96d8a2 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 09:01:42 +0100 Subject: [PATCH 120/144] print app_runner log messages --- app_runner/docs/changelog.md 
| 1 + src/bfabric/cli_formatting.py | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md index f1459f09..3b1837c0 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - The app spec is now strict and will fail parsing if there are any unknown fields in the spec. It is better to find this type of error early. +- Log messages originating in `app_runner` should be printed now, they were previously muted (unintentionally). ## \[0.0.5\] - 2024-11-11 diff --git a/src/bfabric/cli_formatting.py b/src/bfabric/cli_formatting.py index 35e6595c..77156948 100644 --- a/src/bfabric/cli_formatting.py +++ b/src/bfabric/cli_formatting.py @@ -22,12 +22,11 @@ def setup_script_logging(debug: bool = False) -> None: if os.environ.get(setup_flag_key, "0") == "1": return logger.remove() + packages = ["bfabric", "bfabric_scripts", "app_runner", "__main__"] if not (debug or os.environ.get("BFABRICPY_DEBUG")): - logger.add(sys.stderr, filter="bfabric", level="INFO", format="{level} {message}") - logger.add(sys.stderr, filter="bfabric_scripts", level="INFO", format="{level} {message}") - logger.add(sys.stderr, filter="__main__", level="INFO", format="{level} {message}") + for package in packages: + logger.add(sys.stderr, filter=package, level="INFO", format="{level} {message}") else: - logger.add(sys.stderr, filter="bfabric", level="DEBUG") - logger.add(sys.stderr, filter="bfabric_scripts", level="DEBUG") - logger.add(sys.stderr, filter="__main__", level="DEBUG") + for package in packages: + logger.add(sys.stderr, filter=package, level="DEBUG") os.environ[setup_flag_key] = "1" From eafda600117866cbeb8312bdc5ad86361e44a273 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 11:03:57 +0100 Subject: [PATCH 121/144] initial on release action --- 
.github/workflows/build_app_runner.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/build_app_runner.yml b/.github/workflows/build_app_runner.yml index 2a07b84b..45072520 100644 --- a/.github/workflows/build_app_runner.yml +++ b/.github/workflows/build_app_runner.yml @@ -1,6 +1,10 @@ name: Build App Runner on: workflow_dispatch: + release: + types: [published] + tags: + - "app-runner/*" jobs: build_app_runner: name: Build App Runner @@ -19,3 +23,20 @@ jobs: with: name: app_runner_linux_x86_64 path: build-output + publish_release_artifact: + name: Publish Release Artifact + runs-on: ubuntu-latest + needs: build_app_runner + if: github.event_name == 'release' + steps: + - name: Download Artifact + uses: actions/download-artifact@v4 + with: + name: app_runner_linux_x86_64 + - name: Upload Release Assets + uses: softprops/action-gh-release@v1 + with: + files: | + app_runner_linux_x86_64/* + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From cb59e861c39dd335a3448f3d47658fc936342144 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 11:10:53 +0100 Subject: [PATCH 122/144] specify path and print for debugging --- .github/workflows/build_app_runner.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build_app_runner.yml b/.github/workflows/build_app_runner.yml index 45072520..e7a87451 100644 --- a/.github/workflows/build_app_runner.yml +++ b/.github/workflows/build_app_runner.yml @@ -33,6 +33,10 @@ jobs: uses: actions/download-artifact@v4 with: name: app_runner_linux_x86_64 + path: app_runner_linux_x86_64 + - name: Print folder structure + run: | + tree - name: Upload Release Assets uses: softprops/action-gh-release@v1 with: From 9888068f909092913b28aa5d7ca090c5c62a61af Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 11:17:49 +0100 Subject: [PATCH 123/144] explicitly zip the file --- .github/workflows/build_app_runner.yml | 7 ++++++- 1 file changed, 6 
insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_app_runner.yml b/.github/workflows/build_app_runner.yml index e7a87451..6be73074 100644 --- a/.github/workflows/build_app_runner.yml +++ b/.github/workflows/build_app_runner.yml @@ -34,6 +34,11 @@ jobs: with: name: app_runner_linux_x86_64 path: app_runner_linux_x86_64 + - name: Zip for release + run: | + cd app_runner_linux_x86_64 + zip -r app_runner_linux_x86_64.zip * + ls -l - name: Print folder structure run: | tree @@ -41,6 +46,6 @@ jobs: uses: softprops/action-gh-release@v1 with: files: | - app_runner_linux_x86_64/* + app_runner_linux_x86_64/app_runner_linux_x86_64.zip env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From dfab5ac1cc3b9aa3580135dea539e667f39207f7 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 12:17:29 +0100 Subject: [PATCH 124/144] app runner 0.0.6 --- app_runner/docs/changelog.md | 4 ++++ app_runner/pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md index 3b1837c0..be62f0d6 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -4,6 +4,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## \[Unreleased\] +## \[0.0.6\] - 2024-11-14 + +First version with CD that will trigger the deployment automatically. + ### Changed - The app spec is now strict and will fail parsing if there are any unknown fields in the spec. 
It is better to find diff --git a/app_runner/pyproject.toml b/app_runner/pyproject.toml index 2d63a4f6..33dc7a80 100644 --- a/app_runner/pyproject.toml +++ b/app_runner/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "app_runner" description = "Application runner for B-Fabric apps" -version = "0.0.5" +version = "0.0.6" license = { text = "GPL-3.0" } authors = [ {name = "Leonardo Schwarz", email = "leonardo.schwarz@fgcz.ethz.ch"}, From e5cbddca80095d2ae45fdf727d8efed69f06c283 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 13:50:31 +0100 Subject: [PATCH 125/144] add test for output registration spec --- tests/app_runner/__init__.py | 0 .../output_registration/__init__.py | 0 .../output_registration/test_spec.py | 47 +++++++++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 tests/app_runner/__init__.py create mode 100644 tests/app_runner/output_registration/__init__.py create mode 100644 tests/app_runner/output_registration/test_spec.py diff --git a/tests/app_runner/__init__.py b/tests/app_runner/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/app_runner/output_registration/__init__.py b/tests/app_runner/output_registration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/app_runner/output_registration/test_spec.py b/tests/app_runner/output_registration/test_spec.py new file mode 100644 index 00000000..db89a705 --- /dev/null +++ b/tests/app_runner/output_registration/test_spec.py @@ -0,0 +1,47 @@ +import pytest +import yaml + +from app_runner.output_registration.spec import OutputsSpec, CopyResourceSpec, SaveDatasetSpec + + +@pytest.fixture() +def parsed() -> OutputsSpec: + return OutputsSpec( + outputs=[ + CopyResourceSpec( + local_path="local_path", + store_entry_path="store_entry_path", + store_folder_path=None, + update_existing="no", + protocol="scp", + ), + SaveDatasetSpec( + local_path="local_path", separator="separator", 
name=None, has_header=True, invalid_characters="" + ), + ] + ) + + +@pytest.fixture() +def serialized() -> str: + return """outputs: +- local_path: local_path + protocol: scp + store_entry_path: store_entry_path + store_folder_path: null + type: bfabric_copy_resource + update_existing: 'no' +- has_header: true + invalid_characters: '' + local_path: local_path + name: null + separator: separator + type: bfabric_dataset""" + + +def test_serialize(parsed, serialized): + assert yaml.safe_dump(parsed.model_dump(mode="json")).strip() == serialized.strip() + + +def test_parse(parsed, serialized): + assert OutputsSpec.model_validate(yaml.safe_load(serialized)) == parsed From 87798677f5bd215d6dec40338ac31f0ff991046a Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 13:52:43 +0100 Subject: [PATCH 126/144] fix outputs spec parsing --- app_runner/docs/changelog.md | 4 ++++ app_runner/src/app_runner/output_registration/spec.py | 6 ++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md index be62f0d6..0a45c1e0 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -8,6 +8,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). First version with CD that will trigger the deployment automatically. +### Fixed + +- Output spec was broken since `Path` was moved into `if TYPE_CHECKING` block. + ### Changed - The app spec is now strict and will fail parsing if there are any unknown fields in the spec. 
It is better to find diff --git a/app_runner/src/app_runner/output_registration/spec.py b/app_runner/src/app_runner/output_registration/spec.py index 98a8a4f9..8660e6fa 100644 --- a/app_runner/src/app_runner/output_registration/spec.py +++ b/app_runner/src/app_runner/output_registration/spec.py @@ -1,14 +1,12 @@ from __future__ import annotations import enum -from typing import Literal, Annotated, TYPE_CHECKING +from pathlib import Path # noqa: TCH003 +from typing import Literal, Annotated import yaml from pydantic import BaseModel, ConfigDict, Field -if TYPE_CHECKING: - from pathlib import Path - class UpdateExisting(enum.Enum): NO = "no" From d49aa2224a3a169f0e88b102feaf362ae54ab13e Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 13:54:03 +0100 Subject: [PATCH 127/144] run bfabric unit tests only in "tests" --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 346c2bef..63520073 100644 --- a/noxfile.py +++ b/noxfile.py @@ -11,7 +11,7 @@ def tests(session): session.install(".[test]") session.run("uv", "pip", "list") - session.run("pytest") + session.run("pytest", "tests/bfabric", "tests/bfabric_scripts") @nox.session From cadcd3d3b70df929c999710ac277a037aeeb7f93 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 13:56:19 +0100 Subject: [PATCH 128/144] add app_runner tests --- noxfile.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/noxfile.py b/noxfile.py index 63520073..2fc9c538 100644 --- a/noxfile.py +++ b/noxfile.py @@ -14,6 +14,15 @@ def tests(session): session.run("pytest", "tests/bfabric", "tests/bfabric_scripts") +@nox.session(python=["3.13"]) +def test_app_runner(session): + # TODO this one has a problem that bfabric gets installed from `@main` (so it could break CI) + session.install(".") + session.install("./app_runner[test]") + session.run("uv", "pip", "list") + session.run("pytest", "tests/app_runner") + + @nox.session def 
test_py_typed(session): """Verify py.typed is properly installed with the package.""" From 091f703ce03aea06f075fb6afc5915fc38cb2abb Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 14:06:12 +0100 Subject: [PATCH 129/144] add test for input preparation spec --- .../app_runner/input_preparation/__init__.py | 0 .../app_runner/input_preparation/test_spec.py | 43 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 tests/app_runner/input_preparation/__init__.py create mode 100644 tests/app_runner/input_preparation/test_spec.py diff --git a/tests/app_runner/input_preparation/__init__.py b/tests/app_runner/input_preparation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/app_runner/input_preparation/test_spec.py b/tests/app_runner/input_preparation/test_spec.py new file mode 100644 index 00000000..801fc201 --- /dev/null +++ b/tests/app_runner/input_preparation/test_spec.py @@ -0,0 +1,43 @@ +import pytest +import yaml + +from app_runner.input_preparation.spec import InputsSpec, ResourceSpec, DatasetSpec + + +@pytest.fixture() +def parsed() -> InputsSpec: + return InputsSpec( + inputs=[ + ResourceSpec( + id=1, + filename="filename", + check_checksum=True, + ), + DatasetSpec( + id=2, + filename="filename", + separator=",", + ), + ] + ) + + +@pytest.fixture() +def serialized() -> str: + return """inputs: +- check_checksum: true + filename: filename + id: 1 + type: bfabric_resource +- filename: filename + id: 2 + separator: ',' + type: bfabric_dataset""" + + +def test_serialize(parsed, serialized): + assert yaml.safe_dump(parsed.model_dump(mode="json")).strip() == serialized.strip() + + +def test_parse(parsed, serialized): + assert InputsSpec.model_validate(yaml.safe_load(serialized)) == parsed From fb0ab4f3139976bdf1401e1540409fcfd1024b87 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 14:07:25 +0100 Subject: [PATCH 130/144] move to combined folder (to be reflected in the code later) 
--- tests/app_runner/output_registration/__init__.py | 0 tests/app_runner/{input_preparation => specs}/__init__.py | 0 .../{input_preparation/test_spec.py => specs/test_inputs_spec.py} | 0 .../test_spec.py => specs/test_outputs_spec.py} | 0 4 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/app_runner/output_registration/__init__.py rename tests/app_runner/{input_preparation => specs}/__init__.py (100%) rename tests/app_runner/{input_preparation/test_spec.py => specs/test_inputs_spec.py} (100%) rename tests/app_runner/{output_registration/test_spec.py => specs/test_outputs_spec.py} (100%) diff --git a/tests/app_runner/output_registration/__init__.py b/tests/app_runner/output_registration/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/app_runner/input_preparation/__init__.py b/tests/app_runner/specs/__init__.py similarity index 100% rename from tests/app_runner/input_preparation/__init__.py rename to tests/app_runner/specs/__init__.py diff --git a/tests/app_runner/input_preparation/test_spec.py b/tests/app_runner/specs/test_inputs_spec.py similarity index 100% rename from tests/app_runner/input_preparation/test_spec.py rename to tests/app_runner/specs/test_inputs_spec.py diff --git a/tests/app_runner/output_registration/test_spec.py b/tests/app_runner/specs/test_outputs_spec.py similarity index 100% rename from tests/app_runner/output_registration/test_spec.py rename to tests/app_runner/specs/test_outputs_spec.py From 282e702bc7c52a4efc0984ec5dd3d9539551536b Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 14:11:26 +0100 Subject: [PATCH 131/144] add test for app spec as well --- tests/app_runner/specs/test_app_spec.py | 54 +++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 tests/app_runner/specs/test_app_spec.py diff --git a/tests/app_runner/specs/test_app_spec.py b/tests/app_runner/specs/test_app_spec.py new file mode 100644 index 00000000..57bceb69 --- 
/dev/null +++ b/tests/app_runner/specs/test_app_spec.py @@ -0,0 +1,54 @@ +import pytest +import yaml + +from app_runner.app_runner._spec import AppSpec, CommandShell, CommandDocker, MountOptions, CommandsSpec + + +@pytest.fixture() +def parsed() -> AppSpec: + return AppSpec( + commands=CommandsSpec( + dispatch=CommandShell(command="dispatch"), + process=CommandDocker( + image="image", command="command", mounts=MountOptions(read_only=[("/host", "/container")]) + ), + collect=CommandShell(command="collect"), + ), + reuse_default_resource=True, + ) + + +@pytest.fixture() +def serialized() -> str: + return """commands: + collect: + command: collect + type: shell + dispatch: + command: dispatch + type: shell + process: + command: command + custom_args: [] + engine: docker + entrypoint: null + env: {} + image: image + mac_address: null + mounts: + read_only: + - - /host + - /container + share_bfabric_config: true + work_dir_target: null + writeable: [] + type: docker +reuse_default_resource: true""" + + +def test_serialize(parsed, serialized): + assert yaml.safe_dump(parsed.model_dump(mode="json")).strip() == serialized.strip() + + +def test_parse(parsed, serialized): + assert AppSpec.model_validate(yaml.safe_load(serialized)) == parsed From c8de50608bb88a13e775a24b2944c8f88aad5994 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 14 Nov 2024 14:18:16 +0100 Subject: [PATCH 132/144] move specs classes into a common module --- app_runner/docs/specs/app_specification.md | 9 +++++++++ app_runner/docs/specs/input_specification.md | 2 +- app_runner/docs/specs/output_specification.md | 2 +- app_runner/src/app_runner/app_runner/runner.py | 2 +- app_runner/src/app_runner/cli/app.py | 2 +- app_runner/src/app_runner/cli/chunk.py | 2 +- app_runner/src/app_runner/cli/inputs.py | 2 +- app_runner/src/app_runner/cli/outputs.py | 2 +- app_runner/src/app_runner/cli/validate.py | 6 +++--- app_runner/src/app_runner/input_preparation/integrity.py | 2 +- 
.../src/app_runner/input_preparation/list_inputs.py | 2 +- app_runner/src/app_runner/input_preparation/prepare.py | 2 +- .../src/app_runner/output_registration/register.py | 2 +- app_runner/src/app_runner/specs/__init__.py | 0 .../{app_runner/_spec.py => specs/app_spec.py} | 0 .../{input_preparation/spec.py => specs/inputs_spec.py} | 0 .../spec.py => specs/outputs_spec.py} | 0 tests/app_runner/specs/test_app_spec.py | 2 +- tests/app_runner/specs/test_inputs_spec.py | 2 +- tests/app_runner/specs/test_outputs_spec.py | 2 +- 20 files changed, 26 insertions(+), 17 deletions(-) create mode 100644 app_runner/src/app_runner/specs/__init__.py rename app_runner/src/app_runner/{app_runner/_spec.py => specs/app_spec.py} (100%) rename app_runner/src/app_runner/{input_preparation/spec.py => specs/inputs_spec.py} (100%) rename app_runner/src/app_runner/{output_registration/spec.py => specs/outputs_spec.py} (100%) diff --git a/app_runner/docs/specs/app_specification.md b/app_runner/docs/specs/app_specification.md index b4620f6f..0e09086a 100644 --- a/app_runner/docs/specs/app_specification.md +++ b/app_runner/docs/specs/app_specification.md @@ -2,3 +2,12 @@ TODO: not clear if this same document should also explain the individual steps, or if it would make sense to first describe the app anatomy in a separate document with figures etc. and then list how to specify it + +## Reference + +```{eval-rst} +.. automodule:: app_runner.specs.app_spec + :members: + :undoc-members: + :show-inheritance: +``` diff --git a/app_runner/docs/specs/input_specification.md b/app_runner/docs/specs/input_specification.md index 44d48702..2148c46a 100644 --- a/app_runner/docs/specs/input_specification.md +++ b/app_runner/docs/specs/input_specification.md @@ -85,7 +85,7 @@ bfabric-app-runner inputs list --check inputs.yml . ## Reference ```{eval-rst} -.. automodule:: app_runner.input_preparation.spec +.. 
automodule:: app_runner.specs.inputs_spec :members: :undoc-members: :show-inheritance: diff --git a/app_runner/docs/specs/output_specification.md b/app_runner/docs/specs/output_specification.md index b6afd723..668f95b3 100644 --- a/app_runner/docs/specs/output_specification.md +++ b/app_runner/docs/specs/output_specification.md @@ -53,7 +53,7 @@ Please note: ## Reference ```{eval-rst} -.. automodule:: app_runner.output_registration.spec +.. automodule:: app_runner.specs.outputs_spec :members: :undoc-members: :show-inheritance: diff --git a/app_runner/src/app_runner/app_runner/runner.py b/app_runner/src/app_runner/app_runner/runner.py index 1eaabbd0..abfc9385 100644 --- a/app_runner/src/app_runner/app_runner/runner.py +++ b/app_runner/src/app_runner/app_runner/runner.py @@ -14,7 +14,7 @@ from app_runner.output_registration import register_outputs if TYPE_CHECKING: - from app_runner.app_runner._spec import AppSpec + from app_runner.specs.app_spec import AppSpec from bfabric import Bfabric diff --git a/app_runner/src/app_runner/cli/app.py b/app_runner/src/app_runner/cli/app.py index e51deb1c..5aa593b6 100644 --- a/app_runner/src/app_runner/cli/app.py +++ b/app_runner/src/app_runner/cli/app.py @@ -5,7 +5,7 @@ import cyclopts import yaml -from app_runner.app_runner._spec import AppSpec +from app_runner.specs.app_spec import AppSpec from app_runner.app_runner.runner import run_app, Runner from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging diff --git a/app_runner/src/app_runner/cli/chunk.py b/app_runner/src/app_runner/cli/chunk.py index 4240c218..fc6a6060 100644 --- a/app_runner/src/app_runner/cli/chunk.py +++ b/app_runner/src/app_runner/cli/chunk.py @@ -5,7 +5,7 @@ import cyclopts import yaml -from app_runner.app_runner._spec import AppSpec +from app_runner.specs.app_spec import AppSpec from app_runner.app_runner.runner import run_app, Runner from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging diff --git 
a/app_runner/src/app_runner/cli/inputs.py b/app_runner/src/app_runner/cli/inputs.py index b19c499b..e7518877 100644 --- a/app_runner/src/app_runner/cli/inputs.py +++ b/app_runner/src/app_runner/cli/inputs.py @@ -11,7 +11,7 @@ print_input_states, FileState, ) -from app_runner.input_preparation.spec import InputsSpec +from app_runner.specs.inputs_spec import InputsSpec from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging diff --git a/app_runner/src/app_runner/cli/outputs.py b/app_runner/src/app_runner/cli/outputs.py index f062b8cb..5e9e42a3 100644 --- a/app_runner/src/app_runner/cli/outputs.py +++ b/app_runner/src/app_runner/cli/outputs.py @@ -5,7 +5,7 @@ import cyclopts from app_runner.output_registration.register import register_all -from app_runner.output_registration.spec import OutputsSpec +from app_runner.specs.outputs_spec import OutputsSpec from bfabric import Bfabric from bfabric.cli_formatting import setup_script_logging from bfabric.entities import Workunit diff --git a/app_runner/src/app_runner/cli/validate.py b/app_runner/src/app_runner/cli/validate.py index 96ca35f4..514bd2c1 100644 --- a/app_runner/src/app_runner/cli/validate.py +++ b/app_runner/src/app_runner/cli/validate.py @@ -7,9 +7,9 @@ import rich.pretty import yaml -from app_runner.app_runner._spec import AppSpec -from app_runner.input_preparation.spec import InputsSpec -from app_runner.output_registration.spec import OutputsSpec +from app_runner.specs.app_spec import AppSpec +from app_runner.specs.inputs_spec import InputsSpec +from app_runner.specs.outputs_spec import OutputsSpec app_validate = cyclopts.App("validate", help="Validate yaml files.") diff --git a/app_runner/src/app_runner/input_preparation/integrity.py b/app_runner/src/app_runner/input_preparation/integrity.py index 222e4665..b6bf9d33 100644 --- a/app_runner/src/app_runner/input_preparation/integrity.py +++ b/app_runner/src/app_runner/input_preparation/integrity.py @@ -3,7 +3,7 @@ from enum import 
Enum from bfabric.entities import Resource, Dataset -from app_runner.input_preparation.spec import InputSpecType, ResourceSpec, DatasetSpec +from app_runner.specs.inputs_spec import InputSpecType, ResourceSpec, DatasetSpec from app_runner.util.checksums import md5sum from typing import TYPE_CHECKING diff --git a/app_runner/src/app_runner/input_preparation/list_inputs.py b/app_runner/src/app_runner/input_preparation/list_inputs.py index 2f853e98..68cb81d1 100644 --- a/app_runner/src/app_runner/input_preparation/list_inputs.py +++ b/app_runner/src/app_runner/input_preparation/list_inputs.py @@ -9,7 +9,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from app_runner.input_preparation.spec import InputSpecType + from app_runner.specs.inputs_spec import InputSpecType from pathlib import Path from bfabric.bfabric import Bfabric diff --git a/app_runner/src/app_runner/input_preparation/prepare.py b/app_runner/src/app_runner/input_preparation/prepare.py index 1bca285c..a67147d5 100644 --- a/app_runner/src/app_runner/input_preparation/prepare.py +++ b/app_runner/src/app_runner/input_preparation/prepare.py @@ -7,7 +7,7 @@ from app_runner.input_preparation.integrity import IntegrityState from app_runner.input_preparation.list_inputs import list_input_states -from app_runner.input_preparation.spec import ( +from app_runner.specs.inputs_spec import ( ResourceSpec, DatasetSpec, InputSpecType, diff --git a/app_runner/src/app_runner/output_registration/register.py b/app_runner/src/app_runner/output_registration/register.py index 3e8ae7cd..a14a8bb2 100644 --- a/app_runner/src/app_runner/output_registration/register.py +++ b/app_runner/src/app_runner/output_registration/register.py @@ -4,7 +4,7 @@ from loguru import logger from bfabric.entities import Storage, Workunit -from app_runner.output_registration.spec import ( +from app_runner.specs.outputs_spec import ( CopyResourceSpec, UpdateExisting, OutputsSpec, diff --git a/app_runner/src/app_runner/specs/__init__.py 
b/app_runner/src/app_runner/specs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app_runner/src/app_runner/app_runner/_spec.py b/app_runner/src/app_runner/specs/app_spec.py similarity index 100% rename from app_runner/src/app_runner/app_runner/_spec.py rename to app_runner/src/app_runner/specs/app_spec.py diff --git a/app_runner/src/app_runner/input_preparation/spec.py b/app_runner/src/app_runner/specs/inputs_spec.py similarity index 100% rename from app_runner/src/app_runner/input_preparation/spec.py rename to app_runner/src/app_runner/specs/inputs_spec.py diff --git a/app_runner/src/app_runner/output_registration/spec.py b/app_runner/src/app_runner/specs/outputs_spec.py similarity index 100% rename from app_runner/src/app_runner/output_registration/spec.py rename to app_runner/src/app_runner/specs/outputs_spec.py diff --git a/tests/app_runner/specs/test_app_spec.py b/tests/app_runner/specs/test_app_spec.py index 57bceb69..d520f6e0 100644 --- a/tests/app_runner/specs/test_app_spec.py +++ b/tests/app_runner/specs/test_app_spec.py @@ -1,7 +1,7 @@ import pytest import yaml -from app_runner.app_runner._spec import AppSpec, CommandShell, CommandDocker, MountOptions, CommandsSpec +from app_runner.specs.app_spec import AppSpec, CommandShell, CommandDocker, MountOptions, CommandsSpec @pytest.fixture() diff --git a/tests/app_runner/specs/test_inputs_spec.py b/tests/app_runner/specs/test_inputs_spec.py index 801fc201..ea802ddb 100644 --- a/tests/app_runner/specs/test_inputs_spec.py +++ b/tests/app_runner/specs/test_inputs_spec.py @@ -1,7 +1,7 @@ import pytest import yaml -from app_runner.input_preparation.spec import InputsSpec, ResourceSpec, DatasetSpec +from app_runner.specs.inputs_spec import InputsSpec, ResourceSpec, DatasetSpec @pytest.fixture() diff --git a/tests/app_runner/specs/test_outputs_spec.py b/tests/app_runner/specs/test_outputs_spec.py index db89a705..66bd13b2 100644 --- a/tests/app_runner/specs/test_outputs_spec.py +++ 
b/tests/app_runner/specs/test_outputs_spec.py @@ -1,7 +1,7 @@ import pytest import yaml -from app_runner.output_registration.spec import OutputsSpec, CopyResourceSpec, SaveDatasetSpec +from app_runner.specs.outputs_spec import OutputsSpec, CopyResourceSpec, SaveDatasetSpec @pytest.fixture() From d860561b9db13d3fe30a09ec767877d870f330d4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 12:22:50 +0000 Subject: [PATCH 133/144] Bump softprops/action-gh-release from 1 to 2 in the actions group Bumps the actions group with 1 update: [softprops/action-gh-release](https://github.com/softprops/action-gh-release). Updates `softprops/action-gh-release` from 1 to 2 - [Release notes](https://github.com/softprops/action-gh-release/releases) - [Changelog](https://github.com/softprops/action-gh-release/blob/master/CHANGELOG.md) - [Commits](https://github.com/softprops/action-gh-release/compare/v1...v2) --- updated-dependencies: - dependency-name: softprops/action-gh-release dependency-type: direct:production update-type: version-update:semver-major dependency-group: actions ... 
Signed-off-by: dependabot[bot] --- .github/workflows/build_app_runner.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_app_runner.yml b/.github/workflows/build_app_runner.yml index 6be73074..ce73ba00 100644 --- a/.github/workflows/build_app_runner.yml +++ b/.github/workflows/build_app_runner.yml @@ -43,7 +43,7 @@ jobs: run: | tree - name: Upload Release Assets - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 with: files: | app_runner_linux_x86_64/app_runner_linux_x86_64.zip From 0b65e1515ba740ee1839441c234854d5fcacf8f7 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 22 Nov 2024 12:00:09 +0100 Subject: [PATCH 134/144] Deactivate entity lookup cache for app run command --- app_runner/docs/changelog.md | 7 +++++++ app_runner/src/app_runner/cli/app.py | 20 +++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/app_runner/docs/changelog.md b/app_runner/docs/changelog.md index 0a45c1e0..b1b68a6e 100644 --- a/app_runner/docs/changelog.md +++ b/app_runner/docs/changelog.md @@ -4,6 +4,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## \[Unreleased\] +## \[0.0.7\] - 2024-11-22 + +### Fixed + +- When executing `app run` the experimental entity cache created incorrect behavior. The caching is temporarily disabled, + until the issue is resolved. + ## \[0.0.6\] - 2024-11-14 First version with CD that will trigger the deployment automatically. 
diff --git a/app_runner/src/app_runner/cli/app.py b/app_runner/src/app_runner/cli/app.py index 5aa593b6..3e060a47 100644 --- a/app_runner/src/app_runner/cli/app.py +++ b/app_runner/src/app_runner/cli/app.py @@ -28,15 +28,17 @@ def run( setup_script_logging() client = Bfabric.from_config() app_spec_parsed = AppSpec.model_validate(yaml.safe_load(app_spec.read_text())) - with EntityLookupCache.enable(): - run_app( - app_spec=app_spec_parsed, - workunit_ref=workunit_ref, - work_dir=work_dir, - client=client, - ssh_user=ssh_user, - read_only=read_only, - ) + # TODO(#107): usage of entity lookup cache was problematic -> beyond the full solution we could also consider + # to deactivate the cache for the output registration + # with EntityLookupCache.enable(): + run_app( + app_spec=app_spec_parsed, + workunit_ref=workunit_ref, + work_dir=work_dir, + client=client, + ssh_user=ssh_user, + read_only=read_only, + ) @app_app.command() From 903cdfc8d7429eaa9953321c297a372a4728955f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 2 Dec 2024 11:21:14 +0100 Subject: [PATCH 135/144] add application name --- .../bfabric_slurm_queue_status.py | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/bfabric_scripts/bfabric_slurm_queue_status.py b/src/bfabric_scripts/bfabric_slurm_queue_status.py index a9f57159..6df0ae06 100644 --- a/src/bfabric_scripts/bfabric_slurm_queue_status.py +++ b/src/bfabric_scripts/bfabric_slurm_queue_status.py @@ -5,13 +5,13 @@ import json import shlex import subprocess -import sys import polars as pl +import sys from loguru import logger from bfabric import Bfabric -from bfabric.entities import Workunit +from bfabric.entities import Workunit, Application def get_slurm_jobs(partition: str, ssh_host: str | None) -> pl.DataFrame: @@ -33,12 +33,26 @@ def get_slurm_jobs(partition: str, ssh_host: str | None) -> pl.DataFrame: return 
df.with_columns(workunit_id=pl.when(string_id_expr.is_not_null()).then(string_id_expr.cast(int))) -def get_workunit_status(client: Bfabric, workunit_ids: list[int]) -> dict[int, str]: - """Returns the status of the workunits with the specified ids, by consoluting the bfabric API. +def get_workunit_infos(client: Bfabric, workunit_ids: list[int]) -> list[dict[str, str]]: + """Retrieves information about the workunits with the specified ids. If a workunit was deleted, but it is in the slurm queue, it will be considered a zombie. """ + # Find the workunits which actually exist. workunits = Workunit.find_all(ids=workunit_ids, client=client) - return {id: workunits[id].data_dict["status"] if id in workunits else "ZOMBIE" for id in workunit_ids} + + # Retrieve application id -> name mapping. + app_ids = {wu["application"]["id"] for wu in workunits.values()} + apps = Application.find_all(ids=list(app_ids), client=client) + app_names = {app["id"]: app["name"] for app in apps.values()} + + return [ + { + "workunit_id": id, + "status": workunits[id].data_dict["status"] if id in workunits else "ZOMBIE", + "application_name": app_names[workunits[id]["application"]["id"]] if id in workunits else "N/A", + } + for id in workunit_ids + ] def find_zombie_jobs(client: Bfabric, partition: str, ssh_host: str | None) -> pl.DataFrame: @@ -46,14 +60,14 @@ def find_zombie_jobs(client: Bfabric, partition: str, ssh_host: str | None) -> p slurm_jobs = get_slurm_jobs(partition=partition, ssh_host=ssh_host) if slurm_jobs.is_empty(): return pl.DataFrame() - workunit_status = get_workunit_status( - client=client, workunit_ids=slurm_jobs["workunit_id"].drop_nulls().cast(int).to_list() + workunit_info_table = pl.DataFrame( + get_workunit_infos(client=client, workunit_ids=slurm_jobs["workunit_id"].drop_nulls().cast(int).to_list()) ) - workunit_status_table = pl.from_dict(dict(workunit_id=workunit_status.keys(), status=workunit_status.values())) - logger.info(slurm_jobs.join(workunit_status_table, 
on="workunit_id", how="left").sort("workunit_id")) - logger.info(f"Active jobs: {workunit_status_table.height}") - logger.info(f"Found {workunit_status_table.filter(pl.col('status') == 'ZOMBIE').height} zombie jobs.") - return workunit_status_table.filter(pl.col("status") == "ZOMBIE") + pl.Config.set_tbl_rows(100) + logger.info(slurm_jobs.join(workunit_info_table, on="workunit_id", how="left").sort("workunit_id")) + logger.info(f"Active jobs: {workunit_info_table.height}") + logger.info(f"Found {workunit_info_table.filter(pl.col('status') == 'ZOMBIE').height} zombie jobs.") + return workunit_info_table.filter(pl.col("status") == "ZOMBIE") def main() -> None: From f14fea16adf38da94f7189b0e22c1c5d4cb796d8 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 3 Dec 2024 08:35:50 +0100 Subject: [PATCH 136/144] adapt unit test --- .../test_bfabric_slurm_queue_status.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tests/bfabric_scripts/test_bfabric_slurm_queue_status.py b/tests/bfabric_scripts/test_bfabric_slurm_queue_status.py index dac45445..faf5b56d 100644 --- a/tests/bfabric_scripts/test_bfabric_slurm_queue_status.py +++ b/tests/bfabric_scripts/test_bfabric_slurm_queue_status.py @@ -2,8 +2,8 @@ import polars.testing import pytest -from bfabric.entities import Workunit -from bfabric_scripts.bfabric_slurm_queue_status import get_slurm_jobs, get_workunit_status +from bfabric.entities import Workunit, Application +from bfabric_scripts.bfabric_slurm_queue_status import get_slurm_jobs, get_workunit_infos @pytest.fixture @@ -32,16 +32,21 @@ def test_get_slurm_jobs_when_local(mocker, command_output): pl.testing.assert_frame_equal(df, expected_df) -def test_get_workunit_status(mocker): +def test_get_workunit_infos(mocker): mock_client = mocker.Mock(name="mock_client") - mock_find_all = mocker.patch.object(Workunit, "find_all") + mock_workunit_find_all = mocker.patch.object(Workunit, "find_all") workunit_ids = [5000, 5001] - 
mock_find_all.return_value = { - 5001: Workunit({"id": 5000, "status": "RUNNING"}), + mock_workunit_find_all.return_value = { + 5001: Workunit({"id": 5000, "status": "RUNNING", "application": {"id": 1}}), } - status = get_workunit_status(mock_client, workunit_ids) - assert status == {5000: "ZOMBIE", 5001: "RUNNING"} - mock_find_all.assert_called_once_with(ids=workunit_ids, client=mock_client) + mock_app_find_all = mocker.patch.object(Application, "find_all") + mock_app_find_all.return_value = {1: {"id": 1, "name": "myapp"}} + infos = get_workunit_infos(mock_client, workunit_ids) + assert infos == [ + {"workunit_id": 5000, "status": "ZOMBIE", "application_name": "N/A"}, + {"workunit_id": 5001, "status": "RUNNING", "application_name": "myapp"}, + ] + mock_workunit_find_all.assert_called_once_with(ids=workunit_ids, client=mock_client) if __name__ == "__main__": From 85abd62e5d9d929ee79c4533541f0943bdf06d8f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 6 Dec 2024 09:37:13 +0100 Subject: [PATCH 137/144] extract get_resource_flow_input_resource common functionality --- .../dispatch/dispatch_individual_resources.py | 20 +++++++----------- .../src/app_runner/dispatch/resource_flow.py | 21 +++++++++++++++++++ 2 files changed, 29 insertions(+), 12 deletions(-) create mode 100644 app_runner/src/app_runner/dispatch/resource_flow.py diff --git a/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py index 2344cf6c..495eb616 100644 --- a/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py +++ b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py @@ -3,10 +3,11 @@ from typing import Any, TYPE_CHECKING import yaml -from bfabric.entities import Resource, Dataset -from loguru import logger from pydantic import BaseModel, ConfigDict, model_validator +from app_runner.dispatch.resource_flow import get_resource_flow_input_resources +from bfabric.entities import 
Resource, Dataset + if TYPE_CHECKING: from pathlib import Path from bfabric import Bfabric @@ -88,19 +89,14 @@ def _write_chunks(self, chunks: list[Path]) -> None: yaml.safe_dump(data, f) def _dispatch_jobs_resource_flow(self, definition: WorkunitDefinition, params: dict[str, Any]) -> list[Path]: + """Returns the individual jobs for a resource flow workunit and returns the paths of the task folders.""" config = self._config.resource_flow if config is None: raise ValueError("resource_flow is not configured") - resources = Resource.find_all(ids=definition.execution.resources, client=self._client) - paths = [] - for resource in sorted(resources.values()): - if config.filter_suffix is not None and not resource["relativepath"].endswith(config.filter_suffix): - logger.info( - f"Skipping resource {resource['relativepath']!r} as it does not match the extension filter." - ) - continue - paths.append(self.dispatch_job(resource=resource, params=params)) - return paths + resources = get_resource_flow_input_resources( + client=self._client, definition=definition, filter_suffix=config.filter_suffix + ) + return [self.dispatch_job(resource=resource, params=params) for resource in resources] def _dispatch_jobs_dataset_flow(self, definition: WorkunitDefinition, params: dict[str, Any]) -> list[Path]: config = self._config.dataset_flow diff --git a/app_runner/src/app_runner/dispatch/resource_flow.py b/app_runner/src/app_runner/dispatch/resource_flow.py new file mode 100644 index 00000000..a3d9f6a0 --- /dev/null +++ b/app_runner/src/app_runner/dispatch/resource_flow.py @@ -0,0 +1,21 @@ +from loguru import logger + +from bfabric import Bfabric +from bfabric.entities import Resource +from bfabric.experimental.workunit_definition import WorkunitDefinition + + +def get_resource_flow_input_resources( + client: Bfabric, + definition: WorkunitDefinition, + filter_suffix: str | None, +) -> list[Resource]: + """Returns the input resources for a resource flow workunit, applying e.g. 
a filter suffix.""" + all_resources = Resource.find_all(ids=definition.execution.resources, client=client) + result_resources = [] + for resource in sorted(all_resources.values()): + if filter_suffix is not None and not resource["relativepath"].endswith(filter_suffix): + logger.info(f"Skipping resource {resource['relativepath']!r} as it does not match the extension filter.") + continue + result_resources.append(resource) + return result_resources From 9af06dc7772e3d3ad2ff874f2b64b0f70e04eb7e Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 6 Dec 2024 10:03:11 +0100 Subject: [PATCH 138/144] extract some more generic functionality --- .../dispatch/dispatch_individual_resources.py | 17 +++---------- app_runner/src/app_runner/dispatch/generic.py | 24 +++++++++++++++++++ 2 files changed, 27 insertions(+), 14 deletions(-) create mode 100644 app_runner/src/app_runner/dispatch/generic.py diff --git a/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py index 495eb616..4e29f50c 100644 --- a/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py +++ b/app_runner/src/app_runner/dispatch/dispatch_individual_resources.py @@ -2,9 +2,9 @@ from typing import Any, TYPE_CHECKING -import yaml from pydantic import BaseModel, ConfigDict, model_validator +from app_runner.dispatch.generic import write_workunit_definition_file, write_chunks_file from app_runner.dispatch.resource_flow import get_resource_flow_input_resources from bfabric.entities import Resource, Dataset @@ -74,19 +74,8 @@ def dispatch_workunit(self, definition: WorkunitDefinition) -> None: paths = self._dispatch_jobs_dataset_flow(definition, params) else: raise ValueError("either dataset or resources must be provided") - self._write_workunit_definition(definition=definition) - self._write_chunks(chunks=paths) - - def _write_workunit_definition(self, definition: WorkunitDefinition) -> None: - 
self._out_dir.mkdir(exist_ok=True, parents=True) - with (self._out_dir / "workunit_definition.yml").open("w") as f: - yaml.safe_dump(definition.model_dump(mode="json"), f) - - def _write_chunks(self, chunks: list[Path]) -> None: - self._out_dir.mkdir(exist_ok=True, parents=True) - with (self._out_dir / "chunks.yml").open("w") as f: - data = {"chunks": [str(chunk) for chunk in chunks]} - yaml.safe_dump(data, f) + write_workunit_definition_file(out_dir=self._out_dir, definition=definition) + write_chunks_file(out_dir=self._out_dir, chunks=paths) def _dispatch_jobs_resource_flow(self, definition: WorkunitDefinition, params: dict[str, Any]) -> list[Path]: """Returns the individual jobs for a resource flow workunit and returns the paths of the task folders.""" diff --git a/app_runner/src/app_runner/dispatch/generic.py b/app_runner/src/app_runner/dispatch/generic.py new file mode 100644 index 00000000..b0e50f2b --- /dev/null +++ b/app_runner/src/app_runner/dispatch/generic.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import yaml + +if TYPE_CHECKING: + from pathlib import Path + from bfabric.experimental.workunit_definition import WorkunitDefinition + + +def write_workunit_definition_file(out_dir: Path, definition: WorkunitDefinition) -> None: + """Writes the workunit definition to the output directory's 'workunit_definition.yml'.""" + out_dir.mkdir(exist_ok=True, parents=True) + with (out_dir / "workunit_definition.yml").open("w") as f: + yaml.safe_dump(definition.model_dump(mode="json"), f) + + +def write_chunks_file(out_dir: Path, chunks: list[Path]) -> None: + """Writes the list of chunk paths to the output directory's 'chunks.yml'.""" + out_dir.mkdir(exist_ok=True, parents=True) + with (out_dir / "chunks.yml").open("w") as f: + data = {"chunks": [str(chunk) for chunk in chunks]} + yaml.safe_dump(data, f) From 8c7d051bc34fcf71f78d004ed125fd204cd30c88 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 9 Dec 2024 
11:50:46 +0100 Subject: [PATCH 139/144] name the parameters --- .../wrapper_creator/bfabric_submitter.py | 65 +++++++++---------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/src/bfabric/wrapper_creator/bfabric_submitter.py b/src/bfabric/wrapper_creator/bfabric_submitter.py index 7e4bc4e6..7ef35c1c 100644 --- a/src/bfabric/wrapper_creator/bfabric_submitter.py +++ b/src/bfabric/wrapper_creator/bfabric_submitter.py @@ -64,13 +64,6 @@ def __init__( logger.debug(f"memory={self.memory}") logger.debug("__init__ DONE") - # def submit_gridengine(self, script="/tmp/runme.bash", arguments=""): - # GE = gridengine.GridEngine(user=self.user, queue=self.queue, GRIDENGINEROOT=self.scheduleroot) - # print(script) - # print(type(script)) - # resQsub = GE.qsub(script=script, arguments=arguments) - # self.B.logger(f"{resQsub}") - def submit_slurm(self, script: str = "/tmp/runme.bash") -> None: slurm = SLURM(slurm_root=self.scheduleroot) logger.debug(script) @@ -98,33 +91,33 @@ def compose_bash_script(self, configuration=None, configuration_parser=lambda x: # 2020-09-29 # https://GitHub.com/fgcz/bfabricPy/ # Slurm -#SBATCH --partition={0} -#SBATCH --nodelist={11} +#SBATCH --partition={partition} +#SBATCH --nodelist={nodelist} #SBATCH -n 1 #SBATCH -N 1 #SBATCH --cpus-per-task=1 -#SBATCH --mem-per-cpu={12} -#SBATCH -e {1} -#SBATCH -o {2} -#SBATCH --job-name=WU{10} +#SBATCH --mem-per-cpu={memory} +#SBATCH -e {stderr_url} +#SBATCH -o {stdout_url} +#SBATCH --job-name=WU{workunit_id} #SBATCH --workdir=/home/bfabric #SBATCH --export=ALL,HOME=/home/bfabric # Grid Engine Parameters -#$ -q {0}&{11} -#$ -e {1} -#$ -o {2} +#$ -q {partition}&{nodelist} +#$ -e {stderr_url} +#$ -o {stdout_url} set -e set -o pipefail export EMAIL="{job_notification_emails}" -export EXTERNALJOB_ID={3} -export RESSOURCEID_OUTPUT={4} -export RESSOURCEID_STDOUT_STDERR="{5} {6}" -export OUTPUT="{7}" -export WORKUNIT_ID="{10}" +export EXTERNALJOB_ID={external_job_id} +export 
RESSOURCEID_OUTPUT={resource_id_output} +export RESSOURCEID_STDOUT_STDERR="{resource_id_stderr} {resource_id_stdout}" +export OUTPUT="{output_list}" +export WORKUNIT_ID="{workunit_id}" STAMP=`/bin/date +%Y%m%d%H%M`.$$.$JOB_ID TEMPDIR="/home/bfabric/prx" @@ -145,7 +138,7 @@ def compose_bash_script(self, configuration=None, configuration_parser=lambda x: # job configuration set by B-Fabrics wrapper_creator executable # application parameter/configuration cat > $TEMPDIR/config_WU$WORKUNIT_ID.yaml < Date: Mon, 9 Dec 2024 12:02:05 +0100 Subject: [PATCH 140/144] simplify and fixes --- .../bfabric_upload_submitter_executable.py | 57 ++++++------------- 1 file changed, 18 insertions(+), 39 deletions(-) diff --git a/src/bfabric_scripts/bfabric_upload_submitter_executable.py b/src/bfabric_scripts/bfabric_upload_submitter_executable.py index ba872875..98080fcc 100755 --- a/src/bfabric_scripts/bfabric_upload_submitter_executable.py +++ b/src/bfabric_scripts/bfabric_upload_submitter_executable.py @@ -17,16 +17,6 @@ # Licensed under GPL version 3 # # -# Usage: bfabric_upload_submitter_executable.py [-h] filename {slurm,gridengine} -# -# Arguments for new submitter executable. 
For more details run: -# ./bfabric_upload_submitter_executable.py --help -# -# positional arguments: -# filename Bash executable of the submitter -# {slurm,gridengine} Valid engines for job handling are: slurm, gridengine -# -# # Example of use: # # For bfabric.__version__ < 0.10.22 @@ -42,23 +32,22 @@ # # ./bfabric_upload_submitter_executable.py bfabric_executable_submitter_functionalTest.py slurm --name "Dummy_-_yaml___Slurm_executable" --description "test new submitter's parameters" # +from __future__ import annotations import argparse import base64 +from pathlib import Path import yaml from bfabric import Bfabric +from bfabric.cli_formatting import setup_script_logging -def main_upload_submitter_executable(options) -> None: - executableFileName = options.filename - engine = options.engine - - client = Bfabric.from_config() - - with open(executableFileName) as f: - executable = f.read() +def main_upload_submitter_executable( + client: Bfabric, filename: Path, engine: str, name: str | None, description: str | None +) -> None: + executable = filename.read_text() attr = { "context": "SUBMITTER", @@ -99,25 +88,13 @@ def main_upload_submitter_executable(options) -> None: attr["parameter"][2]["value"] = "10G" attr["version"] = 1.02 attr["description"] = "Stage the yaml config file to application using Slurm." - elif engine == "gridengine": - attr["name"] = "yaml / Grid Engine executable" - attr["parameter"][0]["description"] = "Which Grid Engine partition should be used." - attr["parameter"][0]["enumeration"] = "PRX" - attr["parameter"][0]["key"] = "partition" - attr["parameter"][0]["label"] = "partition" - attr["parameter"][0]["value"] = "PRX" - attr["parameter"][1]["description"] = "Which Grid Engine node should be used." 
- attr["parameter"][1]["enumeration"] = ["fgcz-r-033", "fgcz-r-028", "fgcz-r-018"] - attr["parameter"][1]["key"] = "nodelist" - attr["parameter"][1]["label"] = "nodelist" - attr["parameter"][1]["value"] = "fgcz-r-028" - attr["version"] = 1.00 - attr["description"] = "Stage the yaml config file to an application using Grid Engine." + else: + raise NotImplementedError - if options.name: - attr["name"] = options.name - if options.description: - attr["description"] = options.description + if name: + attr["name"] = name + if description: + attr["description"] = description res = client.save("executable", attr) print(yaml.dump(res)) @@ -125,18 +102,20 @@ def main_upload_submitter_executable(options) -> None: def main() -> None: """Parses command line arguments and calls `main_upload_submitter_executable`.""" + setup_script_logging() + client = Bfabric.from_config() parser = argparse.ArgumentParser() - parser.add_argument("filename", type=str, help="Bash executable of the submitter") + parser.add_argument("filename", type=Path, help="Bash executable of the submitter") parser.add_argument( "engine", type=str, - choices=["slurm", "gridengine"], + choices=["slurm"], help="Valid engines for job handling are: slurm, gridengine", ) parser.add_argument("--name", type=str, help="Name of the submitter", required=False) parser.add_argument("--description", type=str, help="Description about the submitter", required=False) options = parser.parse_args() - main(options) + main_upload_submitter_executable(client=client, **vars(options)) if __name__ == "__main__": From 82075048ec058e3101780916444688ffb8f953fc Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 9 Dec 2024 12:10:33 +0100 Subject: [PATCH 141/144] refactor --- .../bfabric_upload_submitter_executable.py | 54 +++++++++---------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/src/bfabric_scripts/bfabric_upload_submitter_executable.py b/src/bfabric_scripts/bfabric_upload_submitter_executable.py index 
98080fcc..052a4d77 100755 --- a/src/bfabric_scripts/bfabric_upload_submitter_executable.py +++ b/src/bfabric_scripts/bfabric_upload_submitter_executable.py @@ -44,6 +44,26 @@ from bfabric.cli_formatting import setup_script_logging +def slurm_parameters() -> list[dict[str, str]]: + parameters = [{"modifiable": "true", "required": "true", "type": "STRING"} for _ in range(3)] + parameters[0]["description"] = "Which Slurm partition should be used." + parameters[0]["enumeration"] = ["prx"] + parameters[0]["key"] = "partition" + parameters[0]["label"] = "partition" + parameters[0]["value"] = "prx" + parameters[1]["description"] = "Which Slurm nodelist should be used." + parameters[1]["enumeration"] = ["fgcz-r-033"] + parameters[1]["key"] = "nodelist" + parameters[1]["label"] = "nodelist" + parameters[1]["value"] = "fgcz-r-[035,028]" + parameters[2]["description"] = "Which Slurm memory should be used." + parameters[2]["enumeration"] = ["10G", "50G", "128G", "256G", "512G", "960G"] + parameters[2]["key"] = "memory" + parameters[2]["label"] = "memory" + parameters[2]["value"] = "10G" + return parameters + + def main_upload_submitter_executable( client: Bfabric, filename: Path, engine: str, name: str | None, description: str | None ) -> None: @@ -51,11 +71,7 @@ def main_upload_submitter_executable( attr = { "context": "SUBMITTER", - "parameter": [ - {"modifiable": "true", "required": "true", "type": "STRING"}, - {"modifiable": "true", "required": "true", "type": "STRING"}, - {"modifiable": "true", "required": "true", "type": "STRING"}, - ], + "parameter": [], "masterexecutableid": 11871, "status": "available", "enabled": "true", @@ -64,30 +80,10 @@ def main_upload_submitter_executable( } if engine == "slurm": - attr["name"] = "yaml / Slurm executable" - attr["parameter"][0]["description"] = "Which Slurm partition should be used." 
- attr["parameter"][0]["enumeration"] = ["prx", "maxquant", "scaffold", "mascot"] - attr["parameter"][0]["key"] = "partition" - attr["parameter"][0]["label"] = "partition" - attr["parameter"][0]["value"] = "prx" - attr["parameter"][1]["description"] = "Which Slurm nodelist should be used." - attr["parameter"][1]["enumeration"] = [ - "fgcz-r-[035,028]", - "fgcz-r-035", - "fgcz-r-033", - "fgcz-r-028", - "fgcz-r-018", - ] - attr["parameter"][1]["key"] = "nodelist" - attr["parameter"][1]["label"] = "nodelist" - attr["parameter"][1]["value"] = "fgcz-r-[035,028]" - attr["parameter"][2]["description"] = "Which Slurm memory should be used." - attr["parameter"][2]["enumeration"] = ["10G", "50G", "128G", "256G", "512G", "960G"] - attr["parameter"][2]["key"] = "memory" - attr["parameter"][2]["label"] = "memory" - attr["parameter"][2]["value"] = "10G" - attr["version"] = 1.02 - attr["description"] = "Stage the yaml config file to application using Slurm." + name = name or "yaml / Slurm executable" + description = description or "Submitter executable for the bfabric functional test using Slurm." 
+ attr["version"] = "1.03" + attr["parameter"] = slurm_parameters() else: raise NotImplementedError From 37256eac944fbcb9cf357f35a686fe7f1b678d1d Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 9 Dec 2024 12:12:52 +0100 Subject: [PATCH 142/144] refactor --- .../bfabric_executable_submitter_slurm.py | 37 +++++-------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/src/bfabric_scripts/bfabric_executable_submitter_slurm.py b/src/bfabric_scripts/bfabric_executable_submitter_slurm.py index 1f4bf375..ae3208e5 100755 --- a/src/bfabric_scripts/bfabric_executable_submitter_slurm.py +++ b/src/bfabric_scripts/bfabric_executable_submitter_slurm.py @@ -4,6 +4,11 @@ """ Submitter for B-Fabric """ +from argparse import ArgumentParser + +from bfabric import Bfabric +from bfabric.wrapper_creator.bfabric_submitter import BfabricSubmitter + # Copyright (C) 2014,2015 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. # Modified to submit to the Slurm scheduler on 2020-09-28 # @@ -31,35 +36,13 @@ """ -# import os -# import sys -from optparse import OptionParser -from bfabric import BfabricSubmitter - - def main() -> None: - parser = OptionParser(usage="usage: %prog -j ", version="%prog 1.0") - - parser.add_option( - "-j", - "--externaljobid", - type="int", - action="store", - dest="externaljobid", - default=None, - help="external job id is required.", - ) - - (options, args) = parser.parse_args() - - if not options.externaljobid: - parser.error("option '-j' is required.") - - bfapp = BfabricSubmitter(externaljobid=options.externaljobid, SCHEDULEROOT="/usr/", scheduler="Slurm") - + parser = ArgumentParser(description="Submitter for B-Fabric") + parser.add_argument("-j", "--externaljobid", type=int, required=True) + args = parser.parse_args() + client = Bfabric.from_config() + bfapp = BfabricSubmitter(client=client, externaljobid=args.externaljobid, scheduleroot="/usr/", scheduler="Slurm") bfapp.submitter_yaml() - # TODO(cp): fix that - # print(bfapp.query_counter)
if __name__ == "__main__": From 5a27cad8ef13b4b3c3411593f8e5b501b76a3a00 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 10 Dec 2024 09:10:40 +0100 Subject: [PATCH 143/144] add script bfabric_save_resource_description.py --- docs/changelog.md | 1 + pyproject.toml | 1 + .../bfabric_save_resource_description.py | 28 +++++++++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 src/bfabric_scripts/bfabric_save_resource_description.py diff --git a/docs/changelog.md b/docs/changelog.md index 2f36dc8c..817d4107 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -17,6 +17,7 @@ instead a separate Python package with its individual changelog. - Relationship: `ExternalJob.executable` - (experimental) EntityLookupCache that allows to cache entity lookups in a script to avoid redundant requests. +- Specific use case script: bfabric_save_resource_description.py (the functionality will be available in a future CLI). ### Fixed diff --git a/pyproject.toml b/pyproject.toml index 52497a7d..1ad24186 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,7 @@ Repository = "https://github.com/fgcz/bfabricPy" "bfabric_save_workunit_attribute.py" = "bfabric_scripts.bfabric_save_workunit_attribute:main" "bfabric_save_workflowstep.py" = "bfabric_scripts.bfabric_save_workflowstep:main" "bfabric_slurm_queue_status.py" = "bfabric_scripts.bfabric_slurm_queue_status:main" +"bfabric_save_resource_description.py" = "bfabric_scripts.bfabric_save_resource_description:main" [tool.setuptools.package-data] "*" = ["py.typed"] diff --git a/src/bfabric_scripts/bfabric_save_resource_description.py b/src/bfabric_scripts/bfabric_save_resource_description.py new file mode 100644 index 00000000..23e54903 --- /dev/null +++ b/src/bfabric_scripts/bfabric_save_resource_description.py @@ -0,0 +1,28 @@ +import argparse +from pathlib import Path + +from rich.pretty import pprint + +from bfabric import Bfabric +from bfabric.cli_formatting import setup_script_logging + + +def 
save_resource_description(client: Bfabric, id: int, description_file: Path) -> None: + description = description_file.read_text() + obj = {"id": id, "description": description} + response = client.save(endpoint="resource", obj=obj) + pprint(response[0], indent_guides=False) + + +def main() -> None: + setup_script_logging() + parser = argparse.ArgumentParser() + parser.add_argument("id", type=int) + parser.add_argument("description_file", type=Path) + client = Bfabric.from_config() + args = parser.parse_args() + save_resource_description(client=client, **vars(args)) + + +if __name__ == "__main__": + main() From 8dde6780e787009ec124d31ebd7be6e4c29e4280 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 10 Dec 2024 09:11:10 +0100 Subject: [PATCH 144/144] 1.13.9 --- docs/changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 817d4107..e08d67ad 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -10,6 +10,8 @@ Versioning currently follows `X.Y.Z` where ## \[Unreleased\] +## \[1.13.9\] - 2024-12-10 + From this release onwards, the experimental app runner is not part of the main bfabric package and instead a separate Python package with its individual changelog.