Merge pull request #113 from fgcz/main

v1.13.10
fgcz · Dec 13, 2024 · 42fb039 · 42fb039
2 parents c466505 + bbc8ad9
commit 42fb039
Show file tree

Hide file tree

Showing 21 changed files with 471 additions and 152 deletions.
diff --git a/.github/workflows/complete_release.yml b/.github/workflows/complete_release.yml
@@ -37,12 +37,15 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: ./.github/actions/setup-bfabricpy
+      - uses: actions/setup-python@v5
         with:
           python-version: 3.12
+      - name: Install nox
+        run: pip install nox uv
       - name: Publish documentation
         run: |
           set -euxo pipefail
           git fetch --unshallow origin gh-pages
           git checkout gh-pages && git pull && git checkout -
-          uv run mkdocs gh-deploy
+          nox -s docs
+          nox -s publish_docs
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -10,6 +10,21 @@ Versioning currently follows `X.Y.Z` where
 
 ## \[Unreleased\]
 
+## \[1.13.10\] - 2024-12-13
+
+### Fixed
+
+- If `bfabricpy.yml` contains a root-level key which is not a dictionary, a correct error message is shown instead of raising an exception.
+- A bug introduced while refactoring in `slurm.py` which passed `Path` objects to `subprocess` instead of strings.
+- Submitter
+    - does not use unset `JOB_ID` environment variable anymore.
+    - does not set unused `STAMP` environment variable anymore.
+- Fix `bfabric_save_workflowstep.py` bugs from refactoring.
+
+### Added
+
+- Experimental bfabric-cli interface. Please do not use it in production yet as it will need a lot of refinement.
+
 ## \[1.13.9\] - 2024-12-10
 
 From this release onwards, the experimental app runner is not part of the main bfabric package and

diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "bfabric"
 description = "Python client for the B-Fabric WSDL API"
-version = "1.13.9"
+version = "1.13.10"
 license = { text = "GPL-3.0" }
 authors = [
     { name = "Christian Panse", email = "[email protected]" },
@@ -78,6 +78,8 @@ Repository = "https://github.com/fgcz/bfabricPy"
 "bfabric_slurm_queue_status.py" = "bfabric_scripts.bfabric_slurm_queue_status:main"
 "bfabric_save_resource_description.py" = "bfabric_scripts.bfabric_save_resource_description:main"
 
+"bfabric-cli" = "bfabric_scripts.cli.__main__:app"
+
 [tool.setuptools.package-data]
 "*" = ["py.typed"]
 

diff --git a/src/bfabric/config/config_file.py b/src/bfabric/config/config_file.py
@@ -23,13 +23,15 @@ class EnvironmentConfig(BaseModel):
     @classmethod
     def gather_config(cls, values: dict[str, Any]) -> dict[str, Any]:
         """Gathers all configs into the config field."""
+        if not isinstance(values, dict):
+            return values
         values["config"] = {key: value for key, value in values.items() if key not in ["login", "password"]}
         return values
 
     @model_validator(mode="before")
     @classmethod
     def gather_auth(cls, values: dict[str, Any]) -> dict[str, Any]:
-        if "login" in values:
+        if isinstance(values, dict) and "login" in values:
             values["auth"] = BfabricAuth.model_validate(values)
         return values
 

diff --git a/src/bfabric/wrapper_creator/bfabric_submitter.py b/src/bfabric/wrapper_creator/bfabric_submitter.py
@@ -100,25 +100,17 @@ def compose_bash_script(self, configuration=None, configuration_parser=lambda x:
 #SBATCH -e {stderr_url}
 #SBATCH -o {stdout_url}
 #SBATCH --job-name=WU{workunit_id}
-#SBATCH --workdir=/home/bfabric
+#SBATCH --chdir=/home/bfabric
 #SBATCH --export=ALL,HOME=/home/bfabric
 
-# Grid Engine Parameters
-#$ -q {partition}&{nodelist}
-#$ -e {stderr_url}
-#$ -o {stdout_url}
-
-
-set -e
-set -o pipefail
+set -euxo pipefail
 
 export EMAIL="{job_notification_emails}"
 export EXTERNALJOB_ID={external_job_id}
 export RESSOURCEID_OUTPUT={resource_id_output}
 export RESSOURCEID_STDOUT_STDERR="{resource_id_stderr} {resource_id_stdout}"
 export OUTPUT="{output_list}"
 export WORKUNIT_ID="{workunit_id}"
-STAMP=`/bin/date +%Y%m%d%H%M`.$$.$JOB_ID
 TEMPDIR="/home/bfabric/prx"
 
 _OUTPUT=`echo $OUTPUT | cut -d"," -f1`
@@ -146,11 +138,11 @@ def compose_bash_script(self, configuration=None, configuration_parser=lambda x:
 ## interrupt here if you want to do a semi-automatic processing
 if [ -x /usr/bin/mutt ];
 then
-    cat $0 > $TEMPDIR/$JOB_ID.bash
+    cat $0 > $TEMPDIR/$EXTERNALJOB_ID.bash
 
     (who am i; hostname; uptime; echo $0; pwd; ps;) \
-    | mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID" $EMAIL \
-        -a $TEMPDIR/$JOB_ID.bash $TEMPDIR/config_WU$WORKUNIT_ID.yaml
+    | mutt -s "WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID" $EMAIL \
+        -a $TEMPDIR/$EXTERNALJOB_ID.bash $TEMPDIR/config_WU$WORKUNIT_ID.yaml
 fi
 # exit 0
 
@@ -161,15 +153,15 @@ def compose_bash_script(self, configuration=None, configuration_parser=lambda x:
 if [ $? -eq 0 ];
 then
      ssh fgcz-r-035.uzh.ch "bfabric_setResourceStatus_available.py $RESSOURCEID_OUTPUT" \
-     | mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID DONE" $EMAIL
+     | mutt -s "WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID DONE" $EMAIL
 
      bfabric_save_workflowstep.py $WORKUNIT_ID
      bfabric_setExternalJobStatus_done.py $EXTERNALJOB_ID
      bfabric_setWorkunitStatus_available.py $WORKUNIT_ID
     echo $?
 else
     echo "application failed"
-     mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID failed" $EMAIL < /dev/null
+     mutt -s "WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID failed" $EMAIL < /dev/null
      bfabric_setResourceStatus_available.py $RESSOURCEID_STDOUT_STDERR $RESSOURCEID;
     exit 1;
 fi

diff --git a/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py b/src/bfabric/wrapper_creator/bfabric_wrapper_creator.py
@@ -6,6 +6,7 @@
 from pathlib import Path
 from typing import Any, Literal
 
+import yaml
 from loguru import logger
 
 from bfabric import Bfabric
@@ -71,7 +72,7 @@ def create_log_resource(self, variant: Literal["out", "err"], output_resource: R
                 "name": f"slurm_std{variant}",
                 "workunitid": self.workunit_definition.registration.workunit_id,
                 "storageid": self._log_storage.id,
-                "relativepath": f"/workunitid-{self._workunit.id}_resourceid-{output_resource.id}.{variant}",
+                "relativepath": f"workunitid-{self._workunit.id}_resourceid-{output_resource.id}.{variant}",
             },
         )
         return Resource(result[0])
@@ -177,3 +178,19 @@ def write_results(self, config_serialized: str) -> tuple[dict[str, Any], dict[st
         self._client.save("externaljob", {"id": self._external_job_id, "status": "done"})
 
         return yaml_workunit_executable, yaml_workunit_externaljob
+
+    def create(self) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
+        """Creates the YAML file external job and resources, and registers everything in B-Fabric."""
+        output_resource = self.create_output_resource()
+        stdout_resource = self.create_log_resource(variant="out", output_resource=output_resource)
+        stderr_resource = self.create_log_resource(variant="err", output_resource=output_resource)
+
+        config_dict = {
+            "application": self.get_application_section(output_resource=output_resource),
+            "job_configuration": self.get_job_configuration_section(
+                output_resource=output_resource, stdout_resource=stdout_resource, stderr_resource=stderr_resource
+            ),
+        }
+        config_serialized = yaml.safe_dump(config_dict)
+        yaml_workunit_executable, yaml_workunit_externaljob = self.write_results(config_serialized=config_serialized)
+        return config_dict, yaml_workunit_executable, yaml_workunit_externaljob
diff --git a/src/bfabric/wrapper_creator/slurm.py b/src/bfabric/wrapper_creator/slurm.py
@@ -61,7 +61,12 @@ def sbatch(self, script: str | Path) -> tuple[str, str] | None:
 
         env = os.environ | {"SLURMROOT": self._slurm_root}
         result = subprocess.run(
-            [self._sbatch_bin, script], env=env, check=True, shell=False, capture_output=True, encoding="utf-8"
+            [str(self._sbatch_bin), str(script)],
+            env=env,
+            check=True,
+            shell=False,
+            capture_output=True,
+            encoding="utf-8",
         )
         # TODO the code initially had a TODO to write these two to a file, in general I think the logs of the squeue
         #      are currently not written to a file at all.

diff --git a/src/bfabric_scripts/bfabric_list_not_available_proteomics_workunits.py b/src/bfabric_scripts/bfabric_list_not_available_proteomics_workunits.py
@@ -13,77 +13,9 @@
 from __future__ import annotations
 
 from argparse import ArgumentParser
-from datetime import datetime, timedelta
 
-from loguru import logger
-from rich.console import Console
-from rich.table import Column, Table
-
-from bfabric import Bfabric
 from bfabric.cli_formatting import setup_script_logging
-from bfabric.entities import Parameter, Workunit, Application
-
-
-def render_output(workunits_by_status: dict[str, list[Workunit]], client: Bfabric) -> None:
-    """Renders the output as a table."""
-    table = Table(
-        Column("Application", no_wrap=False),
-        Column("WU ID", no_wrap=True),
-        Column("Created", no_wrap=True),
-        Column("Status", no_wrap=True),
-        Column("Created by", no_wrap=True, max_width=12),
-        Column("Name", no_wrap=False),
-        Column("Nodelist", no_wrap=False),
-    )
-
-    workunit_ids = [wu.id for wu_list in workunits_by_status.values() for wu in wu_list]
-    app_ids = {wu["application"]["id"] for wu_list in workunits_by_status.values() for wu in wu_list}
-
-    nodelist_params = Parameter.find_by({"workunitid": workunit_ids, "key": "nodelist"}, client)
-    nodelist_values = {param["workunit"]["id"]: param.value for param in nodelist_params.values()}
-    application_values = Application.find_all(ids=sorted(app_ids), client=client)
-
-    for status, workunits_all in workunits_by_status.items():
-        workunits = [x for x in workunits_all if x["createdby"] not in ["gfeeder", "itfeeder"]]
-        status_color = {
-            "Pending": "yellow",
-            "Processing": "blue",
-            "Failed": "red",
-        }.get(status, "black")
-
-        for wu in workunits:
-            app = application_values[wu["application"]["id"]]
-            table.add_row(
-                f"[link={app.web_url}]A{wu['application']['id']:3} {app['name']}[/link]",
-                f"[link={wu.web_url}&tab=details]WU{wu['id']}[/link]",
-                wu["created"],
-                f"[{status_color}]{status}[/{status_color}]",
-                wu["createdby"],
-                wu["name"],
-                nodelist_values.get(wu.id, "N/A"),
-            )
-
-    console = Console()
-    console.print(table)
-
-
-def list_not_available_proteomics_workunits(date_cutoff: datetime) -> None:
-    """Lists proteomics work units that are not available on bfabric."""
-    client = Bfabric.from_config()
-    console = Console()
-    with console.capture() as capture:
-        console.print(
-            f"listing not available proteomics work units created after {date_cutoff}", style="bright_yellow", end=""
-        )
-    logger.info(capture.get())
-
-    workunits_by_status = {}
-    for status in ["Pending", "Processing", "Failed"]:
-        workunits_by_status[status] = Workunit.find_by(
-            {"status": status, "createdafter": date_cutoff.isoformat()}, client=client
-        ).values()
-
-    render_output(workunits_by_status, client=client)
+from bfabric_scripts.cli.workunit.not_available import list_not_available_proteomics_workunits
 
 
 def main() -> None:
@@ -92,8 +24,7 @@ def main() -> None:
     parser = ArgumentParser(description="Lists proteomics work units that are not available on bfabric.")
     parser.add_argument("--max-age", type=int, help="Max age of work units in days", default=14)
     args = parser.parse_args()
-    date_cutoff = datetime.today() - timedelta(days=args.max_age)
-    list_not_available_proteomics_workunits(date_cutoff)
+    list_not_available_proteomics_workunits(max_age=args.max_age)
 
 
 if __name__ == "__main__":

diff --git a/src/bfabric_scripts/bfabric_save_workflowstep.py b/src/bfabric_scripts/bfabric_save_workflowstep.py
@@ -42,8 +42,8 @@ def save_workflowstep(workunit_id: int | None = None) -> None:
     }
 
     workunit = client.read("workunit", obj={"id": workunit_id}).to_list_dict()[0]
-    application_id = workunit["application"]["_id"]
-    container_id = workunit["container"]["_id"]
+    application_id = workunit["application"]["id"]
+    container_id = workunit["container"]["id"]
 
     if application_id in workflowtemplatestep_ids and application_id in workflowtemplate_ids:
         workflows = client.read("workflow", obj={"containerid": container_id}).to_list_dict()
@@ -70,7 +70,7 @@ def save_workflowstep(workunit_id: int | None = None) -> None:
                 "workflowtemplatestepid": workflowtemplatestep_ids[application_id],
                 "workunitid": workunit_id,
             },
-        ).to_list_dict()
+        )
         print(res[0])
 
 

diff --git a/src/bfabric_scripts/bfabric_upload_submitter_executable.py b/src/bfabric_scripts/bfabric_upload_submitter_executable.py
@@ -35,65 +35,11 @@
 from __future__ import annotations
 
 import argparse
-import base64
 from pathlib import Path
 
-import yaml
-
 from bfabric import Bfabric
 from bfabric.cli_formatting import setup_script_logging
-
-
-def slurm_parameters() -> list[dict[str, str]]:
-    parameters = [{"modifiable": "true", "required": "true", "type": "STRING"} for _ in range(3)]
-    parameters[0]["description"] = "Which Slurm partition should be used."
-    parameters[0]["enumeration"] = ["prx"]
-    parameters[0]["key"] = "partition"
-    parameters[0]["label"] = "partition"
-    parameters[0]["value"] = "prx"
-    parameters[1]["description"] = "Which Slurm nodelist should be used."
-    parameters[1]["enumeration"] = ["fgcz-r-033"]
-    parameters[1]["key"] = "nodelist"
-    parameters[1]["label"] = "nodelist"
-    parameters[1]["value"] = "fgcz-r-[035,028]"
-    parameters[2]["description"] = "Which Slurm memory should be used."
-    parameters[2]["enumeration"] = ["10G", "50G", "128G", "256G", "512G", "960G"]
-    parameters[2]["key"] = "memory"
-    parameters[2]["label"] = "memory"
-    parameters[2]["value"] = "10G"
-    return parameters
-
-
-def main_upload_submitter_executable(
-    client: Bfabric, filename: Path, engine: str, name: str | None, description: str | None
-) -> None:
-    executable = filename.read_text()
-
-    attr = {
-        "context": "SUBMITTER",
-        "parameter": [],
-        "masterexecutableid": 11871,
-        "status": "available",
-        "enabled": "true",
-        "valid": "true",
-        "base64": base64.b64encode(executable.encode()).decode(),
-    }
-
-    if engine == "slurm":
-        name = name or "yaml / Slurm executable"
-        description = description or "Submitter executable for the bfabric functional test using Slurm."
-        attr["version"] = "1.03"
-        attr["parameter"] = slurm_parameters()
-    else:
-        raise NotImplementedError
-
-    if name:
-        attr["name"] = name
-    if description:
-        attr["description"] = description
-
-    res = client.save("executable", attr)
-    print(yaml.dump(res))
+from bfabric_scripts.cli.external_job.upload_submitter_executable import upload_submitter_executable_impl
 
 
 def main() -> None:
@@ -111,7 +57,7 @@ def main() -> None:
     parser.add_argument("--name", type=str, help="Name of the submitter", required=False)
     parser.add_argument("--description", type=str, help="Description about the submitter", required=False)
     options = parser.parse_args()
-    main_upload_submitter_executable(client=client, **vars(options))
+    upload_submitter_executable_impl(client=client, **vars(options))
 
 
 if __name__ == "__main__":

diff --git a/src/bfabric_scripts/cli/__init__.py b/src/bfabric_scripts/cli/__init__.py
diff --git a/src/bfabric_scripts/cli/__main__.py b/src/bfabric_scripts/cli/__main__.py
@@ -0,0 +1,13 @@
+import cyclopts
+
+from bfabric_scripts.cli.cli_external_job import app as _app_external_job
+from bfabric_scripts.cli.cli_read import app as _app_read
+from bfabric_scripts.cli.cli_workunit import app as _app_workunit
+
+app = cyclopts.App()
+app.command(_app_read, name="read")
+app.command(_app_external_job, name="external-job")
+app.command(_app_workunit, name="workunit")
+
+if __name__ == "__main__":
+    app()