From 3027693b0d4d5d1dc48b7dd8b06493db2abf722f Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 19 Aug 2024 09:49:32 -0500
Subject: [PATCH] Update comments/READMEs based on PR feedback

---
 .gitignore                          |  3 +++
 README.md                           |  3 +++
 docker-wrappers/SPRAS/README.md     | 18 ++++++++++++------
 .../SPRAS/spras_profile/config.yaml | 12 ++++++++++++
 4 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 51f1997f..6df8c5e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,3 +141,6 @@ TempMat.mat
 
 # OSX-specific stuff
 **/.DS_Store
+
+# SPRAS singularity container
+spras.sif
\ No newline at end of file
diff --git a/README.md b/README.md
index 8dc81a2b..2e6c6b54 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,9 @@ Output files will be written to the `output` directory.
 
 You do not need to manually download Docker images from DockerHub before running SPRAS. The workflow will automatically download any missing images as long as Docker is running.
 
+### Running SPRAS with HTCondor
+Large SPRAS workflows may benefit from execution with HTCondor, a scheduler/manager for distributed high-throughput computing workflows that allows many Snakemake steps to be run in parallel. For instructions on running SPRAS in this setting, see `docker-wrappers/SPRAS/README.md`.
+
 ## Components
 
 **Configuration file**: Specifies which pathway reconstruction algorithms to run, which hyperparameter combinations to use, and which datasets to run them on.
diff --git a/docker-wrappers/SPRAS/README.md b/docker-wrappers/SPRAS/README.md
index 1d590daa..6e453bd0 100644
--- a/docker-wrappers/SPRAS/README.md
+++ b/docker-wrappers/SPRAS/README.md
@@ -53,7 +53,7 @@ git clone https://github.com/Reed-CompBio/spras.git
 ```
 
 There are currently two options for running SPRAS with HTCondor. The first is to submit all SPRAS jobs to a single remote Execution Point (EP). The second
-is to use the the snakemake HTCondor executor to parallelize the workflow by submitting each job to its own EP.
+is to use the Snakemake HTCondor executor to parallelize the workflow by submitting each job to its own EP.
 
 ### Submitting All Jobs to a Single EP
 
@@ -68,10 +68,13 @@ CHTC pool, omit the `+WantGlideIn` and `requirements` lines
 
 ### Submitting Parallel Jobs
 
-Parallelizing SPRAS workflows with HTCondor currently requires an experimental executor for HTCondor that has been forked from the upstream [HTCondor Snakemake executor](https://github.com/jhiemstrawisc/snakemake-executor-plugin-htcondor/tree/spras-feature-dev).
-To get this executor, clone the forked repository using the following:
+Parallelizing SPRAS workflows with HTCondor requires two additional pieces of setup. First, it requires an activated SPRAS conda environment with a `pip install`-ed version of the SPRAS module (see the main `README.md` for detailed instructions on pip installation of SPRAS).
+
+Second, it requires an experimental executor for HTCondor that has been forked from the upstream [HTCondor Snakemake executor](https://github.com/htcondor/snakemake-executor-plugin-htcondor).
+
+To install this executor in the spras conda environment, clone the forked repository using the following:
 ```bash
-git clone -b spras-feature-dev https://github.com/jhiemstrawisc/snakemake-executor-plugin-htcondor.git
+git clone https://github.com/htcondor/snakemake-executor-plugin-htcondor.git
 ```
 Then, from your activated `spras` conda environment (important), run:
@@ -105,10 +108,13 @@ To run this same workflow in the OSPool, add the following to the profile's defa
 ```
 
 **Note**: This workflow requires that the terminal session responsible for running snakemake stays active. Closing the terminal will suspend jobs,
-but the workflow can use Snakemakes checkpointing to pick up any jobs where they left off.
+but the workflow can use Snakemake's checkpointing to pick up any jobs where they left off.
+
+**Note**: If you encounter an error that says `No module named 'spras'`, make sure you've `pip install`-ed the SPRAS module into your conda environment.
 
 ### Job Monitoring
 
-To monitor the state of the job, you can run `condor_q` for a snapshot of how the job is doing, or you can run `condor_watch_q` if you want realtime updates.
+To monitor the state of the workflow, you can use a second terminal to run `condor_q` for a snapshot of how the workflow is doing, or you can run `condor_watch_q` for real-time updates.
+
 Upon completion, the `output` directory from the workflow should be returned as `spras/docker-wrappers/SPRAS/output`, along with several files containing the workflow's logging information (anything that matches `logs/spras_*` and ending in `.out`, `.err`, or `.log`). If the job was unsuccessful, these files should contain useful debugging clues about what may have gone wrong.
 
diff --git a/docker-wrappers/SPRAS/spras_profile/config.yaml b/docker-wrappers/SPRAS/spras_profile/config.yaml
index 3d72043f..0cfb2bba 100644
--- a/docker-wrappers/SPRAS/spras_profile/config.yaml
+++ b/docker-wrappers/SPRAS/spras_profile/config.yaml
@@ -1,11 +1,23 @@
+# Default configuration for the SPRAS/HTCondor executor profile. Each of these values
+# can also be passed via command line flags, e.g. `--jobs 30 --executor htcondor`.
+
+# 'jobs' specifies the maximum number of HTCondor jobs that can be in the queue at once.
 jobs: 30
 executor: htcondor
 configfile: example_config.yaml
+# Indicate to the plugin that jobs running on various EPs do not share a filesystem with
+# each other, or with the AP.
 shared-fs-usage: none
+
+# Default resources will apply to all workflow steps. If a single workflow step fails due
+# to insufficient resources, it can be re-run with modified values. Snakemake will handle
+# picking up where it left off, and won't re-run steps that have already completed.
 default-resources:
   job_wrapper: "spras.sh"
   # If running in CHTC, this only works with apptainer images
   container_image: "spras.sif"
   universe: "container"
+  # The value for request_disk should be large enough to accommodate the runtime container
+  # image, any additional PRM container images, and your input data.
   request_disk: "16GB"
   request_memory: "8GB"
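
For reviewers, the "Submitting Parallel Jobs" setup documented above condenses into a short shell session. This is a minimal sketch, not part of the patch: the `pip install -e` step is an assumption (the hunk shows the clone, but the exact install command falls outside the diff context), and it assumes you launch Snakemake from `spras/docker-wrappers/SPRAS` so that the `spras_profile` directory is available.

```bash
# Sketch of the parallel-execution setup from the README hunk above.
# Assumes the spras conda environment exists and has SPRAS pip install-ed.
conda activate spras

# Clone the experimental HTCondor executor and install it into the environment.
# NOTE: the editable install below is an assumption; the patch only shows the clone.
git clone https://github.com/htcondor/snakemake-executor-plugin-htcondor.git
pip install -e snakemake-executor-plugin-htcondor

# From spras/docker-wrappers/SPRAS, launch the workflow using the profile added
# by this patch; Snakemake reads spras_profile/config.yaml for its defaults.
snakemake --profile spras_profile
```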
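The new comment at the top of `spras_profile/config.yaml` notes that each profile value can also be passed as a command line flag. A hedged illustration of that equivalence, together with the monitoring commands the README hunk mentions:

```bash
# Equivalent to the profile's `jobs`, `executor`, and `configfile` entries,
# passed as flags instead (per the comment at the top of config.yaml):
snakemake --jobs 30 --executor htcondor --configfile example_config.yaml

# From a second terminal, check on the submitted jobs:
condor_q          # one-time snapshot of the queue
condor_watch_q    # continuously refreshing view
```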