From 3027693b0d4d5d1dc48b7dd8b06493db2abf722f Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 19 Aug 2024 09:49:32 -0500
Subject: [PATCH] Update comments/READMEs based on PR feedback

---
 .gitignore                          |  3 +++
 README.md                           |  3 +++
 docker-wrappers/SPRAS/README.md     | 18 ++++++++++++------
 .../SPRAS/spras_profile/config.yaml | 12 ++++++++++++
 4 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 51f1997f..6df8c5e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,3 +141,6 @@ TempMat.mat
 
 # OSX-specific stuff
 **/.DS_Store
+
+# SPRAS singularity container
+spras.sif
\ No newline at end of file
diff --git a/README.md b/README.md
index 8dc81a2b..2e6c6b54 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,9 @@ Output files will be written to the `output` directory.
 
 You do not need to manually download Docker images from DockerHub before running SPRAS. The workflow will automatically download any missing images as long as Docker is running.
 
+### Running SPRAS with HTCondor
+Large SPRAS workflows may benefit from execution with HTCondor, a scheduler/manager for distributed high-throughput computing workflows that allows many Snakemake steps to be run in parallel. For instructions on running SPRAS in this setting, see `docker-wrappers/SPRAS/README.md`.
+
 ## Components
 
 **Configuration file**: Specifies which pathway reconstruction algorithms to run, which hyperparameter combinations to use, and which datasets to run them on.
diff --git a/docker-wrappers/SPRAS/README.md b/docker-wrappers/SPRAS/README.md
index 1d590daa..6e453bd0 100644
--- a/docker-wrappers/SPRAS/README.md
+++ b/docker-wrappers/SPRAS/README.md
@@ -53,7 +53,7 @@ git clone https://github.com/Reed-CompBio/spras.git
 ```
 
 There are currently two options for running SPRAS with HTCondor. The first is to submit all SPRAS jobs to a single remote Execution Point (EP). The second
-is to use the the snakemake HTCondor executor to parallelize the workflow by submitting each job to its own EP.
+is to use the Snakemake HTCondor executor to parallelize the workflow by submitting each job to its own EP.
 
 ### Submitting All Jobs to a Single EP
 
@@ -68,10 +68,13 @@ CHTC pool, omit the `+WantGlideIn` and `requirements` lines
 
 ### Submitting Parallel Jobs
 
-Parallelizing SPRAS workflows with HTCondor currently requires an experimental executor for HTCondor that has been forked from the upstream [HTCondor Snakemake executor](https://github.com/jhiemstrawisc/snakemake-executor-plugin-htcondor/tree/spras-feature-dev).
-To get this executor, clone the forked repository using the following:
+Parallelizing SPRAS workflows with HTCondor requires two additional pieces of setup. First, it requires an activated SPRAS conda environment with a `pip install`-ed version of the SPRAS module (see the main `README.md` for detailed instructions on pip installation of SPRAS).
+
+Second, it requires an experimental executor for HTCondor that has been forked from the upstream [HTCondor Snakemake executor](https://github.com/htcondor/snakemake-executor-plugin-htcondor).
+
+To install this executor in the spras conda environment, clone the forked repository using the following:
 ```bash
-git clone -b spras-feature-dev https://github.com/jhiemstrawisc/snakemake-executor-plugin-htcondor.git
+git clone https://github.com/htcondor/snakemake-executor-plugin-htcondor.git
 ```
 Then, from your activated `spras` conda environment (important), run:
@@ -105,10 +108,13 @@ To run this same workflow in the OSPool, add the following to the profile's defa
 ```
 
 **Note**: This workflow requires that the terminal session responsible for running snakemake stays active. Closing the terminal will suspend jobs,
-but the workflow can use Snakemakes checkpointing to pick up any jobs where they left off.
+but the workflow can use Snakemake's checkpointing to pick up any jobs where they left off.
+
+**Note**: If you encounter an error that says `No module named 'spras'`, make sure you've `pip install`-ed the SPRAS module into your conda environment.
 
 ### Job Monitoring
 
-To monitor the state of the job, you can run `condor_q` for a snapshot of how the job is doing, or you can run `condor_watch_q` if you want realtime updates.
+To monitor the state of the workflow, you can use a second terminal to run `condor_q` for a snapshot of how the workflow is doing, or you can run `condor_watch_q` for real-time updates.
+
 Upon completion, the `output` directory from the workflow should be returned as `spras/docker-wrappers/SPRAS/output`, along with several files containing the workflow's logging information (anything that matches `logs/spras_*` and ending in `.out`, `.err`, or `.log`). If the job was unsuccessful, these files should contain useful debugging clues about what may have gone wrong.
 
diff --git a/docker-wrappers/SPRAS/spras_profile/config.yaml b/docker-wrappers/SPRAS/spras_profile/config.yaml
index 3d72043f..0cfb2bba 100644
--- a/docker-wrappers/SPRAS/spras_profile/config.yaml
+++ b/docker-wrappers/SPRAS/spras_profile/config.yaml
@@ -1,11 +1,23 @@
+# Default configuration for the SPRAS/HTCondor executor profile. Each of these values
+# can also be passed via command line flags, e.g. `--jobs 30 --executor htcondor`.
+
+# 'jobs' specifies the maximum number of HTCondor jobs that can be in the queue at once.
 jobs: 30
 executor: htcondor
 configfile: example_config.yaml
+# Indicate to the plugin that jobs running on various EPs do not share a filesystem with
+# each other, or with the AP.
 shared-fs-usage: none
+
+# Default resources will apply to all workflow steps. If a single workflow step fails due
+# to insufficient resources, it can be re-run with modified values. Snakemake will handle
+# picking up where it left off, and won't re-run steps that have already completed.
 default-resources:
   job_wrapper: "spras.sh"
   # If running in CHTC, this only works with apptainer images
   container_image: "spras.sif"
   universe: "container"
+  # The value for request_disk should be large enough to accommodate the runtime container
+  # image, any additional PRM container images, and your input data.
   request_disk: "16GB"
   request_memory: "8GB"
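
For reviewers, the "Submitting Parallel Jobs" setup documented above condenses into a short shell session. This is a minimal sketch, not part of the patch: the `pip install -e` step is an assumption (the hunk shows the clone, but the exact install command falls outside the diff context), and it assumes you launch Snakemake from `spras/docker-wrappers/SPRAS` so that the `spras_profile` directory is available.

```bash
# Sketch of the parallel-execution setup from the README hunk above.
# Assumes the spras conda environment exists and has SPRAS pip install-ed.
conda activate spras

# Clone the experimental HTCondor executor and install it into the environment.
# NOTE: the editable install below is an assumption; the patch only shows the clone.
git clone https://github.com/htcondor/snakemake-executor-plugin-htcondor.git
pip install -e snakemake-executor-plugin-htcondor

# From spras/docker-wrappers/SPRAS, launch the workflow using the profile added
# by this patch; Snakemake reads spras_profile/config.yaml for its defaults.
snakemake --profile spras_profile
```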
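The new comment at the top of `spras_profile/config.yaml` notes that each profile value can also be passed as a command line flag. A hedged illustration of that equivalence, together with the monitoring commands the README hunk mentions:

```bash
# Equivalent to the profile's `jobs`, `executor`, and `configfile` entries,
# passed as flags instead (per the comment at the top of config.yaml):
snakemake --jobs 30 --executor htcondor --configfile example_config.yaml

# From a second terminal, check on the submitted jobs:
condor_q          # one-time snapshot of the queue
condor_watch_q    # continuously refreshing view
```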