HopkinsIDD · TimothyWillard · Sep 13, 2024 · Sep 13, 2024 · Sep 13, 2024 · Sep 13, 2024
diff --git a/.github/workflows/conda-env.yml b/.github/workflows/conda-env.yml
@@ -0,0 +1,37 @@
+# Regenerates the conda `environment.yml` by running
+# `build/create_environment_yml.R` on a GitHub-hosted runner.
+name: Generate Conda Environment
+
+# Triggers: manual dispatch, plus pushes and pull requests filtered to the
+# `main` branch that touch the generator script or an R package DESCRIPTION.
+on:
+  workflow_dispatch:
+  push:
+    paths:
+      - build/create_environment_yml.R
+      - flepimop/R_packages/*/DESCRIPTION
+    branches:
+      - main
+  pull_request:
+    paths:
+      - build/create_environment_yml.R
+      - flepimop/R_packages/*/DESCRIPTION
+    branches:
+      - main
+
+jobs:
+  generate-environment-yml:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # Source branch of the pull request. This expression is empty for
+          # non-PR events -- NOTE(review): confirm the checkout action's
+          # default ref is the intended one for push / workflow_dispatch.
+          ref: ${{ github.event.pull_request.head.ref }}
+      # Provides the `Rscript` executable used by the next step.
+      - uses: r-lib/actions/setup-r@v2
+      - name: Generate Environment YAML
+        # Run from the repository root; the script defaults to the working
+        # directory when no path argument is given.
+        run: Rscript build/create_environment_yml.R
+      - name: Check For Environment Change
+        # Context values are handed to the script through the environment
+        # instead of being interpolated into it, so a crafted branch name
+        # cannot inject shell commands.
+        env:
+          ACTOR: ${{ github.actor }}
+          # PR source branch for pull_request events; otherwise the branch
+          # that triggered the run (push / workflow_dispatch).
+          TARGET_BRANCH: ${{ github.head_ref || github.ref_name }}
+        run: |
+          # Commit and push only when the generator changed environment.yml.
+          if [[ -n "$(git status -s -- environment.yml)" ]]; then
+            git config --global user.name "${ACTOR}"
+            git config --global user.email "${ACTOR}@users.noreply.github.com"
+            git add environment.yml
+            git commit -m 'Update `environment.yml` via GitHub action'
+            git push origin "HEAD:refs/heads/${TARGET_BRANCH}"
+          fi
diff --git a/README.md b/README.md
@@ -1,3 +1,19 @@
+
+# FlepiMoP
+
+The *Fle*xible *Epi*demic *Mo*deling *P*ipeline, `FlepiMoP`, makes it easy to build an infectious disease model, infer that model's parameters, and project scenario outcomes.
+
+# Quickstart
+
+```bash
+mkdir myflepimopworkspace && cd $_
+git clone git@github.com:HopkinsIDD/flepiMoP.git --depth 1
+./flepiMoP/install_ubuntu.sh
+cp -r ./flepiMoP/examples/tutorial_two_subpops test_model && cd $_
+gempyor-simulate -c config_sample_2pop.yml
+flepimop-inference-main -c config_sample_2pop_inference.yml
+```
+
 # flepiMoP
 
 Welcome to the Johns Hopkins University Infectious Disease Dynamics's `Flexible Epidemic Modeling Pipeline`. “FlepiMoP” provides a framework for quickly coding and simulating compartmental infectious disease models to project epidemic trajectories and their healthcare impacts, and to evaluate the impact of potential interventions. The package is a work-in-progress but is extensively documented https://iddynamics.gitbook.io/flepimop/, with instructions describing how to install the package, code up your model, run forward simulations, and infer model parameters from timeseries data. More details of the project are available on our dedicated website https://www.flepimop.org/. 

diff --git a/batch/AWS_inference_runner.sh b/batch/AWS_inference_runner.sh
@@ -145,14 +145,14 @@ echo "---"
 find data
 echo "==="
 
-echo "***************** RUNNING inference_slot.R *****************"
+echo "***************** RUNNING flepimop-inference-slot *****************"
 
-Rscript flepiMoP/flepimop/main_scripts/inference_slot.R -p flepiMoP
+flepimop-inference-slot -p flepiMoP
 dvc_ret=$?
 if [ $dvc_ret -ne 0 ]; then
-        error_handler "Error code returned from inference_main.R: $dvc_ret"
+        error_handler "Error code returned from flepimop-inference-slot: $dvc_ret"
 fi
-echo "***************** DONE RUNNING inference_slot.R *****************"
+echo "***************** DONE RUNNING flepimop-inference-slot *****************"
 
 echo "***************** UPLOADING RESULT TO S3 *****************"
 for type in "seir" "hosp" "llik" "spar" "snpi" "hnpi" "hpar"

diff --git a/batch/SLURM_inference_job.run b/batch/SLURM_inference_job.run
@@ -136,9 +136,9 @@ fi
 ls -ltr model_output
 echo "***************** DONE FETCHING RESUME FILES *****************"
 
-echo "***************** RUNNING INFERENCE_MAIN.R *****************"
+echo "***************** RUNNING flepimop-inference-slot *****************"
 export LOG_FILE="$FS_RESULTS_PATH/log_${FLEPI_RUN_INDEX}_${FLEPI_SLOT_INDEX}.txt"
-echo "Rscript $FLEPI_PATH/flepimop/main_scripts/inference_slot.R --config $CONFIG_PATH   # path to the config file
+echo "flepimop-inference-slot --config $CONFIG_PATH   # path to the config file
                                                                  --run_id $FLEPI_RUN_INDEX  # Unique identifier for this run
                                                                  --seir_modifiers_scenarios $FLEPI_SEIR_SCENARIOS  # name of the intervention to run, or 'all'
                                                                  --outcome_modifiers_scenarios $FLEPI_OUTCOME_SCENARIOS  # name of the outcome scenarios to run, or 'all'
@@ -155,12 +155,12 @@ echo "Rscript $FLEPI_PATH/flepimop/main_scripts/inference_slot.R --config $CONFI
                                                                  --is-resume $RESUME_RUN # Is this run a resume
                                                                  --is-interactive FALSE # Is this run an interactive run" #> $LOG_FILE 2>&1 &
 
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_slot.R -p $FLEPI_PATH --config $CONFIG_PATH --run_id $FLEPI_RUN_INDEX --seir_modifiers_scenarios $FLEPI_SEIR_SCENARIOS --outcome_modifiers_scenarios $FLEPI_OUTCOME_SCENARIOS --jobs 1 --iterations_per_slot $FLEPI_ITERATIONS_PER_SLOT --this_slot $FLEPI_SLOT_INDEX --this_block 1 --stoch_traj_flag $FLEPI_STOCHASTIC_RUN --is-resume $RESUME_RUN --is-interactive FALSE #> $LOG_FILE 2>&1
+flepimop-inference-slot -p $FLEPI_PATH --config $CONFIG_PATH --run_id $FLEPI_RUN_INDEX --seir_modifiers_scenarios $FLEPI_SEIR_SCENARIOS --outcome_modifiers_scenarios $FLEPI_OUTCOME_SCENARIOS --jobs 1 --iterations_per_slot $FLEPI_ITERATIONS_PER_SLOT --this_slot $FLEPI_SLOT_INDEX --this_block 1 --stoch_traj_flag $FLEPI_STOCHASTIC_RUN --is-resume $RESUME_RUN --is-interactive FALSE #> $LOG_FILE 2>&1
 dvc_ret=$?
 if [[ $dvc_ret -ne 0 ]]; then
-        echo "Error code returned from inference_slot.R: $dvc_ret"
+        echo "Error code returned from flepimop-inference-slot: $dvc_ret"
 fi
-echo "***************** DONE RUNNING INFERENCE_SLOT.R *****************"
+echo "***************** DONE flepimop-inference-slot *****************"
 
 
 echo "***************** UPLOADING RESULT TO S3 (OR NOT) *****************"

diff --git a/build/create_environment_yml.R b/build/create_environment_yml.R
@@ -0,0 +1,60 @@
+#!/usr/bin/env Rscript
+
+# Helper functions
+
+# Split comma-separated package fields into a de-duplicated character vector.
+# All whitespace (including newlines inside a field) is removed before
+# splitting, so "a, b,\n  c" yields c("a", "b", "c").
+split_pkgs <- function(x) {
+  compact <- gsub("\\s+", "", x)
+  pieces <- strsplit(compact, ",")
+  unique(unlist(pieces))
+}
+
+# Light argument parsing
+# The optional first command-line argument is the flepiMoP directory; the
+# current working directory is used when it is omitted.
+args <- commandArgs(trailingOnly = TRUE)
+flepi_path <- if (length(args)) args[1L] else getwd()
+
+# Get R package dependencies
+# Every entry under flepimop/R_packages is treated as an R package directory.
+rpkgs <- list.files(
+  file.path(flepi_path, "flepimop", "R_packages"),
+  full.names = TRUE
+)
+# `lapply` rather than `sapply`: when every package happens to list the same
+# number of dependencies, `sapply` simplifies to a character matrix, after
+# which `unique()` de-duplicates rows instead of individual package names.
+dependencies <- lapply(rpkgs, function(rpkg) {
+  description <- read.dcf(file.path(rpkg, "DESCRIPTION"))
+  sections <- c("Depends", "Imports")
+  contained_sections <- sections %in% colnames(description)
+  if (any(contained_sections)) {
+    return(split_pkgs(description[, sections[contained_sections]]))
+  }
+  character()
+})
+dependencies <- sort(unique(unlist(dependencies)))
+# Drop packages that are listed explicitly elsewhere in the generated file
+# (arrow), excluded by name (covidcast, methods), or are the local flepiMoP
+# packages themselves.
+dependencies <- setdiff(
+  dependencies,
+  c("arrow", "covidcast", "methods", basename(rpkgs))
+)
+# Drop the R version requirement itself, e.g. "R" or "R(>=4.0)".
+dependencies <- dependencies[!grepl("^R(\\(.*\\))?$", dependencies)]
+
+# Construct environment.yml file
+environment_yml <- file.path(flepi_path, "environment.yml")
+
+# Fixed preamble: conda channels followed by the explicitly listed packages.
+static_lines <- c(
+  "channels:",
+  "- conda-forge",
+  "- defaults",
+  "- r",
+  "- dnachun",
+  "dependencies:",
+  "- python=3.10",
+  "- pip",
+  "- r-base>=4.3",
+  "- pyarrow=17.0.0",
+  "- r-arrow=17.0.0",
+  "- r-sf"
+)
+# Each derived dependency becomes an `r-`-prefixed entry.
+new_environment_yml <- c(static_lines, paste0("- r-", dependencies))
+
+# Current file contents, if any, with comment lines (such as the timestamp
+# header written below) removed so they do not affect the comparison.
+old_environment_yml <- character()
+if (file.exists(environment_yml)) {
+  old_environment_yml <- readLines(environment_yml)
+}
+old_environment_yml <- old_environment_yml[!grepl("^#", old_environment_yml)]
+
+# Rewrite the file, prefixed with a timestamp comment, only when the
+# generated content differs from what is already on disk.
+if (!identical(new_environment_yml, old_environment_yml)) {
+  timestamp <- paste0("# ", format(Sys.time(), "%a %b %d %X %Y %Z"))
+  new_environment_yml <- c(timestamp, new_environment_yml)
+  writeLines(new_environment_yml, environment_yml)
+}
diff --git a/build/flepi_init.sh b/build/flepi_init.sh
@@ -0,0 +1,129 @@
+# Generic setup
+# Abort on the first failing command; turned off again by the `set +e` at the
+# end of this script.
+set -e
+
+# Cluster specific setup
+# $1 names the cluster. Each branch defines USERDIR and WORKDIR and loads the
+# environment modules used on that cluster.
+if [[ "$1" == "longleaf" ]]; then
+    # Setup general purpose user variables needed for Longleaf
+    # The directory paths embed the first and second characters of $USER.
+    USERO="${USER:0:1}"
+    USERN="${USER:1:1}"
+    USERDIR="/users/$USERO/$USERN/$USER/"
+    WORKDIR="/work/users/$USERO/$USERN/$USER/"
+
+    # Load required modules
+    module purge
+    module load gcc/9.1.0
+    module load anaconda/2023.03
+    module load git
+elif [[ "$1" == "rockfish" ]]; then
+    # Setup general purpose user variables needed for RockFish
+    USERDIR=$HOME
+    WORKDIR="/scratch4/struelo1/flepimop-code/$USER/"
+    # Quoted so an unusual $USER cannot split or glob the path.
+    mkdir -vp "$WORKDIR"
+
+    # Load required modules
+    module purge
+    module load gcc/9.3.0
+    module load anaconda/2020.07
+    module load git/2.42.0
+else
+    echo "The cluster name '$1' is not recognized, must be one of: 'longleaf', 'rockfish'."
+    exit 1
+fi
+
+# Ensure we have a $FLEPI_PATH
+# When the caller did not provide one, default to a clone under the user
+# directory and export it.
+if [[ ! -n "${FLEPI_PATH}" ]]; then
+    echo "An explicit \$FLEPI_PATH was not provided, setting to '$USERDIR/flepiMoP'."
+    FLEPI_PATH="$USERDIR/flepiMoP"
+    export FLEPI_PATH
+fi
+
+# The path has to be an existing directory ...
+if [[ ! -d "$FLEPI_PATH" ]]; then
+    echo "You must have a clone of flepiMoP at $FLEPI_PATH to use this script."
+    exit 1
+fi
+# ... and it has to contain a `.git` directory.
+if [[ ! -d "$FLEPI_PATH/.git" ]]; then
+    echo "The flepiMoP found at '$FLEPI_PATH' is not a git clone, unsure of how to proceed."
+    exit 1
+fi
+
+# Conda init
+conda activate "$USERDIR/flepimop-env"
+
+# Check the conda environment is valid
+# `|| true` stops `set -e` from aborting when an executable is missing, so
+# the explanatory messages below are printed instead of a silent exit.
+WHICH_PYTHON=$( which python || true )
+WHICH_R=$( which R || true )
+# 1 when the resolved path mentions the expected environment, 0 otherwise.
+WHICH_PYTHON_OKAY=$( echo "$WHICH_PYTHON" | grep "flepimop-env" | wc -l )
+WHICH_R_OKAY=$( echo "$WHICH_R" | grep "flepimop-env" | wc -l )
+if [[ "$WHICH_PYTHON_OKAY" -ne 1 ]]; then
+    echo "The python found is '$WHICH_PYTHON', which does not contain the expected 'flepimop-env'."
+    exit 1
+fi
+if [[ "$WHICH_R_OKAY" -ne 1 ]]; then
+    echo "The R found is '$WHICH_R', which does not contain the expected 'flepimop-env'."
+    exit 1
+fi
+PYTHON_ARROW_VERSION=$( python -c "import pyarrow; print(pyarrow.__version__)" )
+R_ARROW_VERSION=$( Rscript -e "cat(as.character(packageVersion('arrow')))" )
+# The R version only needs to contain the Python version as a substring.
+# `-F` matches it literally so the dots are not treated as regex wildcards.
+COMPATIBLE_ARROW_VERSION=$( echo "$R_ARROW_VERSION" | grep -F -- "$PYTHON_ARROW_VERSION" | wc -l )
+if [[ "$COMPATIBLE_ARROW_VERSION" -ne 1 ]]; then
+    # Warning only: a mismatch does not stop the script.
+    echo "The R version of arrow is '$R_ARROW_VERSION' and the python version is '$PYTHON_ARROW_VERSION'. These may not be compatible versions."
+fi
+
+# Make sure the credentials file is where we expect and has the right perms
+if [[ ! -f "$USERDIR/slack_credentials.sh" ]]; then
+    # Missing credentials are reported but do not stop the script.
+    echo "You should place sensitive credentials in '$USERDIR/slack_credentials.sh'."
+else
+    # Restrict the file to owner read/write before loading it into the shell.
+    chmod 600 "$USERDIR/slack_credentials.sh"
+    source "$USERDIR/slack_credentials.sh"
+fi
+
+# Set correct env vars
+export FLEPI_STOCHASTIC_RUN=false
+export FLEPI_RESET_CHIMERICS=TRUE
+export TODAY=`date --rfc-3339='date'`
+
+echo -n "Please set a project path (relative to '$WORKDIR'): "
+read PROJECT_PATH
+export PROJECT_PATH="$WORKDIR/$PROJECT_PATH"
+
+echo -n "Please set a config path (relative to '$PROJECT_PATH'): "
+read CONFIG_PATH
+export CONFIG_PATH="$PROJECT_PATH/$CONFIG_PATH"
+
+echo -n "Please set a validation date (today is $TODAY): "
+read VALIDATION_DATE
+
+echo -n "Please set a resume location: "
+read RESUME_LOCATION
+
+echo -n "Please set a flepi run index: "
+read FLEPI_RUN_INDEX
+
+# Done
+cat << EOM
+> The HPC install script has successfully finished.
+
+If you are testing if this worked, say installing for the first time, you can use the inference example from the \`flepimop_sample\` repository:
+\`\`\`bash
+cd \$PROJECT_PATH
+flepimop-inference-main -c \$CONFIG_PATH -j 1 -n 1 -k 1
+\`\`\`
+Just make sure to \`rm -r model_output\` after running.
+
+Otherwise make sure this diagnostic info looks correct before continuing:
+* Cluster:         $1
+* User directory:  $USERDIR
+* Work directory:  $WORKDIR
+* Flepi path:      $FLEPI_PATH
+* Project path:    $PROJECT_PATH
+* Python:          $WHICH_PYTHON
+* R:               $WHICH_R
+* Python arrow:    $PYTHON_ARROW_VERSION
+* R arrow:         $R_ARROW_VERSION
+* Stochastic run:  $FLEPI_STOCHASTIC_RUN
+* Reset chimerics: $FLEPI_RESET_CHIMERICS
+* Today:           $TODAY
+* Config path:     $CONFIG_PATH
+* Validation date: $VALIDATION_DATE
+* Resume location: $RESUME_LOCATION
+* Flepi run index: $FLEPI_RUN_INDEX
+EOM
+
+set +e