Merge branch 'master' into module/jabba/1.0

LCR-BCCRC · Jan 31, 2022 · d6603a5 · d6603a5
2 parents daeb9a4 + 82439d6
commit d6603a5
Show file tree

Hide file tree

Showing 196 changed files with 14,633 additions and 536 deletions.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -16,6 +16,8 @@
 
 - [ ] Input and output files are being symlinked into the `CFG["inputs"]` and `CFG["outputs"]` subdirectories, respectively.
 
+- [ ] I grouped the input symlinking rule with the next job that uses the input files.
+
 - [ ] I updated the final target rule (`*_all`) to include every output rule.
 
 - [ ] I explained important module design decisions in `CHANGELOG.md`.
@@ -48,4 +50,11 @@
 
 ## Checklist for Updated Module
 
-To be completed.
+Important! If you are updating the module version, ensure the previous version of the module is restored from master.
+If you want to restore a deleted file or directory from the remote master, you can use `git checkout origin/master path/to/file`;
+a subsequent `git commit` will ensure that the file is tracked on your branch again.
+Example:
+```
+mv modules/strelka/1.1 modules/strelka/1.2
+git checkout origin/master modules/strelka/1.1
+```
diff --git a/demo/capture_Snakefile.smk b/demo/capture_Snakefile.smk
@@ -0,0 +1,79 @@
+#!/usr/bin/env snakemake
+
+'''
+This Snakefile is made to run all the modules compatible with the capture workflow.
+Compatibility with a workflow can be checked by referring to the pairing_config parameter present in the default.yaml file of that module.
+'''
+##### SETUP #####
+
+import oncopipe as op
+
+# filter sample table to use only capture seq_type
+SAMPLES = op.load_samples("data/samples.tsv")
+CAPTURE = op.filter_samples(SAMPLES, seq_type = "capture")
+
+
+##### REFERENCE_FILES WORKFLOW #####
+
+
+subworkflow reference_files:
+    workdir:
+        "reference/"
+    snakefile:
+        "../workflows/reference_files/2.4/reference_files.smk"
+    configfile:
+        "../workflows/reference_files/2.4/config/default.yaml"
+
+
+##### CONFIGURATION FILES #####
+
+
+# Load module-specific configuration
+configfile: "../modules/slms_3/1.0/config/default.yaml"
+configfile: "../modules/picard_qc/1.0/config/default.yaml"
+configfile: "../modules/bam2fastq/1.2/config/default.yaml"
+configfile: "../modules/sequenza/1.4/config/default.yaml"
+configfile: "../modules/bwa_mem/1.1/config/default.yaml"
+configfile: "../modules/utils/2.1/config/default.yaml"
+configfile: "../modules/liftover/1.2/config/default.yaml"
+configfile: "../modules/battenberg/1.2/config/default.yaml"
+configfile: "../modules/pathseq/1.0/config/default.yaml"
+
+# Load project-specific config, which includes the shared 
+# configuration and some module-specific config updates
+configfile: "capture_config.yaml"
+
+
+##### CONFIGURATION UPDATES #####
+
+
+# Use all capture samples as the default sample list for each module
+config["lcr-modules"]["_shared"]["samples"] = CAPTURE
+
+##### MODULE SNAKEFILES #####
+
+
+# Load module-specific snakefiles
+include: "../modules/slms_3/1.0/slms_3.smk"
+include: "../modules/picard_qc/1.0/picard_qc.smk"
+include: "../modules/bam2fastq/1.2/bam2fastq.smk"
+include: "../modules/sequenza/1.4/sequenza.smk"
+include: "../modules/bwa_mem/1.1/bwa_mem.smk"
+include: "../modules/utils/2.1/utils.smk"
+include: "../modules/liftover/1.2/liftover.smk"
+include: "../modules/battenberg/1.2/battenberg.smk"
+include: "../modules/pathseq/1.0/pathseq.smk"
+
+
+##### TARGETS ######
+
+rule all:
+    input:
+        rules._slms_3_all.input,
+        rules._picard_qc_all.input,
+        rules._bam2fastq_all.input,
+        rules._sequenza_all.input,
+        rules._bwa_mem_all.input,
+        rules._liftover_all.input,
+        rules._battenberg_all.input,
+        rules._pathseq_all.input
diff --git a/demo/capture_config.yaml b/demo/capture_config.yaml
@@ -0,0 +1,73 @@
+lcr-modules:
+    _shared:
+        lcr-modules: "../"
+        lcr-scripts: "../../lcr-scripts/"
+        root_output_dir: "results/"
+        scratch_directory: "scratch/"
+        unmatched_normal_ids:
+            capture--grch37: "TCRBOA7-N-WEX"
+
+    slms_3:
+        inputs:
+            sample_bam: "data/{sample_id}.bam"
+            sample_bai: "data/{sample_id}.bam.bai"
+
+    picard_qc:
+        inputs:
+            sample_bam: "data/{sample_id}.bam"
+            sample_bai: "data/{sample_id}.bam.bai"
+        switches:
+            capture_intervals:
+                _default: "reference/exomes/grch37/interval/target_regions.nochr_intervals.txt"
+                # if 'capture_kit_id' is a column in samples.tsv and contains more than one kit ID, specify each kit using the values in that column (e.g. as shown below), and add the corresponding bed file if needed
+                # S07604624: "reference/exomes/grch37/interval/S07604624_intervals.txt"
+                # <grch38_kit>: "reference/exomes/grch38/interval/<grch38_kit>_intervals.txt"
+
+    bam2fastq:
+        inputs:
+            sample_bam: "data/{sample_id}.bam"
+        temp_outputs: True # fastq outputs will be temporary
+
+    sequenza:
+        inputs:
+            sample_bam: "data/{sample_id}.bam"
+            sample_bai: "data/{sample_id}.bam.bai"
+        scratch_subdirectories: []
+
+    bwa_mem:
+        inputs:
+            sample_fastq_1: "results/bam2fastq-1.2/01-fastq/{seq_type}/{sample_id}.read1.fastq.gz"
+            sample_fastq_2: "results/bam2fastq-1.2/01-fastq/{seq_type}/{sample_id}.read2.fastq.gz"
+        scratch_subdirectories: []
+
+
+    liftover:
+        tool: "sequenza"
+        dirs:
+            _parent: "results/sequenza-1.4_liftover-1.2"
+        inputs:
+            sample_seg: "results/sequenza-1.4/99-outputs/filtered_seg/{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.igv.seg"
+
+    utils:
+        inputs:
+            bed:
+                grch37: "data/exome_bed/hg19/target_regions.nochr.bed" # make sure this corresponds with config["lcr-modules"]["picard_qc"]["inputs"]["intervals"]
+                # if testing on GSC, use this file: "/projects/dscott_prj/CCSRI_1500/exomes/ref/agilent/hg19/target_regions.nochr.bed"
+        mem_mb:
+            bam_sort: 48000
+        threads:
+            bam_sort: 12
+
+    battenberg:
+        inputs:
+            # Available wildcards: {seq_type} {genome_build} {sample_id}
+            sample_bam: "data/{sample_id}.bam"
+
+    pathseq:
+        inputs:
+            sample_bam: "data/{sample_id}.bam"
+            sample_bai: "data/{sample_id}.bam.bai"
+
+        options:
+            min_read_length: 49
+            ebv_cutoff: [0.00004, 0.00008]
diff --git a/demo/config.yaml b/demo/config.yaml
diff --git a/demo/data/TCRBOA7-N-WGS.bam b/demo/data/TCRBOA7-N-WGS.bam
diff --git a/demo/data/TCRBOA7-N-WGS.bam.bai b/demo/data/TCRBOA7-N-WGS.bam.bai
diff --git a/demo/data/TCRBOA7-T-WGS.bam b/demo/data/TCRBOA7-T-WGS.bam
diff --git a/demo/data/TCRBOA7-T-WGS.bam.bai b/demo/data/TCRBOA7-T-WGS.bam.bai
diff --git a/demo/data/samples.tsv b/demo/data/samples.tsv
@@ -2,3 +2,5 @@ sample_id	seq_type	patient_id	tissue_status	genome_build	strand	read_length
 TCRBOA7-N-WEX	capture	TCRBOA7	normal	grch37	positive	100
 TCRBOA7-T-WEX	capture	TCRBOA7	tumour	grch37	positive	100
 TCRBOA7-T-RNA	mrna	TCRBOA7	tumour	grch37	positive	100
+TCRBOA7-N-WGS	genome	TCRBOA7	normal	grch37	positive	100
+TCRBOA7-T-WGS	genome	TCRBOA7	tumour	grch37	positive	100
diff --git a/demo/dry-run.sh b/demo/dry-run.sh
@@ -1,6 +1,18 @@
 #!/bin/bash
 
+# Launches a snakefile of your choice in dry run mode (for debugging)
+# Usage: ./dry-run.sh <snakefile.smk> <target_rule> "<snakemake_flags>"
+# Example: ./dry-run.sh example.smk example_all
+# snakefile.smk: The snakefile you want to run
+# target_rule: The name of one of the target rules specified in one of the included Snakefiles
+# snakemake_flags: One or more flags to pass to snakemake, specified inside quotation marks
+
+
 # Default to all targets
-TARGETS=${@:-all}
+snakefile=$1
+TARGETS=${2:-all}
+snakemake_flags=$3
+
+snakemake --dryrun --cores 24 $snakemake_flags -s $snakefile --printshellcmds --reason --use-conda $TARGETS
+
 
-snakemake --dryrun --cores 24 --printshellcmds --reason --use-conda $TARGETS