diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 7096c7da5..8e4525ad7 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -16,6 +16,8 @@
 
 - [ ] Input and output files are being symlinked into the `CFG["inputs"]` and `CFG["outputs"]` subdirectories, respectively.
 
+- [ ] I grouped the input symlinking rule with the next job that uses the input files.
+
 - [ ] I updated the final target rule (`*_all`) to include every output rule.
 
 - [ ] I explained important module design decisions in `CHANGELOG.md`.
@@ -48,4 +50,11 @@
 
 ## Checklist for Updated Module
 
-To be completed.
+Important! If you are updating the module version, ensure the previous version of the module is restored from master.
+To restore a deleted file or directory from the remote master, run `git checkout origin/master path/to/file`;
+a subsequent `git commit` ensures that the file is tracked on your branch again.
+Example:
+```
+mv modules/strelka/1.1 modules/strelka/1.2
+git checkout origin/master modules/strelka/1.1
+```
diff --git a/demo/Snakefile b/demo/Snakefile
deleted file mode 100755
index 96d9827b1..000000000
--- a/demo/Snakefile
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/bin/env snakemake
-
-
-##### SETUP #####
-
-import oncopipe as op
-
-SAMPLES = op.load_samples("data/samples.tsv")
-CAPTURE = op.filter_samples(SAMPLES, seq_type = "capture")
-
-
-##### REFERENCE_FILES WORKFLOW #####
-
-
-subworkflow reference_files:
-    workdir:
-        "reference/"
-    snakefile:
-        "../workflows/reference_files/2.4/reference_files.smk"
-    configfile:
-        "../workflows/reference_files/2.4/config/default.yaml"
-
-
-##### CONFIGURATION FILES #####
-
-
-# Load module-specific configuration
-configfile: "../modules/utils/2.1/config/default.yaml"
-configfile: "../modules/picard_qc/1.0/config/default.yaml"
-configfile: "../modules/salmon/1.1/config/default.yaml"
-configfile: "../modules/bam2fastq/1.2/config/default.yaml"
-configfile: "../modules/star/1.4/config/default.yaml"
-configfile: "../modules/manta/2.3/config/default.yaml"
-configfile: "../modules/gridss/1.1/config/default.yaml"
-configfile: "../modules/vcf2maf/1.2/config/default.yaml"
-configfile: "../modules/sequenza/1.4/config/default.yaml"
-configfile: "../modules/strelka/1.1/config/default.yaml"
-configfile: "../modules/bwa_mem/1.1/config/default.yaml"
-configfile: "../modules/controlfreec/1.1/config/default.yaml"
-configfile: "../modules/lofreq/1.0/config/default.yaml"
-configfile: "../modules/starfish/2.0/config/default.yaml"
-configfile: "../modules/sage/1.0/config/default.yaml"
-configfile: "../modules/slms_3/1.0/config/default.yaml"
-configfile: "../modules/ichorcna/1.0/config/default.yaml"
-configfile: "../modules/gatk_rnaseq/1.0/config/default.yaml"
-
-# Load project-specific config, which includes the shared 
-# configuration and some module-specific config updates
-configfile: "config.yaml"
-
-
-##### CONFIGURATION UPDATES #####
-
-
-# Use all samples as a default sample list for each module
-config["lcr-modules"]["_shared"]["samples"] = SAMPLES
-config["lcr-modules"]["starfish"]["samples"] = CAPTURE
-
-##### MODULE SNAKEFILES #####
-
-
-# Load module-specific snakefiles
-
-include: "../modules/slms_3/1.0/slms_3.smk"
-include: "../modules/utils/2.1/utils.smk"
-include: "../modules/picard_qc/1.0/picard_qc.smk"
-include: "../modules/salmon/1.1/salmon.smk"
-include: "../modules/star/1.4/star.smk"
-include: "../modules/manta/2.3/manta.smk"
-include: "../modules/vcf2maf/1.2/vcf2maf.smk"
-include: "../modules/sequenza/1.4/sequenza.smk"
-include: "../modules/strelka/1.1/strelka.smk"
-include: "../modules/bwa_mem/1.1/bwa_mem.smk"
-include: "../modules/gridss/1.1/gridss.smk"
-include: "../modules/bam2fastq/1.2/bam2fastq.smk"
-include: "../modules/controlfreec/1.1/controlfreec.smk"
-include: "../modules/lofreq/1.0/lofreq.smk"
-include: "../modules/starfish/2.0/starfish.smk"
-include: "../modules/sage/1.0/sage.smk"
-include: "../modules/ichorcna/1.0/ichorcna.smk"
-include: "../modules/gatk_rnaseq/1.0/gatk_rnaseq.smk"
-
-##### TARGETS ######
-
-rule all:
-    input:
-        rules._picard_qc_all.input,
-        rules._salmon_all.input,
-        rules._bam2fastq_all.input,
-        rules._star_all.input,
-        rules._manta_all.input,
-        rules._sequenza_all.input,
-        rules._lofreq_all.input,
-        rules._strelka_all.input,
-        rules._bwa_mem_all.input,
-        rules._liftover_all.input,
-        rules._controlfreec_all.input,
-        rules._gridss_all.input,
-        rules._controlfreec_all.input,
-        rules._starfish_all.input,
-        rules._vcf2maf_all.input,
-        rules._sage_all.input, 
-        rules._slms_3_all.input,
-        rules._ichorcna_all.input,
-        rules._gatk_rnaseq_all.input
-        
diff --git a/demo/config.yaml b/demo/config.yaml
deleted file mode 100755
index 74022ead1..000000000
--- a/demo/config.yaml
+++ /dev/null
@@ -1,179 +0,0 @@
-lcr-modules:
-
-    _shared:
-        lcr-modules: "../"
-        lcr-scripts: "../../lcr-scripts/"
-        root_output_dir: "results/"
-        scratch_directory: "scratch/"
-        unmatched_normal_ids:
-            capture--grch37: "TCRBOA7-N-WEX"
-
-    slms_3: 
-        inputs: 
-            sample_bam: "data/{sample_id}.bam"
-            sample_bai: "data/{sample_id}.bam.bai"
-
-
-    bam2fastq:
-        inputs:
-            sample_bam: "data/{sample_id}.bam"
-        temp_outputs: False # fastq outputs will be temporary
-
-    star:
-        inputs:
-            sample_fastq_1: "results/bam2fastq-1.2/99-outputs/{seq_type}/{sample_id}.read1.fastq.gz"
-            sample_fastq_2: "results/bam2fastq-1.2/99-outputs/{seq_type}/{sample_id}.read2.fastq.gz"
-        scratch_subdirectories: []
-
-    manta:
-        inputs:
-            sample_bam: "data/{sample_id}.bam"
-            sample_bai: "data/{sample_id}.bam.bai"
-   
-    mixcr:
-        inputs:
-            sample_fastq_1: "data/{sample_id}.read1.fastq.gz"
-            sample_fastq_2: "data/{sample_id}.read2.fastq.gz"
-
-    vcf2maf:
-        dirs:
-            _parent: "results/sage-1.0_vcf2maf-1.2"
-        inputs:
-            vep_cache: "reference/vep_caches/"
-            sample_vcf_gz: "results/sage-1.0/99-outputs/combined/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{base_name}.vcf.gz"
-            convert_coord: "{SCRIPTSDIR}/crossmap/1.0/convert_maf_coords.sh"
-        vcf_base_name: "sage.combined"
-        options:
-            vcf2maf: "--filter-vcf 0 --vcf-tumor-id {tumour_id} --vcf-normal-id {normal_id}"
-            species: "homo_sapiens"
-        conda_envs:
-            vcf2maf: "{MODSDIR}/envs/vcf2maf-1.6.18.yaml"
-            crossmap: "{SCRIPTSDIR}/crossmap/1.0/convert_maf_coords.yaml"
-        # here you can specify path to txt file with a list of custom ENST IDs that override canonical selection
-        # it will be parsed to --custom-enst flag of vcf2maf
-        # if no non-canonical transcript IDs to be included, leave switches empty
-        # This is just an example of how to include the list of custom IDs
-        switches:
-            custom_enst:
-              hg38: ""
-              grch37: "data/custom_enst.txt"
-              hs37d5: ""
-        resources:
-            vcf2maf:
-                mem_mb: 12000
-                vcf: 1
-            crossmap:
-                mem_mb: 12000
-
-
-    salmon:
-        inputs:
-            sample_fastq_1: "data/{sample_id}.read1.fastq.gz"
-            sample_fastq_2: "data/{sample_id}.read2.fastq.gz"
-        transcriptome:
-            quant_to: "hg38"
-
-    sequenza:
-        inputs:
-            sample_bam: "data/{sample_id}.bam"
-            sample_bai: "data/{sample_id}.bam.bai"
-        scratch_subdirectories: []
-    
-    lofreq:
-        inputs:
-            sample_bam: "data/{sample_id}.bam"
-            sample_bai: "data/{sample_id}.bam.bai"
-            lofreq_filter: "{MODSDIR}/src/bash/lofreq_filter.sh"
-        switches:
-            # Intentionally running LoFreq without a BED file for simplicity
-            # And to avoid having to include a large BED file in the repo
-            regions_bed:
-                _default: ""
-                capture: ""
-
-    gridss: 
-        inputs: 
-            sample_bam: "data/{sample_id}.bam"
-            sample_bai: "data/{sample_id}.bam.bai"
-        references: 
-            # See the current gridss module config file for details about where to obtain this file. 
-            viral_fa: "/projects/rmorin/projects/DLBCL_DHITsig_genomes/reference/gridss/refgenomes/human_virus/human_virus.fa"
-            viral_bwa_prefix: "/projects/rmorin/projects/DLBCL_DHITsig_genomes/reference/gridss/refgenomes/human_virus/human_virus.fa"
-            pon_dir: "/projects/rmorin/reference/hmftools-references/gridss/pon"
-            
-    strelka:
-        inputs:
-            sample_bam: "data/{sample_id}.bam"
-            sample_bai: "data/{sample_id}.bam.bai"
-            # if using manta output, use vcf file in the 99-outputs subdirectory and ensure manta version corresponds to the loaded module
-            candidate_small_indels: "results/manta-2.3/99-outputs/vcf/{seq_type}--{genome_build}/candidateSmallIndels/{tumour_id}--{normal_id}--{pair_status}.candidateSmallIndels.vcf"
-
-    utils:
-        inputs:
-            bed: 
-                grch37: "data/exome_bed/hg19/target_regions.nochr.bed" # make sure this corresponds with config["lcr-modules"]["picard_qc"]["inputs"]["intervals"]
-                # if testing on GSC, use this file: "/projects/dscott_prj/CCSRI_1500/exomes/ref/agilent/hg19/target_regions.nochr.bed"
-        mem_mb:
-            bam_sort: 48000
-        threads:
-            bam_sort: 12
-
-    picard_qc:
-        inputs:
-            sample_bam: "data/{sample_id}.bam"
-            sample_bai: "data/{sample_id}.bam.bai"
-        switches:
-            capture_intervals: 
-                _default: "reference/exomes/grch37/interval/target_regions.nochr_intervals.txt"
-                # if 'capture_kit_id' is a column in samples.tsv and contain more than one kit_id, specify each kit using the values in the column. e.g. and add the corresponding bed file if needed
-                # S07604624: "reference/exomes/grch37/interval/S07604624_intervals.txt"
-                # <grch38_kit>: "reference/exomes/grch38/interval/<grch38_kit>_intervals.txt"
-    
-    bwa_mem:
-        inputs:
-            sample_fastq_1: "results/bam2fastq-1.2/99-outputs/{seq_type}/{sample_id}.read1.fastq.gz"
-            sample_fastq_2: "results/bam2fastq-1.2/99-outputs/{seq_type}/{sample_id}.read2.fastq.gz"
-        scratch_subdirectories: []
-        
-    controlfreec:
-        inputs:
-            sample_bam: "data/{sample_id}.bam"
-            sample_bai: "data/{sample_id}.bam.bai"
-            
-    liftover:
-        tool: "battenberg"
-        inputs:
-            sample_seg: "data/{tool}/hg38/{tumour_sample_id}--{normal_sample_id}_subclones.igv.seg"
-    
-    ichorcna:
-        inputs:
-            sample_bam: "data/{sample_id}.bam"
-            sample_bai: "data/{sample_id}.bam.bai"
-            
-        scratch_subdirectories: [] # mpileup should be in scratch space
-
-    starfish: 
-        dirs: 
-            _parent: "results/starfish-2.0_strelka-1.1_lofreq-1.0"
-        inputs: 
-            names: ["strelka", "lofreq"]
-            paths: 
-                [
-                    "results/strelka-1.1/99-outputs/vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.strelka.combined.vcf.gz", 
-                    "results/lofreq-1.0/99-outputs/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.lofreq.snvs.vcf.gz"
-                ]
-
-    sage:
-        inputs:
-            # Available wildcards: {seq_type} {genome_build} {sample_id}
-            sample_bam: "data/{sample_id}.bam"
-
-        # include here any additional flags to modify default parameters
-        options:
-            sage_run: ""
-
-
-    gatk_rnaseq:
-        inputs:
-            sample_bam: "data/{sample_id}.bam"
-            sample_bai: "data/{sample_id}.bam.bai"
\ No newline at end of file
diff --git a/demo/data/samples.tsv b/demo/data/samples.tsv
index 1efd24dc1..158f6972b 100755
--- a/demo/data/samples.tsv
+++ b/demo/data/samples.tsv
@@ -2,3 +2,5 @@ sample_id	seq_type	patient_id	tissue_status	genome_build	strand	read_length
 TCRBOA7-N-WEX	capture	TCRBOA7	normal	grch37	positive	100
 TCRBOA7-T-WEX	capture	TCRBOA7	tumour	grch37	positive	100
 TCRBOA7-T-RNA	mrna	TCRBOA7	tumour	grch37	positive	100
+TCRBOA7-N-WGS	genome	TCRBOA7	normal	grch37	positive	100
+TCRBOA7-T-WGS	genome	TCRBOA7	tumour	grch37	positive	100
diff --git a/demo/dry-run.sh b/demo/dry-run.sh
index 326a267e7..eba908957 100755
--- a/demo/dry-run.sh
+++ b/demo/dry-run.sh
@@ -1,6 +1,18 @@
 #!/bin/bash
 
+# Launches a snakefile of your choice in dry run mode (for debugging)
+# Usage: ./dry-run.sh <snakefile.smk> <target_rule> "<snakemake_flags>"
+# Example: ./dry-run.sh example.smk example_all
+# snakefile.smk: The snakefile you want to run
+# target_rule: The name of one of the target rules specified in one of the included Snakefiles
+# snakemake_flags: One or more flags for the snakemake to run, specified inside quotation marks
+
+
 # Default to all targets
-TARGETS=${@:-all}
+snakefile=$1
+TARGETS=${2:-all}
+snakemake_flags=$3
+
+snakemake --dryrun --cores 24 $snakemake_flags -s $snakefile --printshellcmds --reason --use-conda $TARGETS
+
 
-snakemake --dryrun --cores 24 --printshellcmds --reason --use-conda $TARGETS
diff --git a/demo/run.sh b/demo/run.sh
index aeda0d1eb..8e711e965 100755
--- a/demo/run.sh
+++ b/demo/run.sh
@@ -1,7 +1,18 @@
 #!/bin/bash
 
+
+# Launches a snakefile of your choice (actual run, not a dry run)
+# Usage: ./run.sh <snakefile.smk> <target_rule> "<snakemake_flags>"
+# Example: ./run.sh example.smk example_all
+# snakefile.smk: The snakefile you want to run
+# target_rule: The name of one of the target rules specified in one of the included Snakefiles
+# snakemake_flags: One or more flags for the snakemake to run, specified inside quotation marks
+
+
 # Default to all targets
-TARGETS=${@:-all}
+snakefile=$1
+TARGETS=${2:-all}
+snakemake_flags=$3
 
 # Determine the number of available cores for parallelization
 NUM_CORES=$(grep -c '^processor' /proc/cpuinfo)
@@ -17,4 +28,6 @@ if (( $CORES_AVAILABLE <= 0 )); then
     echo "Check out top/htop to see what other jobs are currently running."
     exit 1
 fi
-nice -n 10 snakemake --cores "${CORES_AVAILABLE}" --keep-going --latency-wait 120 --use-conda "$TARGETS"
+nice -n 10 snakemake --cores "${CORES_AVAILABLE}" $snakemake_flags -s $snakefile --keep-going --latency-wait 120 --use-conda $TARGETS
+
+
diff --git a/docs/source/for_developers.rst b/docs/source/for_developers.rst
index 71a17ffe8..dfe1a86fb 100644
--- a/docs/source/for_developers.rst
+++ b/docs/source/for_developers.rst
@@ -21,7 +21,7 @@ Getting Started
       # conda create -n lcr-modules "python>=3.6"
       # conda activate lcr-modules
       
-      conda install cookiecutter git
+      conda install -c conda-forge cookiecutter
 
 4. Clone the `lcr-modules repository`_ and the `lcr-scripts repository`_.
 
diff --git a/images/module_levels.png b/images/module_levels.png
index f1cbbe41f..15a362aa6 100644
Binary files a/images/module_levels.png and b/images/module_levels.png differ
diff --git a/modules/bam2fastq/1.2/bam2fastq.smk b/modules/bam2fastq/1.2/bam2fastq.smk
index 43e0a244d..161cd387a 100644
--- a/modules/bam2fastq/1.2/bam2fastq.smk
+++ b/modules/bam2fastq/1.2/bam2fastq.smk
@@ -15,6 +15,27 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid the missing-module exit above, the "packaging" module needs to be added as a dependency of LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["bam2fastq"]`
 CFG = op.setup_module(
@@ -57,7 +78,7 @@ rule _bam2fastq_input_bam:
     output:
         bam = CFG["dirs"]["inputs"] + "{seq_type}--{genome_build}/{sample_id}.bam"
     run:
-        op.relative_symlink(input, output.bam)
+        op.absolute_symlink(input, output.bam)
 
 
 # Conditional rules depending on whether or not fastq outputs will be temporary
@@ -126,8 +147,8 @@ rule _bam2fastq_output:
         fastq_1 = CFG["dirs"]["outputs"] + "{seq_type}/{sample_id}.read1.fastq.gz",
         fastq_2 = CFG["dirs"]["outputs"] + "{seq_type}/{sample_id}.read2.fastq.gz"
     run:
-        op.relative_symlink(input.fastq_1, output.fastq_1)
-        op.relative_symlink(input.fastq_2, output.fastq_2)
+        op.relative_symlink(input.fastq_1, output.fastq_1, in_module = True)
+        op.relative_symlink(input.fastq_2, output.fastq_2, in_module = True)
 
 
 rule _bam2fastq_all:
diff --git a/modules/bam2fastq/1.2/bam2fastq_grouped.smk b/modules/bam2fastq/1.2/bam2fastq_grouped.smk
index df5f56572..741ff1a42 100644
--- a/modules/bam2fastq/1.2/bam2fastq_grouped.smk
+++ b/modules/bam2fastq/1.2/bam2fastq_grouped.smk
@@ -15,6 +15,27 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid the missing-module exit above, the "packaging" module needs to be added as a dependency of LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["bam2fastq"]`
 CFG = op.setup_module(
diff --git a/modules/battenberg/1.0/battenberg.smk b/modules/battenberg/1.0/battenberg.smk
index b2da0809d..c2a48d825 100644
--- a/modules/battenberg/1.0/battenberg.smk
+++ b/modules/battenberg/1.0/battenberg.smk
@@ -15,6 +15,26 @@
 import oncopipe as op
 import glob
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid the missing-module exit above, the "packaging" module needs to be added as a dependency of LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["battenberg"]`
 CFG = op.setup_module(
@@ -47,8 +67,8 @@ rule _battenberg_input_bam:
         bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam",
         bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai"
     run:
-        op.relative_symlink(input.bam, output.bam)
-        op.relative_symlink(input.bam + ".bai", output.bai)
+        op.absolute_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bam + ".bai", output.bai)
 
 # Installs the Battenberg R dependencies and associated software (impute2, alleleCounter)
 # Currently I think this rule has to be run twice for it to work properly because the conda environment is created here. 
@@ -158,9 +178,9 @@ rule _battenberg_output_seg:
         plots = glob.glob(params.batt_dir + "/*.png")
         for png in plots:
             bn = os.path.basename(png)
-            op.relative_symlink(png, params.png_dir + "/" + bn)
-        op.relative_symlink(input.seg, output.seg)
-        op.relative_symlink(input.sub, output.sub)
+            op.relative_symlink(png, params.png_dir + "/" + bn, in_module = True)
+        op.relative_symlink(input.seg, output.seg, in_module = True)
+        op.relative_symlink(input.sub, output.sub, in_module = True)
 
 # Generates the target sentinels for each run, which generate the symlinks
 rule _battenberg_all:
diff --git a/modules/battenberg/1.0/config/default.yaml b/modules/battenberg/1.0/config/default.yaml
index f69b5d001..647d82bb1 100644
--- a/modules/battenberg/1.0/config/default.yaml
+++ b/modules/battenberg/1.0/config/default.yaml
@@ -6,7 +6,7 @@ lcr-modules:
             sample_bam: "__UPDATE__"
             battenberg_script: "{MODSDIR}/src/R/battenberg_wgs_hg38.R"
             calc_sex_status: "{MODSDIR}/src/bash/calc_sex_status.sh"
-            cnv2igv: "{SCRIPTSDIR}/cnv2igv/1.0/cnv2igv.py"
+            cnv2igv: "{SCRIPTSDIR}/cnv2igv/1.4/cnv2igv.py"
             #TODO: this should be tested with v1.2 of cnv2igv.py
 
         scratch_subdirectories: []
diff --git a/modules/battenberg/1.1/config/default.yaml b/modules/battenberg/1.1/config/default.yaml
index 384415efc..5ae3a3179 100644
--- a/modules/battenberg/1.1/config/default.yaml
+++ b/modules/battenberg/1.1/config/default.yaml
@@ -5,7 +5,7 @@ lcr-modules:
             # Available wildcards: {seq_type} {genome_build} {sample_id}
             sample_bam: "__UPDATE__"
             battenberg_script: "{MODSDIR}/src/battenberg_wgs_hg38.R"
-            cnv2igv: "{SCRIPTSDIR}/cnv2igv/1.3/cnv2igv.py"
+            cnv2igv: "{SCRIPTSDIR}/cnv2igv/1.4/cnv2igv.py"
             src_dir: "{MODSDIR}/src/"
 
         scratch_subdirectories: []
diff --git a/modules/battenberg/1.2/battenberg.smk b/modules/battenberg/1.2/battenberg.smk
index a89b4b132..370c80339 100644
--- a/modules/battenberg/1.2/battenberg.smk
+++ b/modules/battenberg/1.2/battenberg.smk
@@ -44,7 +44,7 @@ CFG = op.setup_module(
 )
 
 #set variable for prepending to PATH based on config
-SCRIPT_PATH = CFG['inputs']['src_dir']
+BATTENBERG_SCRIPT_PATH = CFG['inputs']['src_dir']
 #this is used in place of the shell.prefix() because that was not working consistently. This is not ideal. 
 
 #this preserves the variable when using lambda functions
@@ -54,7 +54,7 @@ _battenberg_CFG = CFG
 localrules:
     _battenberg_all
 
-VERSION_MAP = {
+BATTENBERG_VERSION_MAP = {
     "hg19": "grch37",
     "grch37": "grch37",
     "hs37d5": "grch37",
@@ -77,10 +77,10 @@ rule _battenberg_get_reference:
         genomesloci = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_1000genomesloci2012_v3")
     params:
         url = "https://www.bcgsc.ca/downloads/morinlab/reference",
-        alt_build = lambda w: VERSION_MAP[w.genome_build],
+        alt_build = lambda w: BATTENBERG_VERSION_MAP[w.genome_build],
         folder = CFG["dirs"]["inputs"] + "reference/{genome_build}",
         build = "{genome_build}",
-        PATH = CFG['inputs']['src_dir']
+        battenberg_path = CFG['inputs']['src_dir']
     resources:
         **CFG["resources"]["reference"]
     threads:
@@ -98,7 +98,7 @@ rule _battenberg_get_reference:
         &&
         wget -O {output.impute_info} {params.url}/impute_info_{params.alt_build}.txt
         &&
-        python {params.PATH}/reference_correction.py {params.build}
+        python {params.battenberg_path}/reference_correction.py {params.build} $(dirname $(readlink -f {output.impute_info}))
         &&
         wget -qO-  {params.url}/battenberg_{params.alt_build}_replic_correction.tar.gz |
         tar -xvz > {output.battenberg_wgs_replic_correction} -C {params.folder}
@@ -155,7 +155,7 @@ rule _infer_patient_sex:
     threads: 8
     shell:
         op.as_one_line(""" 
-        PATH={SCRIPT_PATH}:$PATH; 
+        PATH={BATTENBERG_SCRIPT_PATH}:$PATH;
         echo "running {rule} for {wildcards.normal_id} on $(hostname) at $(date)" > {log.stderr} ;
         calc_sex_status.sh {input.normal_bam} {input.fasta} {wildcards.normal_id} > {output.sex_result} 2>> {log.stderr} &&
         echo "DONE running {rule} for {wildcards.normal_id} on $(hostname) at $(date)" >> {log.stderr} 
@@ -171,7 +171,7 @@ rule _run_battenberg:
         installed = CFG["dirs"]["inputs"] + "battenberg_dependencies_installed.success",
         sex_result = CFG["dirs"]["infer_sex"] + "{seq_type}--{genome_build}/{normal_id}.sex",
         fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"),
-        impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt"
+        impute_info = str(rules._battenberg_get_reference.output.impute_info)
 
     output:
         refit=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_refit_suggestion.txt",
@@ -205,7 +205,7 @@ rule _run_battenberg:
         sex=$(cut -f 4 {input.sex_result}| tail -n 1); 
         echo "setting sex as $sex";
         Rscript {params.script} -t {wildcards.tumour_id} 
-        -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --reference {params.ref}
+        -n {wildcards.normal_id} --tb $(readlink -f {input.tumour_bam}) --nb $(readlink -f {input.normal_bam}) -f {input.fasta} --reference $(readlink -f {params.ref})
         -o {params.out_dir} --chr_prefixed_genome $chr_prefixed --sex $sex --cpu {threads} >> {log.stdout} 2>> {log.stderr} &&  
         echo "DONE {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" >> {log.stdout}; 
         """)
diff --git a/modules/battenberg/1.2/src/battenberg_wgs_hg38.R b/modules/battenberg/1.2/src/battenberg_wgs_hg38.R
index fb9f3686e..5f3a27074 100755
--- a/modules/battenberg/1.2/src/battenberg_wgs_hg38.R
+++ b/modules/battenberg/1.2/src/battenberg_wgs_hg38.R
@@ -26,7 +26,7 @@ opt_parser = OptionParser(option_list=option_list)
 opt = parse_args(opt_parser)
 original_dir = getwd()
 
-REFERENCE_BASE = paste0(normalizePath(original_dir,"\\"), "/",opt$reference)
+REFERENCE_BASE = opt$reference
 TUMOURNAME = opt$tumourname
 NORMALNAME = opt$normalname
 
@@ -83,8 +83,9 @@ print(PROBLEMLOCI);
 
 # Change to work directory and load the chromosome information
 setwd(RUN_DIR)
-NORMALBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$nb)
-TUMOURBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$tb)
+NORMALBAM = opt$nb
+TUMOURBAM = opt$tb
+
 
 #this should be the full path to the files after changing directories
 
diff --git a/modules/battenberg/1.2/src/reference_correction.py b/modules/battenberg/1.2/src/reference_correction.py
index 4b2b881cb..be482fe75 100644
--- a/modules/battenberg/1.2/src/reference_correction.py
+++ b/modules/battenberg/1.2/src/reference_correction.py
@@ -27,12 +27,10 @@
 import os
 import sys
 
-cwd = os.getcwd()
+cwd = sys.argv[2]
 
 fileIN = open(
     cwd
-    + "/results/battenberg-1.2/00-inputs/reference/"
-    + sys.argv[1]
     + "/impute_info.txt",
     "r",
 )
@@ -42,15 +40,11 @@
 newdata = filedata.replace(
     "<path_to_impute_reference_files>",
     cwd
-    + "/results/battenberg-1.2/00-inputs/reference/"
-    + sys.argv[1]
     + "/battenberg_impute_v3",
 )
 
 fileOut = open(
     cwd
-    + "/results/battenberg-1.2/00-inputs/reference/"
-    + sys.argv[1]
     + "/impute_info.txt",
     "w",
 )
diff --git a/modules/bwa_mem/1.1/bwa_mem.smk b/modules/bwa_mem/1.1/bwa_mem.smk
index beba1e7f5..5d17a32a9 100644
--- a/modules/bwa_mem/1.1/bwa_mem.smk
+++ b/modules/bwa_mem/1.1/bwa_mem.smk
@@ -16,6 +16,26 @@ import os
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid the missing-module exit above, the "packaging" module needs to be added as a dependency of LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["bwa_mem"]`
 CFG = op.setup_module(
@@ -48,8 +68,8 @@ rule _bwa_mem_input_fastq:
         fastq_1 = CFG["dirs"]["inputs"] + "fastq/{seq_type}--{genome_build}/{sample_id}.R1.fastq.gz",
         fastq_2 = CFG["dirs"]["inputs"] + "fastq/{seq_type}--{genome_build}/{sample_id}.R2.fastq.gz",
     run:
-        op.relative_symlink(input.fastq_1, output.fastq_1)
-        op.relative_symlink(input.fastq_2, output.fastq_2)
+        op.absolute_symlink(input.fastq_1, output.fastq_1)
+        op.absolute_symlink(input.fastq_2, output.fastq_2)
 
 
 rule _bwa_mem_run:
@@ -118,7 +138,7 @@ rule _bwa_mem_symlink_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_bwa_mem)
     run:
-        op.relative_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bam, output.bam)
 
 
 rule _bwa_mem_symlink_sorted_bam:
@@ -130,7 +150,7 @@ rule _bwa_mem_symlink_sorted_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_bwa_mem)
     run:
-        op.relative_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bam, output.bam)
         os.remove(input.bwa_mem_bam)
         shell("touch {input.bwa_mem_bam}.deleted")
 
@@ -146,8 +166,8 @@ rule _bwa_mem_output_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_bwa_mem)
     run:
-        op.relative_symlink(input.bam, output.bam)
-        op.relative_symlink(input.bai, output.bam + ".bai")
+        op.relative_symlink(input.bam, output.bam, in_module=True)
+        op.relative_symlink(input.bai, output.bam + ".bai", in_module=True)
         os.remove(input.sorted_bam)
         shell("touch {input.sorted_bam}.deleted")
 
diff --git a/modules/bwa_mem/1.1/bwa_mem_grouped.smk b/modules/bwa_mem/1.1/bwa_mem_grouped.smk
index 22bc7229f..05afba605 100644
--- a/modules/bwa_mem/1.1/bwa_mem_grouped.smk
+++ b/modules/bwa_mem/1.1/bwa_mem_grouped.smk
@@ -16,6 +16,26 @@ import os
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["bwa_mem"]`
 CFG = op.setup_module(
@@ -50,8 +70,8 @@ rule _bwa_mem_input_fastq:
     group: 
         CFG["group"]['bwa-mem']
     run:
-        op.relative_symlink(input.fastq_1, output.fastq_1)
-        op.relative_symlink(input.fastq_2, output.fastq_2)
+        op.absolute_symlink(input.fastq_1, output.fastq_1)
+        op.absolute_symlink(input.fastq_2, output.fastq_2)
 
 
 rule _bwa_mem_run:
@@ -121,7 +141,7 @@ rule _bwa_mem_symlink_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_bwa_mem)
     run:
-        op.relative_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bam, output.bam)
 
 
 rule _bwa_mem_symlink_sorted_bam:
@@ -133,7 +153,7 @@ rule _bwa_mem_symlink_sorted_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_bwa_mem)
     run:
-        op.relative_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bam, output.bam)
         os.remove(input.bwa_mem_bam)
         shell("touch {input.bwa_mem_bam}.deleted")
 
@@ -149,8 +169,8 @@ rule _bwa_mem_output_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_bwa_mem)
     run:
-        op.relative_symlink(input.bam, output.bam)
-        op.relative_symlink(input.bai, output.bam + ".bai")
+        op.relative_symlink(input.bam, output.bam, in_module=True)
+        op.relative_symlink(input.bai, output.bam + ".bai", in_module=True)
         os.remove(input.sorted_bam)
         shell("touch {input.sorted_bam}.deleted")
 
diff --git a/modules/controlfreec/1.2/config/default.yaml b/modules/controlfreec/1.2/config/default.yaml
index bf32925ee..79fbc09d2 100755
--- a/modules/controlfreec/1.2/config/default.yaml
+++ b/modules/controlfreec/1.2/config/default.yaml
@@ -28,18 +28,19 @@ lcr-modules:
                 # 3: make a separate fragment of the unknown region and attach to left/right, choosing the longer one, BUT known region should make at least half size of the unknown region
                 # 4: make a separate fragment of the unknown region and do not assign any copy number to this region at all
             coefficientOfVariation: 0.062 # coefficient used to evaluate window size - the lower, the more windows
-            contaminationAdjustment: TRUE # if "contamination" value is not provided, it will automaticaly evaluate
-            degree: '3\&4' # degree of polynomial - 3&4 for WGS (GC-based normalization); 1 for WES (control-read-count-based normalization)
+            contaminationAdjustment: TRUE # if a "contamination" value is not provided, it will be evaluated automatically. If contamination detection stalls, set contaminationAdjustment to FALSE.
+            degree: '3\&4' # degree of polynomial - 3&4 for WGS (GC-based normalization); 1 for WES (control-read-count-based normalization). You can comment out degree to let control-freec choose.
             forceGCcontentNormalization: 1 #0 for WGS; 1 for WES # 0 forces control-base normalization, 1 forces GC
             intercept: 1 # 0 for control-based (paired) ; 1 for GC-content (unpaired)
             minCNAlength: 8 # minimum number of consecutive windows to call a CNA #default 1 for WGS; 3 for WES
-            minMappabilityPerWindow: 0.9 # minimum fraction of mappable positions for a window to be considered
+            minMappabilityPerWindow: 0.3 # minimum fraction of mappable positions for a window to be considered # set this lower if you want to also use a hard-masked mappability file
             minimalSubclonePresence: 20 # detects subclones present in x% of cell population - 20 for WGS; 30 for WES (100 means "do not look for subclones")
             noisyData: TRUE #set TRUE for exomes/FFPE libs to avoid false positives due to non-uniform capture
+            readCountThreshold: 10 # threshold on the minimal number of reads per window (used for exome-seq or targeted sequencing) (recommended 50 for WES)
             ploidy: 2 #will select the ploidy that explains the most CNAs (a range can be added and control-freec will assign ploidy based on best fit, ex. 2,3,4)
             printNA: FALSE
             telocentromeric: 50000 # size of pre-telomeric and pre-centromeric regions to exclude
-            uniqueMatch: TRUE # uses mappability profile to correct read counts
+            uniqueMatch: FALSE # uses mappability profile to correct read counts
             
             #optional options: (uncomment these options in config_WGS.txt to implement them)
             #if implemented, contamination will overrule contaminationAdjustment
@@ -54,21 +55,36 @@ lcr-modules:
             minQualityPerPosition: 20 # for BAF: minimum base quality
             shiftInQuality: 0 # basis for Phred quality
 
+            #GEM options: (for generating hard-masked mappability files)
+            hard_masked: True # set True if using a hard-masked mappability file
+            kmer: 100 # kmer size
+            mismatch: 2 # maximum number of mismatches allowed
+            maxBigIndel: 5 # The GEM mapper implements a special algorithm that, in addition to ordinary matches, is sometimes able to find a single long indel - this is the max size
+            maxEditDistance: 0 # maximum number of edit operations allowed while verifying candidate matches by dynamic programming (can be a float 0-1, which represents differences of size n% of length, or a non-negative integer, which is a fixed number of edits)
+            strata: 0 # a stratum is a set of matches all having the same string distance from the query, GEM mapper will try to find n amount of matches to explore
+
 
         software:
-            FREEC_sig: "{MODSDIR}/etc/scripts/assess_significance.R"
-            FREEC_graph: "{MODSDIR}/etc/scripts/makeGraph.R"
-            FREEC_graph_chr: "{MODSDIR}/etc/scripts/makeGraph_Chromosome.R"
-            freec2bed: "{MODSDIR}/etc/scripts/freec2bed.pl"
+            FREEC_sig: "{MODSDIR}/src/assess_significance.R"
+            FREEC_graph: "{MODSDIR}/src/makeGraph.R"
+            FREEC_graph_chr: "{MODSDIR}/src/makeGraph_Chromosome.R"
+            freec2bed: "{MODSDIR}/src/freec2bed.pl"
+            freec2circos: "{MODSDIR}/src/freec2circos.pl"
+            cnv2igv: "{SCRIPTSDIR}/cnv2igv/1.4/cnv2igv.py"
 
         
         threads:
+            gem: 24
             controlfreec_run: 24
             calc_sig: 1
             plot: 1
             freec2bed: 1
+            freec2circos: 1
+            cnv2igv: 1
 
         resources:
+            gem:
+                mem_mb: 16000
             mpileup:
                 mem_mb: 8000
             cat:
@@ -83,6 +99,10 @@ lcr-modules:
                 mem_mb: 1000
             freec2bed:
                 mem_mb: 1000
+            freec2circos:
+                mem_mb: 1000
+            cnv2igv:
+                mem_mb: 1000
 
         
         pairing_config:
diff --git a/modules/controlfreec/1.2/config/freec/config_WGS.txt b/modules/controlfreec/1.2/config/freec/config_WGS.txt
index 44f46c954..1837430a7 100644
--- a/modules/controlfreec/1.2/config/freec/config_WGS.txt
+++ b/modules/controlfreec/1.2/config/freec/config_WGS.txt
@@ -26,6 +26,7 @@ minimalSubclonePresence = minimumSubclonePresenceValue
 noisyData = booNoise
 printNA = naBoo
 ploidy = ploidyInput
+readCountThreshold = rcCountThresold
 #step = stepValue
 telocentromeric = teloValue
 uniqueMatch = uniqBoo
@@ -48,7 +49,6 @@ mateOrientation = FR
 
 [BAF]
 
-fastaFile = fastaPath
 shiftInQuality = phredQuality
 SNPfile = DBsnpFile
 minimalCoveragePerPosition = minCovPerPos
diff --git a/modules/controlfreec/1.2/controlfreec.smk b/modules/controlfreec/1.2/controlfreec.smk
index af1da74b0..bb7e84ce6 100755
--- a/modules/controlfreec/1.2/controlfreec.smk
+++ b/modules/controlfreec/1.2/controlfreec.smk
@@ -15,6 +15,26 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["controlfreec"]`
 CFG = op.setup_module(
@@ -35,45 +55,136 @@ localrules:
 
 ##### RULES #####
 
+#### Rules for mappability reference 
+# Set CFG["options"]["hard_masked"] to True to generate and use a hard-masked mappability track (recommended for FFPE genomes).
+# Set CFG["options"]["hard_masked"] to False to use the genome build's default mappability track (downloaded by _controlfreec_get_map_refs).
+if CFG["options"]["hard_masked"] == True:
+    CFG["runs"]["masked"] = "_masked"
+else:
+    CFG["runs"]["masked"] = ""
+    
+wildcard_constraints:
+    masked = ".{0}|_masked",
+    genome_build = ".+(?<!masked)"
+
 #### generate references ####
-rule _controlfreec_get_map_refs:
+# mappability tracks for hg19 and hg38 are available from the source
+if CFG["options"]["hard_masked"] == False:
+    rule _controlfreec_get_map_refs:
+        output:
+            tar = temp(CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/out100m2_{genome_build}.tar.gz"),
+            gem = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/out100m2_{genome_build}.gem"
+        params:
+            provider = "ensembl",
+            url = lambda w: {"grch37": "http://xfer.curie.fr/get/7hZIk1C63h0/hg19_len100bp.tar.gz",
+                            "hs37d5": "http://xfer.curie.fr/get/7hZIk1C63h0/hg19_len100bp.tar.gz",
+                            "hg19": "http://xfer.curie.fr/get/7hZIk1C63h0/hg19_len100bp.tar.gz",
+                            "grch38": "http://xfer.curie.fr/get/vyIi4w8EONl/out100m2_hg38.zip",
+                            "hg38": "http://xfer.curie.fr/get/vyIi4w8EONl/out100m2_hg38.zip"}[w.genome_build],
+            command1 = lambda w: {"grch37": "tar -xvf ",
+                                "hs37d5": "tar -xvf ",
+                                "hg19": "tar -xvf ",
+                                "grch38": "unzip ",
+                                "hg38": "unzip "}[w.genome_build],
+            command2 = lambda w: {"grch37": " --wildcards --no-anchored 'out100m2*gem' && mv out100m2_hg19.gem ",
+                                "hs37d5": " --wildcards --no-anchored 'out100m2*gem' && mv out100m2_hg19.gem ",
+                                "hg19": " --wildcards --no-anchored 'out100m2*gem' && mv out100m2_hg19.gem ",
+                                "grch38": " -d ",
+                                "hg38": " -d "}[w.genome_build],
+            command3 = lambda w: {"grch37": "out100m2_grch37.gem ",
+                                "hs37d5": "out100m2_hs37d5.gem ",
+                                "hg19": "out100m2_hg19.gem ",
+                                "grch38": " ",
+                                "hg38": " "}[w.genome_build],
+            outdir = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/"
+        shell:
+            "wget -O {output.tar} {params.url} "
+            "&& {params.command1} {output.tar} {params.command2} {params.outdir}{params.command3}"
+
+# mappability tracks for hard-masked genomes need to be generated using GEM
+rule _download_GEM:
     output:
-        tar = temp(CFG["dirs"]["inputs"] + "references/{genome_build}/freec/out100m2_{genome_build}.tar.gz"),
-        gem = CFG["dirs"]["inputs"] + "references/{genome_build}/freec/out100m2_{genome_build}.gem"
+        touch(CFG["dirs"]["inputs"] + "references/GEM/.done")
     params:
-        provider = "ensembl",
-        url = lambda w: {"grch37": "http://xfer.curie.fr/get/7hZIk1C63h0/hg19_len100bp.tar.gz",
-                        "hg19": "http://xfer.curie.fr/get/7hZIk1C63h0/hg19_len100bp.tar.gz",
-                        "grch38": "http://xfer.curie.fr/get/vyIi4w8EONl/out100m2_hg38.zip",
-                        "hg38": "http://xfer.curie.fr/get/vyIi4w8EONl/out100m2_hg38.zip"}[w.genome_build],
-        command1 = lambda w: {"grch37": "tar -xvf ",
-                            "hg19": "tar -xvf ",
-                            "grch38": "unzip ",
-                            "hg38": "unzip "}[w.genome_build],
-        command2 = lambda w: {"grch37": " --wildcards --no-anchored 'out100m2*gem' && mv out100m2_hg19.gem ",
-                            "hg19": " --wildcards --no-anchored 'out100m2*gem' && mv out100m2_hg19.gem ",
-                            "grch38": " -d ",
-                            "hg38": " -d "}[w.genome_build],
-        outdir = CFG["dirs"]["inputs"] + "references/{genome_build}/freec/"
+        dirOut = CFG["dirs"]["inputs"] + "references/GEM/"
+    resources: **CFG["resources"]["gem"]
     shell:
-        "wget -O {output.tar} {params.url} "
-        "&& {params.command1} {output.tar} {params.command2} {params.outdir}"
-
+        "wget https://sourceforge.net/projects/gemlibrary/files/gem-library/Binary%20pre-release%203/GEM-binaries-Linux-x86_64-core_i3-20130406-045632.tbz2/download -O {params.dirOut}/GEM-lib.tbz2 && bzip2 -dc {params.dirOut}/GEM-lib.tbz2 | tar -xvf - -C {params.dirOut}/"
+
+# Ensembl grch37/grch38 FASTA headers carry extra text after the sequence name; keep only the first token (">name") of each header line.
+if CFG["options"]["hard_masked"] == True:
+    rule _set_up_grch_genomes:
+        input:
+            reference = reference_files("genomes/{genome_build}{masked}/genome_fasta/genome.fa")
+        output:
+            reference = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/genome_header.fa"
+        resources: **CFG["resources"]["gem"]
+        shell:
+            r"cat {input.reference} | perl -ne 's/(^\>\S+).+/$1/;print;' > {output.reference} "
+
+def get_genome_fasta(wildcards):
+    """Pick the FASTA to index with GEM: the header-trimmed copy for builds whose name contains "grch", else the reference genome FASTA."""
+    CFG = config["lcr-modules"]["controlfreec"]
+    if "grch" in wildcards.genome_build:
+        return CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/genome_header.fa"
+    return reference_files("genomes/{genome_build}{masked}/genome_fasta/genome.fa")
+
+if CFG["options"]["hard_masked"] == True:
+    rule _generate_gem_index:
+        input:
+            software = CFG["dirs"]["inputs"] + "references/GEM/.done",
+            reference = get_genome_fasta
+        output:
+            index = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/{genome_build}.hardmask.all_index.gem"
+        params:
+            gemDir = CFG["dirs"]["inputs"] + "references/GEM/GEM-binaries-Linux-x86_64-core_i3-20130406-045632/bin",
+            idxpref = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/{genome_build}.hardmask.all_index"
+        threads: CFG["threads"]["gem"]
+        resources: **CFG["resources"]["gem"]
+        log: CFG["logs"]["inputs"] + "gem/{genome_build}{masked}/gem_index.stderr.log"
+        shell:
+            "PATH=$PATH:{params.gemDir}; {params.gemDir}/gem-indexer -T {threads} -c dna -i {input.reference} -o {params.idxpref} > {log} 2>&1 "
+
+if CFG["options"]["hard_masked"] == True:
+    rule _generate_mappability:
+        input:
+            software = CFG["dirs"]["inputs"] + "references/GEM/.done",
+            index = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/{genome_build}.hardmask.all_index.gem"
+        output:
+            mappability = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/{genome_build}.hardmask.all.gem.mappability"
+        params:
+            gemDir = CFG["dirs"]["inputs"] + "references/GEM/GEM-binaries-Linux-x86_64-core_i3-20130406-045632/bin",
+            pref =  CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/{genome_build}.hardmask.all.gem",
+            kmer = CFG["options"]["kmer"],
+            mismatch = CFG["options"]["mismatch"],
+            maxEditDistance = CFG["options"]["maxEditDistance"],
+            maxBigIndel = CFG["options"]["maxBigIndel"],
+            strata = CFG["options"]["strata"]
+        threads: CFG["threads"]["gem"]
+        resources: **CFG["resources"]["gem"]
+        log: CFG["logs"]["inputs"] + "gem/{genome_build}{masked}/gem_map.stderr.log"
+        shell:
+            "PATH=$PATH:{params.gemDir}; {params.gemDir}/gem-mappability -T {threads} -I {input.index} -l {params.kmer} -m {params.mismatch} -t disable --mismatch-alphabet ACGNT -e {params.maxEditDistance} --max-big-indel-length {params.maxBigIndel} -s {params.strata} -o {params.pref} > {log} 2>&1 "
+
+if CFG["options"]["hard_masked"] == True:
+    rule _symlink_map:
+        input:
+            mappability = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/{genome_build}.hardmask.all.gem.mappability"
+        output:
+            mappability = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/out100m2_{genome_build}.gem"
+        resources: **CFG["resources"]["gem"]
+        shell:
+            "ln -srf {input.mappability} {output.mappability} "
+
+#### Rule for setting chromosome names (chr-prefix or not)
 # no chr for grch37 and grch38
 # chr for hg19 and hg38
-# Symlink chromosomes used (i.e. chr1-22,X,Y)
-checkpoint _controlfreec_input_chrs:
-    input:
-        chrs = reference_files("genomes/{genome_build}/genome_fasta/main_chromosomes_withY.txt")
-    output:
-        chrs = CFG["dirs"]["inputs"] + "references/{genome_build}/main_chromosomes_withY.txt"
-    run:
-        op.relative_symlink(input.chrs, output.chrs)
+# chromosomes used (i.e. chr1-22,X,Y)
 
 
 def _controlfreec_get_chr_fastas(wildcards):
     CFG = config["lcr-modules"]["controlfreec"]
-    chrs = checkpoints._controlfreec_input_chrs.get(**wildcards).output.chrs
+    chrs = reference_files("genomes/" + wildcards.genome_build + "/genome_fasta/main_chromosomes_withY.txt")
     with open(chrs) as file:
         chromosome = file.read().rstrip("\n").split("\n")
     fastas = expand(
@@ -86,9 +197,11 @@ def _controlfreec_get_chr_fastas(wildcards):
 #generates file with chromomsome lengths from genome.fa.fai
 rule _controlfreec_generate_chrLen:
     input:
-        fai = reference_files("genomes/{genome_build}/genome_fasta/genome.fa.fai")
+        fai = reference_files("genomes/{genome_build}{masked}/genome_fasta/genome.fa.fai"),
+        main = reference_files("genomes/{genome_build}{masked}/genome_fasta/main_chromosomes_withY.txt")
     output:
-        chrLen = CFG["dirs"]["inputs"] + "references/{genome_build}/freec/{genome_build}.len"
+        chrLen = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/{genome_build}.len"
+    resources: **CFG["resources"]["gem"]
     shell:
         op.as_one_line("""
             grep -P '^chr[0-9,X,Y]+\t|^[0-9,X,Y]' {input.fai} | awk '{{print $1"\t"$2}}' > {output.chrLen}
@@ -102,6 +215,7 @@ rule _controlfreec_generate_chrFasta:
         fasta = CFG["dirs"]["inputs"] + "references/{genome_build}/freec/chr/{chromosome}.fa"
     conda:
         CFG["conda_envs"]["controlfreec"]
+    resources: **CFG["resources"]["gem"]
     shell:
         "samtools faidx {input.fasta} {wildcards.chromosome} > {output.fasta} "
 
@@ -118,6 +232,7 @@ rule _controlfreec_dbsnp_to_bed:
         vcf = reference_files("genomes/{genome_build}/variation/dbsnp.common_all-151.vcf.gz")
     output:
         bed = CFG["dirs"]["inputs"] + "references/{genome_build}/freec/dbsnp.common_all-151.bed"
+    resources: **CFG["resources"]["gem"]
     shell:
         op.as_one_line(""" gunzip -c {input.vcf} | awk {{'printf ("%s\\t%s\\t%s\\n", $1,$2-1,$2)'}} | zgrep -v -h "^#" > {output.bed} """)
 
@@ -129,16 +244,18 @@ rule _controlfreec_input_bam:
         bai = CFG["inputs"]["sample_bai"]
     output:
         bam = CFG["dirs"]["inputs"] + "{seq_type}--{genome_build}/{sample_id}.bam",
-        bai = CFG["dirs"]["inputs"] + "{seq_type}--{genome_build}/{sample_id}.bai"
+        bai = CFG["dirs"]["inputs"] + "{seq_type}--{genome_build}/{sample_id}.bai",
+        crai = CFG["dirs"]["inputs"] + "{seq_type}--{genome_build}/{sample_id}.crai"
     run:
-        op.relative_symlink(input.bam, output.bam)
-        op.relative_symlink(input.bai, output.bai)
+        op.absolute_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bai, output.bai)
+        op.absolute_symlink(input.bai, output.crai)
 
 
 #### set-up mpileups for BAF calling ####
 def _controlfreec_get_chr_mpileups(wildcards):
     CFG = config["lcr-modules"]["controlfreec"]
-    chrs = checkpoints._controlfreec_input_chrs.get(**wildcards).output.chrs
+    chrs = reference_files("genomes/" + wildcards.genome_build + "/genome_fasta/main_chromosomes_withY.txt")
     with open(chrs) as file:
         chrs = file.read().rstrip("\n").split("\n")
     mpileups = expand(
@@ -165,17 +282,16 @@ rule _controlfreec_mpileup_per_chrom:
     shell:
         "samtools mpileup -l {input.bed} -r {wildcards.chrom} -Q 20 -f {input.fastaFile} {input.bam} | gzip -c > {output.pileup} 2> {log.stderr}"
 
-
 rule _controlfreec_concatenate_pileups:
     input: 
-        _controlfreec_get_chr_mpileups
+        mpileup = _controlfreec_get_chr_mpileups
     output: 
         mpileup = temp(CFG["dirs"]["mpileup"] + "{seq_type}--{genome_build}/{sample_id}.bam_minipileup.pileup.gz")
     resources: 
         **CFG["resources"]["cat"]
     group: "controlfreec"
     shell: 
-        "cat {input} > {output.mpileup} "
+        "cat {input.mpileup} > {output.mpileup} "
 
 
 #### Run control-FREEC ####
@@ -185,19 +301,18 @@ rule _controlfreec_config:
     input:
         tumour_bam = CFG["dirs"]["mpileup"] + "{seq_type}--{genome_build}/{tumour_id}.bam_minipileup.pileup.gz",
         normal_bam = CFG["dirs"]["mpileup"] + "{seq_type}--{genome_build}/{normal_id}.bam_minipileup.pileup.gz",
-        fastaFile = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"),
-        reference = CFG["dirs"]["inputs"] + "references/{genome_build}/freec/out100m2_{genome_build}.gem",
-        chrLen = CFG["dirs"]["inputs"] + "references/{genome_build}/freec/{genome_build}.len",
+        reference = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/out100m2_{genome_build}.gem",
+        chrLen = CFG["dirs"]["inputs"] + "references/{genome_build}{masked}/freec/{genome_build}.len",
         done = CFG["dirs"]["inputs"] + "references/{genome_build}/freec/chr/.all_done"
     output:
-        CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/config_WGS.txt"
+        CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/config_WGS.txt"
     conda:
         CFG["conda_envs"]["controlfreec"]
     params:
         config = CFG["options"]["configFile"],
         dbSNP = reference_files("genomes/{genome_build}/variation/dbsnp.common_all-151.vcf.gz"),
         shiftInQuality = CFG["options"]["shiftInQuality"],
-        outdir = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/",
+        outdir = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/",
         window = CFG["options"]["window"],
         ploidy = CFG["options"]["ploidy"],
         breakPointValue = CFG["options"]["breakPointThreshold"],
@@ -217,6 +332,7 @@ rule _controlfreec_config:
         minimumSubclonePresence = CFG["options"]["minimalSubclonePresence"],
         naBoo = CFG["options"]["printNA"],
         noisyData = CFG["options"]["noisyData"],
+        readCountThreshold = CFG["options"]["readCountThreshold"],
         step = CFG["options"]["step"],
         telocentromeric = CFG["options"]["telocentromeric"],
         threads = CFG["threads"]["controlfreec_run"],
@@ -231,7 +347,6 @@ rule _controlfreec_config:
         "sed \"s|BAMFILE|{input.tumour_bam}|g\" {params.config} | "
         "sed \"s|CONTROLFILE|{input.normal_bam}|g\" | "
         "sed \"s|OUTDIR|{params.outdir}|g\" | "
-        "sed \"s|fastaPath|{input.fastaFile}|g\" | "
         "sed \"s|DBsnpFile|{params.dbSNP}|g\" | "
         "sed \"s|phredQuality|{params.shiftInQuality}|g\" | "
         "sed \"s|windowSize|{params.window}|g\" | "
@@ -257,6 +372,7 @@ rule _controlfreec_config:
         "sed \"s|minQualPerPos|{params.minimalQualityPerPosition}|g\" | "
         "sed \"s|booNoise|{params.noisyData}|g\" | "
         "sed \"s|stepValue|{params.step}|g\" | "
+        "sed \"s|rcCountThresold|{params.readCountThreshold}|g\" | "
         "sed \"s|teloValue|{params.telocentromeric}|g\" | "
         "sed \"s|uniqBoo|{params.uniqBoo}|g\" | "
         "sed \"s|naBoo|{params.naBoo}|g\" | "
@@ -266,81 +382,116 @@ rule _controlfreec_config:
 
 rule _controlfreec_run:
     input:
-        config = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/config_WGS.txt",
+        config = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/config_WGS.txt",
         tumour_bam = CFG["dirs"]["mpileup"] + "{seq_type}--{genome_build}/{tumour_id}.bam_minipileup.pileup.gz",
         normal_bam = CFG["dirs"]["mpileup"] + "{seq_type}--{genome_build}/{normal_id}.bam_minipileup.pileup.gz",
     output:
-        info = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_info.txt",
-        ratio = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt",
-        CNV = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_CNVs",
-        BAF = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_BAF.txt"
+        info = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_info.txt",
+        ratio = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt",
+        CNV = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_CNVs",
+        BAF = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_BAF.txt"
     conda: CFG["conda_envs"]["controlfreec"]
     threads: CFG["threads"]["controlfreec_run"]
     resources: **CFG["resources"]["controlfreec_run"]
     log:
-        stdout = CFG["logs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/run.stdout.log",
-        stderr = CFG["logs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/run.stderr.log"
+        stdout = CFG["logs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/run.stdout.log",
+        stderr = CFG["logs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/run.stderr.log"
     shell:
         "freec -conf {input.config} > {log.stdout} 2> {log.stderr} "
 
 
 rule _controlfreec_calc_sig:
     input:
-        CNVs = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_CNVs",
-        ratios = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt",
+        CNVs = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_CNVs",
+        ratios = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt",
     output:
-        txt = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_CNVs.p.value.txt"
+        txt = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_CNVs.p.value.txt"
     params:
         calc_sig = CFG["software"]["FREEC_sig"]
     threads: CFG["threads"]["calc_sig"]
     resources: **CFG["resources"]["calc_sig"]
     conda: CFG["conda_envs"]["controlfreec"]
     log:         
-        stdout = CFG["logs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/calc_sig.stdout.log",
-        stderr = CFG["logs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/calc_sig.stderr.log"
+        stdout = CFG["logs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/calc_sig.stdout.log",
+        stderr = CFG["logs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/calc_sig.stderr.log"
     shell:
         "cat {params.calc_sig} | R --slave --args {input.CNVs} {input.ratios} > {log.stdout} 2> {log.stderr}"
 
 
 rule _controlfreec_plot:
     input:
-        ratios = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt",
-        BAF = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_BAF.txt",
-        info = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_info.txt"
+        ratios = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt",
+        BAF = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_BAF.txt",
+        info = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_info.txt"
     output:
-        plot = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt.png",
-        log2plot = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt.log2.png",
-        bafplot = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_BAF.txt.png"
+        plot = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt.png",
+        log2plot = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt.log2.png",
+        bafplot = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_BAF.txt.png"
     params:
         plot = CFG["software"]["FREEC_graph"]
     threads: CFG["threads"]["plot"]
     resources: **CFG["resources"]["plot"]
     conda: CFG["conda_envs"]["controlfreec"]
     log: 
-        stdout = CFG["logs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/plot.stdout.log",
-        stderr = CFG["logs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/plot.stderr.log"
+        stdout = CFG["logs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/plot.stdout.log",
+        stderr = CFG["logs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/plot.stderr.log"
     shell:
         "cat {params.plot} | R --slave --args `grep \"Output_Ploidy\" {input.info} | cut -f 2` {input.ratios} {input.BAF} > {log.stdout} 2> {log.stderr} "
 
 
 rule _controlfreec_freec2bed:
     input:
-        ratios = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt",
-        info = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_info.txt"
+        ratios = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt",
+        info = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_info.txt"
     output:
-        bed = CFG["dirs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bed"
+        bed = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bed"
     params:
         freec2bed = CFG["software"]["freec2bed"]
     threads: CFG["threads"]["freec2bed"]
     resources: **CFG["resources"]["freec2bed"]
     conda: CFG["conda_envs"]["controlfreec"]
     log:
-        stderr = CFG["logs"]["run"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/freec2bed.stderr.log"
+        stderr = CFG["logs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/freec2bed.stderr.log"
     shell:
         "ploidy=$(grep Output_Ploidy {input.info} | cut -f 2); "
         "perl {params.freec2bed} -f {input.ratios} -p $ploidy > {output.bed} 2> {log.stderr}"
 
 
+rule _controlfreec_freec2circos:
+    input:
+        ratios = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_ratio.txt",
+        info = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_info.txt"
+    output:
+        circos = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.circos.bed"
+    params:
+        freec2circos = CFG["software"]["freec2circos"]
+    threads: CFG["threads"]["freec2circos"]
+    resources: **CFG["resources"]["freec2circos"]
+    conda: CFG["conda_envs"]["controlfreec"]
+    log:
+        stderr = CFG["logs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/freec2circos.stderr.log"
+    shell:
+        "ploidy=$(grep Output_Ploidy {input.info} | cut -f 2); "
+        "perl {params.freec2circos} -f {input.ratios} -p $ploidy > {output.circos} 2> {log.stderr}"
+        
+        
+rule _controlfreec_cnv2igv:
+    input:
+        cnv = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.bam_minipileup.pileup.gz_CNVs.p.value.txt"
+    output:
+        seg = CFG["dirs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.CNVs.seg"
+    params:
+        tumour_id = "{tumour_id}",
+        cnv2igv = CFG["software"]["cnv2igv"]
+    threads: CFG["threads"]["cnv2igv"]
+    resources: **CFG["resources"]["cnv2igv"]
+    conda: CFG["conda_envs"]["controlfreec"]
+    log:
+        stderr = CFG["logs"]["run"] + "{seq_type}--{genome_build}{masked}/{tumour_id}--{normal_id}--{pair_status}/cnv2igv.stderr.log"
+    shell:
+        "python3 {params.cnv2igv} --mode controlfreec --sample {params.tumour_id} {input.cnv} > {output.seg} 2> {log.stderr} "
+
+
 # Symlinks the final output files into the module results directory (under '99-outputs/')
 rule _controlfreec_output:
     input:
@@ -350,23 +501,29 @@ rule _controlfreec_output:
         bed = str(rules._controlfreec_freec2bed.output.bed),
         BAF = str(rules._controlfreec_run.output.BAF),
         BAFgraph = str(rules._controlfreec_plot.output.bafplot),
-        ratio = str(rules._controlfreec_run.output.ratio)
+        ratio = str(rules._controlfreec_run.output.ratio),
+        circos = str(rules._controlfreec_freec2circos.output.circos),
+        igv = str(rules._controlfreec_cnv2igv.output.seg)
     output:
-        plot = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/plots/{tumour_id}--{normal_id}--{pair_status}.ratio.png",
-        log2plot = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/log2plots/{tumour_id}--{normal_id}--{pair_status}.ratio.log2.png",
-        CNV = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/CNV/{tumour_id}--{normal_id}--{pair_status}.CNVs.p.value.txt",
-        bed = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/bed/{tumour_id}--{normal_id}--{pair_status}.CNVs.bed",
-        BAF = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/BAF/{tumour_id}--{normal_id}--{pair_status}.BAF.txt",
-        BAFgraph = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/BAFplot/{tumour_id}--{normal_id}--{pair_status}.BAF.png",
-        ratio = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/ratio/{tumour_id}--{normal_id}--{pair_status}.ratio.txt"
+        plot = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}{masked}/plots/{tumour_id}--{normal_id}--{pair_status}.ratio.png",
+        log2plot = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}{masked}/log2plots/{tumour_id}--{normal_id}--{pair_status}.ratio.log2.png",
+        CNV = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}{masked}/CNV/{tumour_id}--{normal_id}--{pair_status}.CNVs.p.value.txt",
+        bed = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}{masked}/bed/{tumour_id}--{normal_id}--{pair_status}.CNVs.bed",
+        BAF = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}{masked}/BAF/{tumour_id}--{normal_id}--{pair_status}.BAF.txt",
+        BAFgraph = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}{masked}/BAFplot/{tumour_id}--{normal_id}--{pair_status}.BAF.png",
+        ratio = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}{masked}/ratio/{tumour_id}--{normal_id}--{pair_status}.ratio.txt",
+        circos = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}{masked}/circos/{tumour_id}--{normal_id}--{pair_status}.circos.bed",
+        igv = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}{masked}/igv/{tumour_id}--{normal_id}--{pair_status}.igv.seg"
     run:
-        op.relative_symlink(input.plot, output.plot)
-        op.relative_symlink(input.log2plot, output.log2plot)
-        op.relative_symlink(input.CNV, output.CNV)
-        op.relative_symlink(input.bed, output.bed)
-        op.relative_symlink(input.BAF, output.BAF)
-        op.relative_symlink(input.BAFgraph, output.BAFgraph)
-        op.relative_symlink(input.ratio, output.ratio)
+        op.relative_symlink(input.plot, output.plot, in_module = True)
+        op.relative_symlink(input.log2plot, output.log2plot, in_module = True)
+        op.relative_symlink(input.CNV, output.CNV, in_module = True)
+        op.relative_symlink(input.bed, output.bed, in_module = True)
+        op.relative_symlink(input.BAF, output.BAF, in_module = True)
+        op.relative_symlink(input.BAFgraph, output.BAFgraph, in_module = True)
+        op.relative_symlink(input.ratio, output.ratio, in_module = True)
+        op.relative_symlink(input.circos, output.circos, in_module = True)
+        op.relative_symlink(input.igv, output.igv, in_module = True)
 
 
 # Generates the target sentinels for each run, which generate the symlinks
@@ -380,14 +537,17 @@ rule _controlfreec_all:
                 str(rules._controlfreec_output.output.bed),
                 str(rules._controlfreec_output.output.BAF),
                 str(rules._controlfreec_output.output.BAFgraph),
-                str(rules._controlfreec_output.output.ratio)
+                str(rules._controlfreec_output.output.ratio),
+                str(rules._controlfreec_output.output.circos),
+                str(rules._controlfreec_output.output.igv)
             ],
             zip,  # Run expand() with zip(), not product()
             seq_type=CFG["runs"]["tumour_seq_type"],
             genome_build=CFG["runs"]["tumour_genome_build"],
             pair_status=CFG["runs"]["pair_status"],
             tumour_id=CFG["runs"]["tumour_sample_id"],
-            normal_id=CFG["runs"]["normal_sample_id"])
+            normal_id=CFG["runs"]["normal_sample_id"],
+            masked=CFG["runs"]["masked"])
 
 
 
diff --git a/modules/controlfreec/1.2/etc/scripts/_makeGraph_Chromosome.R b/modules/controlfreec/1.2/src/_makeGraph_Chromosome.R
similarity index 100%
rename from modules/controlfreec/1.2/etc/scripts/_makeGraph_Chromosome.R
rename to modules/controlfreec/1.2/src/_makeGraph_Chromosome.R
diff --git a/modules/controlfreec/1.2/etc/scripts/assess_significance.R b/modules/controlfreec/1.2/src/assess_significance.R
similarity index 100%
rename from modules/controlfreec/1.2/etc/scripts/assess_significance.R
rename to modules/controlfreec/1.2/src/assess_significance.R
diff --git a/modules/controlfreec/1.2/etc/scripts/freec2bed.pl b/modules/controlfreec/1.2/src/freec2bed.pl
similarity index 100%
rename from modules/controlfreec/1.2/etc/scripts/freec2bed.pl
rename to modules/controlfreec/1.2/src/freec2bed.pl
diff --git a/modules/controlfreec/1.2/etc/scripts/freec2circos.pl b/modules/controlfreec/1.2/src/freec2circos.pl
similarity index 100%
rename from modules/controlfreec/1.2/etc/scripts/freec2circos.pl
rename to modules/controlfreec/1.2/src/freec2circos.pl
diff --git a/modules/controlfreec/1.2/etc/scripts/get_fasta_lengths.pl b/modules/controlfreec/1.2/src/get_fasta_lengths.pl
similarity index 100%
rename from modules/controlfreec/1.2/etc/scripts/get_fasta_lengths.pl
rename to modules/controlfreec/1.2/src/get_fasta_lengths.pl
diff --git a/modules/controlfreec/1.2/etc/scripts/makeGraph.R b/modules/controlfreec/1.2/src/makeGraph.R
similarity index 100%
rename from modules/controlfreec/1.2/etc/scripts/makeGraph.R
rename to modules/controlfreec/1.2/src/makeGraph.R
diff --git a/modules/controlfreec/1.2/etc/scripts/makeGraph_Chromosome.R b/modules/controlfreec/1.2/src/makeGraph_Chromosome.R
similarity index 100%
rename from modules/controlfreec/1.2/etc/scripts/makeGraph_Chromosome.R
rename to modules/controlfreec/1.2/src/makeGraph_Chromosome.R
diff --git a/modules/controlfreec/1.2/etc/scripts/vcf2snpFreec.pl b/modules/controlfreec/1.2/src/vcf2snpFreec.pl
similarity index 100%
rename from modules/controlfreec/1.2/etc/scripts/vcf2snpFreec.pl
rename to modules/controlfreec/1.2/src/vcf2snpFreec.pl
diff --git a/modules/controlfreec/CHANGELOG.md b/modules/controlfreec/CHANGELOG.md
index e1b646c2e..d20964b8f 100755
--- a/modules/controlfreec/CHANGELOG.md
+++ b/modules/controlfreec/CHANGELOG.md
@@ -58,4 +58,9 @@ Notably, in paired mode, with BAF mode on, FREEC normalizes with GC-content, and
 
 This implementation has been tested on unmatched samples too using a high coverage, normal FFPE sample, and it has shown to display clean profiles in these cases too.
 
-Note: this version is not meant for capture/exome data.
\ No newline at end of file
+Note: this version is not meant for capture/exome data.
+
+## [1.2] patch 2021-02-25
+Added GEM mappability features - can now use/generate a hard-masked mappability file (useful for FFPE genomes) with the setting "hard_masked" = True. If this is set, GEM will be installed and run on your reference genome of choice.
+
+Also added the freec2circos rule.
\ No newline at end of file
diff --git a/modules/gatk_rnaseq/1.0/config/default.yaml b/modules/gatk_rnaseq/1.0/config/default.yaml
index 72b4d793d..bc7561a5e 100644
--- a/modules/gatk_rnaseq/1.0/config/default.yaml
+++ b/modules/gatk_rnaseq/1.0/config/default.yaml
@@ -67,11 +67,10 @@ lcr-modules:
                 mem_mb: 12000
                 bam: 1
             gatk_variant_calling:
-                mem_mb: 48000
+                mem_mb: 12000
                 bam: 1
             gatk_variant_filtration:
                 mem_mb: 12000
-                bam: 1
             merge_vcfs:
                 mem_mb: 10000
             gatk_rnaseq_passed:
diff --git a/modules/gatk_rnaseq/1.0/gatk_rnaseq.smk b/modules/gatk_rnaseq/1.0/gatk_rnaseq.smk
index 493a5504b..f6f880234 100644
--- a/modules/gatk_rnaseq/1.0/gatk_rnaseq.smk
+++ b/modules/gatk_rnaseq/1.0/gatk_rnaseq.smk
@@ -60,10 +60,12 @@ rule _gatk_rnaseq_input_bam:
         bai = CFG["inputs"]["sample_bai"]
     output:
         bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam",
-        bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bai"
+        bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai",
+        crai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.crai",
     run:
         op.absolute_symlink(input.bam, output.bam)
         op.absolute_symlink(input.bai, output.bai)
+        op.absolute_symlink(input.bai, output.crai)
 
 
 rule _gatk_splitntrim:
@@ -71,7 +73,8 @@ rule _gatk_splitntrim:
         bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam",
         fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa")
     output:
-        bam = temp(CFG["dirs"]["gatk_splitntrim"] +  "bam/{seq_type}--{genome_build}/{sample_id}.split_reassign_mq.bam")
+        bam = temp(CFG["dirs"]["gatk_splitntrim"] +  "bam/{seq_type}--{genome_build}/{sample_id}.split_reassign_mq.bam"),
+        bai = temp(CFG["dirs"]["gatk_splitntrim"] +  "bam/{seq_type}--{genome_build}/{sample_id}.split_reassign_mq.bai")
     log:
         stdout = CFG["logs"]["gatk_splitntrim"] + "{seq_type}--{genome_build}/{sample_id}.gatk_splitntrim.stdout.log",
         stderr = CFG["logs"]["gatk_splitntrim"] + "{seq_type}--{genome_build}/{sample_id}.gatk_splitntrim.stderr.log"
@@ -81,6 +84,8 @@ rule _gatk_splitntrim:
         gatk_opts = CFG["options"]["gatk_splitntrim"]
     conda:
         CFG["conda_envs"]["gatk_rnaseq"]
+    group: "split_bam"
+    priority: 50
     threads:
         CFG["threads"]["gatk_splitntrim"]
     resources:
@@ -94,9 +99,11 @@ rule _gatk_splitntrim:
 
 rule _gatk_addRG:
     input:
-        bam = str(rules._gatk_splitntrim.output)
+        bam = str(rules._gatk_splitntrim.output.bam),
+        bai = str(rules._gatk_splitntrim.output.bai)
     output:
-        bam = temp(CFG["dirs"]["gatk_splitntrim"] + "bam_withRG/{seq_type}--{genome_build}/{sample_id}.withRG.bam")
+        bam = temp(CFG["dirs"]["gatk_splitntrim"] + "bam_withRG/{seq_type}--{genome_build}/{sample_id}.withRG.bam"),
+        bai = temp(CFG["dirs"]["gatk_splitntrim"] + "bam_withRG/{seq_type}--{genome_build}/{sample_id}.withRG.bam.bai")
     params:
         sampleName = "{sample_id}",
         platform = CFG["options"]["gatk_addRG"]["platform"],
@@ -104,6 +111,8 @@ rule _gatk_addRG:
         stringency = CFG["options"]["gatk_addRG"]["stringency"]
     conda:
         CFG["conda_envs"]["picard"]
+    group: "split_bam"
+    priority: 40
     log:
         stdout = CFG["logs"]["gatk_splitntrim"] + "bam_withRG/{seq_type}--{genome_build}/{sample_id}.addRG.stdout.log"
     threads:
@@ -118,7 +127,7 @@ rule _gatk_addRG:
 
 rule _gatk_base_recalibration:
     input:
-        bam = str(rules._gatk_addRG.output),
+        bam = str(rules._gatk_addRG.output.bam),
         fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa")
     output:
         table = CFG["dirs"]["base_recal_report"] + "{seq_type}--{genome_build}/{sample_id}.recalibration_report.grp"
@@ -133,6 +142,8 @@ rule _gatk_base_recalibration:
         gatk_opts = CFG["options"]["gatk_baserecalibrator"]
     conda:
         CFG["conda_envs"]["gatk_rnaseq"]
+    group: "split_bam"
+    priority: 30
     threads: CFG["threads"]["gatk_base_recalibration"]
     resources:
         **CFG["resources"]["gatk_base_recalibration"]
@@ -145,6 +156,7 @@ rule _gatk_base_recalibration:
 rule _gatk_applybqsr:
     input:
         bam = str(rules._gatk_addRG.output.bam),
+        bai = str(rules._gatk_addRG.output.bai),
         table = str(rules._gatk_base_recalibration.output.table),
         fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa")
     output:
@@ -159,6 +171,8 @@ rule _gatk_applybqsr:
         gatk_opts = CFG["options"]["gatk_applybqsr"]
     conda:
         CFG["conda_envs"]["gatk_rnaseq"]
+    group: "split_bam"
+    priority: 20
     threads: CFG["threads"]["gatk_applybqsr"]
     resources:
         **CFG["resources"]["gatk_applybqsr"]
@@ -188,6 +202,7 @@ rule _gatk_variant_calling:
         gatk_opts = CFG["options"]["gatk_variant_calling"]["gatk_opts"]
     conda:
         CFG["conda_envs"]["gatk_rnaseq"]
+    group: "split_bam"
     threads: CFG["threads"]["gatk_variant_calling"]
     resources:
         **CFG["resources"]["gatk_variant_calling"]
diff --git a/modules/gridss/1.1/gridss.smk b/modules/gridss/1.1/gridss.smk
index 51483f749..b4935ce10 100644
--- a/modules/gridss/1.1/gridss.smk
+++ b/modules/gridss/1.1/gridss.smk
@@ -15,6 +15,26 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Copy this whole section (down to the "End of dependency checking section" marker) into any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid the exit above, the "packaging" module needs to be added as a dependency of LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["gridss"]`
@@ -24,13 +44,13 @@ CFG = op.setup_module(
     subdirectories = ["inputs", "preprocess", "gridss", "viral_annotation", "gripss", "outputs"],
 )
 
-VERSION_MAP = {
+GRIDSS_VERSION_MAP = {
     "grch37": "hg19", 
     "hs37d5": "hg19", 
     "hg38": "hg38"
 }
 
-possible_genome_builds = VERSION_MAP.keys()
+possible_genome_builds = GRIDSS_VERSION_MAP.keys()
 for genome_build in CFG["runs"]["tumour_genome_build"]:
     assert genome_build in possible_genome_builds, (
         "Samples table includes genome builds not yet compatible with this module. "
@@ -85,7 +105,7 @@ rule _gridss_get_pon:
         pon_breakend = CFG["dirs"]["inputs"] + "references/{genome_build}/pon/gridss_pon_single_breakend.bed", 
         known_pairs = CFG["dirs"]["inputs"] + "references/{genome_build}/pon/KnownFusionPairs.bedpe"
     params: 
-        alt_build = lambda w: VERSION_MAP[w.genome_build], 
+        alt_build = lambda w: GRIDSS_VERSION_MAP[w.genome_build],
         url = "www.bcgsc.ca/downloads/morinlab/hmftools-references/gridss/pon"
     shell: 
         op.as_one_line("""
@@ -162,8 +182,8 @@ rule _gridss_input_bam:
         sample_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam", 
         sample_bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai" 
     run:
-        op.relative_symlink(input.sample_bam, output.sample_bam)
-        op.relative_symlink(input.sample_bai, output.sample_bai)
+        op.absolute_symlink(input.sample_bam, output.sample_bam)
+        op.absolute_symlink(input.sample_bai, output.sample_bai)
 
 # Preprocess unmatched normal bams
 rule _gridss_preprocess_unmatched_normal:
@@ -206,12 +226,12 @@ rule _gridss_symlink_preprocessed_normal:
     input: 
         workdir = str(rules._gridss_preprocess_unmatched_normal.output.workdir)
     output: 
-        workdir = temp(CFG["dirs"]["gridss"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{sample_id}.bam.gridss.working")
+        workdir = temp(directory(CFG["dirs"]["gridss"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{sample_id}.bam.gridss.working"))
     priority: 0
     wildcard_constraints: 
         sample_id = "|".join(unmatched_normal_ids)
     run: 
-        op.relative_symlink(input.workdir, output.workdir)
+        op.absolute_symlink(input.workdir, output.workdir)
 
 # Preprocess all other bams as part of the group job
 rule _gridss_preprocess:
@@ -488,9 +508,9 @@ rule _gridss_output_viral_vcf:
         tbi = CFG["dirs"]["outputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.gridss_viral_annotation_filtered.vcf.gz.tbi", 
         bedpe = CFG["dirs"]["outputs"] + "bedpe/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.gridss_viral_annotation_filtered.bedpe"
     run:
-        op.relative_symlink(input.vcf, output.vcf)
-        op.relative_symlink(input.tbi, output.tbi)
-        op.relative_symlink(input.bedpe, output.bedpe)
+        op.relative_symlink(input.vcf, output.vcf, in_module=True)
+        op.relative_symlink(input.tbi, output.tbi, in_module=True)
+        op.relative_symlink(input.bedpe, output.bedpe, in_module=True)
 
 rule _gridss_output_somatic_vcf:
     input:
@@ -506,11 +526,11 @@ rule _gridss_output_somatic_vcf:
         filtered_tbi = CFG["dirs"]["outputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.gridss_somatic_filtered.vcf.gz.tbi", 
         bedpe = CFG["dirs"]["outputs"] + "bedpe/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.gridss_somatic_filtered.bedpe"
     run:
-        op.relative_symlink(input.somatic, output.somatic)
-        op.relative_symlink(input.somatic_tbi, output.somatic_tbi)
-        op.relative_symlink(input.filtered, output.filtered)
-        op.relative_symlink(input.filtered_tbi, output.filtered_tbi)
-        op.relative_symlink(input.bedpe, output.bedpe)
+        op.relative_symlink(input.somatic, output.somatic, in_module=True)
+        op.relative_symlink(input.somatic_tbi, output.somatic_tbi, in_module=True)
+        op.relative_symlink(input.filtered, output.filtered, in_module=True)
+        op.relative_symlink(input.filtered_tbi, output.filtered_tbi, in_module=True)
+        op.relative_symlink(input.bedpe, output.bedpe, in_module=True)
 
 def _gridss_predict_output(wildcards): 
     """Request symlinks for all VCF files.
diff --git a/modules/gridss/2.0/config/default.yaml b/modules/gridss/2.0/config/default.yaml
new file mode 100644
index 000000000..d1cf1624f
--- /dev/null
+++ b/modules/gridss/2.0/config/default.yaml
@@ -0,0 +1,62 @@
+lcr-modules:
+    
+    gridss:
+
+        inputs:
+            # Available wildcards: {seq_type} {genome_build} {sample_id}
+            sample_bam: "__UPDATE__"
+            sample_bai: "__UPDATE__"
+
+        scratch_subdirectories: [] # Recommended: ["gridss", "preprocess"]
+
+        options:
+            gridss: 
+                --picardoptions VALIDATION_STRINGENCY=SILENT
+            filter_unpaired: 
+            gripss: 
+            # Hard filters remove variants from output VCF
+            # Soft filters add flags to output VCF
+            # These flags don't work with the current version of GRIPSS
+            # A fix is being prepared by the developers
+                -hard_max_normal_absolute_support 3
+                -hard_max_normal_relative_support 0.06
+                -soft_max_normal_relative_support 0.03       
+        
+        conda_envs:
+            wget: "{MODSDIR}/envs/wget-1.20.1.yaml"
+            gridss: "{MODSDIR}/envs/gridss-2.12.0.yaml"
+            gripss: "{MODSDIR}/envs/hmftools-gripss-1.11.yaml"
+            bcftools: "{MODSDIR}/envs/bcftools-1.10.2.yaml"
+            svtools: "{MODSDIR}/envs/svtools-0.5.1.yaml"
+            
+        threads:
+            preprocess: 8
+            gridss: 24
+            repeatmasker: 24
+            filter_gridss: 1
+            gripss: 1 # Not multi-threaded
+            split: 1
+
+        resources:
+            preprocess: 
+                mem_mb: 37500
+                preprocess: 1
+            gridss: 
+                mem_mb: 37500 # Recommended per GRIDSS manual
+                gridss: 1
+            repeatmasker: 
+                mem_mb: 100000
+            gripss: 
+                mem_mb: 20000 # May need to be increased for FFPE tumours
+            split: 
+                mem_mb: 2000
+
+        pairing_config:
+            genome:
+                run_paired_tumours: True
+                run_unpaired_tumours_with: "unmatched_normal"
+                run_paired_tumours_as_unpaired: False
+            capture:
+                run_paired_tumours: True
+                run_unpaired_tumours_with: "unmatched_normal"
+                run_paired_tumours_as_unpaired: False
diff --git a/modules/gridss/2.0/envs/bcftools-1.10.2.yaml b/modules/gridss/2.0/envs/bcftools-1.10.2.yaml
new file mode 120000
index 000000000..72959e7bb
--- /dev/null
+++ b/modules/gridss/2.0/envs/bcftools-1.10.2.yaml
@@ -0,0 +1 @@
+../../../../envs/bcftools/bcftools-1.10.2.yaml
\ No newline at end of file
diff --git a/modules/gridss/2.0/envs/gridss-2.12.0.yaml b/modules/gridss/2.0/envs/gridss-2.12.0.yaml
new file mode 120000
index 000000000..d827cc395
--- /dev/null
+++ b/modules/gridss/2.0/envs/gridss-2.12.0.yaml
@@ -0,0 +1 @@
+../../../../envs/gridss/gridss-2.12.0.yaml
\ No newline at end of file
diff --git a/modules/gridss/2.0/envs/gridss-dependencies-2.9.4.yaml b/modules/gridss/2.0/envs/gridss-dependencies-2.9.4.yaml
new file mode 120000
index 000000000..b7fb269d0
--- /dev/null
+++ b/modules/gridss/2.0/envs/gridss-dependencies-2.9.4.yaml
@@ -0,0 +1 @@
+../../../../envs/gridss/gridss-dependencies-2.9.4.yaml
\ No newline at end of file
diff --git a/modules/gridss/2.0/envs/hmftools-gripss-1.11.yaml b/modules/gridss/2.0/envs/hmftools-gripss-1.11.yaml
new file mode 120000
index 000000000..6a1656b5d
--- /dev/null
+++ b/modules/gridss/2.0/envs/hmftools-gripss-1.11.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-gripss-1.11.yaml
\ No newline at end of file
diff --git a/modules/gridss/2.0/envs/hmftools-gripss-1.4.0.yaml b/modules/gridss/2.0/envs/hmftools-gripss-1.4.0.yaml
new file mode 120000
index 000000000..ca91e8c3f
--- /dev/null
+++ b/modules/gridss/2.0/envs/hmftools-gripss-1.4.0.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-gripss-1.4.0.yaml
\ No newline at end of file
diff --git a/modules/gridss/2.0/envs/hmftools-gripss-1.8.yaml b/modules/gridss/2.0/envs/hmftools-gripss-1.8.yaml
new file mode 120000
index 000000000..b0c2af4a4
--- /dev/null
+++ b/modules/gridss/2.0/envs/hmftools-gripss-1.8.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-gripss-1.8.yaml
\ No newline at end of file
diff --git a/modules/gridss/2.0/envs/svtools-0.5.1.yaml b/modules/gridss/2.0/envs/svtools-0.5.1.yaml
new file mode 120000
index 000000000..6dc2ec0ca
--- /dev/null
+++ b/modules/gridss/2.0/envs/svtools-0.5.1.yaml
@@ -0,0 +1 @@
+../../../../envs/svtools/svtools-0.5.1.yaml
\ No newline at end of file
diff --git a/modules/gridss/2.0/envs/wget-1.20.1.yaml b/modules/gridss/2.0/envs/wget-1.20.1.yaml
new file mode 120000
index 000000000..86501e72a
--- /dev/null
+++ b/modules/gridss/2.0/envs/wget-1.20.1.yaml
@@ -0,0 +1 @@
+../../../../envs/wget/wget-1.20.1.yaml
\ No newline at end of file
diff --git a/modules/gridss/2.0/gridss.smk b/modules/gridss/2.0/gridss.smk
new file mode 100644
index 000000000..99cbdd275
--- /dev/null
+++ b/modules/gridss/2.0/gridss.smk
@@ -0,0 +1,508 @@
+#!/usr/bin/env snakemake
+
+
+##### ATTRIBUTION #####
+
+
+# Original Author:  Laura Hilton
+# Module Author:    Laura Hilton
+# Contributors:     N/A
+
+
+##### SETUP #####
+
+
+# Import package with useful functions for developing analysis modules
+import oncopipe as op
+
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
+
+# Setup module and store module-specific configuration in `CFG`
+# `CFG` is a shortcut to `config["lcr-modules"]["gridss"]`
+CFG = op.setup_module(
+    name = "gridss",
+    version = "2.0",
+    subdirectories = ["inputs", "preprocess", "gridss", "repeatmasker", "gripss", "outputs"],
+)
+
+VERSION_MAP_GRIDSS = {
+    "grch37": "hg19", 
+    "hs37d5": "hg19", 
+    "hg38": "hg38"
+}
+
+possible_genome_builds = VERSION_MAP_GRIDSS.keys()
+for genome_build in CFG["runs"]["tumour_genome_build"]:
+    assert genome_build in possible_genome_builds, (
+        "Samples table includes genome builds not yet compatible with this module. "
+        "This module is currently only compatible with {possible_genome_builds}. "
+    )
+
+sample_ids = list(CFG['samples']['sample_id'])
+unmatched_normal_ids = list(config["lcr-modules"]["_shared"]["unmatched_normal_ids"].values())
+all_other_ids = list(set(sample_ids) - set(unmatched_normal_ids))
+
+# Define rules to be run locally when using a compute cluster
+localrules:
+    _gridss_input_bam,
+    _gridss_input_references,
+    _gridss_setup_references,
+    _gridss_get_pon, 
+    _gridss_symlink_preprocessed_normal, 
+    _gridss_filter_gripss,
+    _gridss_gripss_to_bedpe, 
+    _gridss_output_somatic_vcf,
+    _gridss_all
+
+
+
+##### RULES #####
+
+# Symlink genome fasta with bwa and .fai indices to the same directory
+rule _gridss_input_references: 
+    input: 
+        genome_fa = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"),
+        genome_bwa_prefix = reference_files("genomes/{genome_build}/bwa_index/bwa-0.7.17/genome.fa"),
+    output: 
+        genome_fa = CFG["dirs"]["inputs"] + "references/{genome_build}/genome_fa/genome.fa", 
+    shell: 
+        op.as_one_line("""
+        ln -sf {input.genome_fa} {output.genome_fa} &&
+        ln -sf {input.genome_fa}.fai {output.genome_fa}.fai &&
+        ln -sf {input.genome_bwa_prefix}.* `dirname {output.genome_fa}`
+        """)
+
+# Download the panel of normals and known fusion pairs for the hg19/hg38 equivalent of {genome_build}
+rule _gridss_get_pon: 
+    output: 
+        pon_breakpoint = CFG["dirs"]["inputs"] + "references/{genome_build}/pon/gridss_pon_breakpoint.bedpe", 
+        pon_breakend = CFG["dirs"]["inputs"] + "references/{genome_build}/pon/gridss_pon_single_breakend.bed", 
+        known_pairs = CFG["dirs"]["inputs"] + "references/{genome_build}/pon/KnownFusionPairs.bedpe"
+    params: 
+        alt_build = lambda w: VERSION_MAP_GRIDSS[w.genome_build], 
+        url = "www.bcgsc.ca/downloads/morinlab/hmftools-references/gridss/pon"
+    conda: CFG["conda_envs"]["wget"]  # run wget from the env declared under conda_envs in the module config
+    shell: 
+        op.as_one_line("""
+        wget -O {output.pon_breakpoint} {params.url}/gridss_pon_breakpoint.{params.alt_build}.bedpe &&
+        wget -O {output.pon_breakend} {params.url}/gridss_pon_single_breakend.{params.alt_build}.bed &&
+        wget -O {output.known_pairs} {params.url}/KnownFusionPairs.{params.alt_build}.bedpe
+        """)
+
+# Generate genome.fa.img file
+rule _gridss_setup_references: 
+    input: 
+        fasta = str(rules._gridss_input_references.output.genome_fa), 
+    output: 
+        genome_img = CFG["dirs"]["inputs"] + "references/{genome_build}/genome_fa/genome.fa.img"
+    params: 
+        steps = "setupreference"
+    conda: 
+        CFG["conda_envs"]["gridss"]
+    resources: 
+        mem_mb = 4000
+    threads: 8
+    shell: 
+        op.as_one_line("""
+        gridss
+        --reference {input.fasta}
+        --threads {threads}
+        --jvmheap 3G
+        --steps {params.steps} 
+        --workingdir `dirname {output.genome_img}`
+        """)
+
+
+# Symlink the input files into the module results directory (under '00-inputs/')
+rule _gridss_input_bam:
+    input:
+        sample_bam = CFG["inputs"]["sample_bam"], 
+        sample_bai = CFG["inputs"]["sample_bai"] 
+    output:
+        sample_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam", 
+        sample_bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai" 
+    run:
+        op.absolute_symlink(input.sample_bam, output.sample_bam)
+        op.absolute_symlink(input.sample_bai, output.sample_bai)
+
+# Preprocess unmatched normal bams
+rule _gridss_preprocess_unmatched_normal:
+    input:
+        bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam",
+        fasta = str(rules._gridss_input_references.output.genome_fa),
+        fasta_img = str(rules._gridss_setup_references.output.genome_img)
+    output:
+        workdir = directory(CFG["dirs"]["preprocess"] + "{seq_type}--{genome_build}/{sample_id}.bam.gridss.working")
+    log: CFG["logs"]["preprocess"] + "{seq_type}--{genome_build}/{sample_id}/preprocess.log"
+    params:
+        opts = CFG["options"]["gridss"], 
+        steps = "preprocess", 
+        mem_mb = lambda wildcards, resources: int(resources.mem_mb * 0.8) 
+    conda:
+        CFG["conda_envs"]["gridss"]
+    threads:
+        CFG["threads"]["gridss"]
+    resources:
+        **CFG["resources"]["gridss"]
+    priority: 1
+    wildcard_constraints:
+        sample_id="|".join(unmatched_normal_ids)
+    shell:
+        op.as_one_line("""
+        gridss
+        --reference {input.fasta}
+        --workingdir $(dirname {output.workdir}) 
+        --threads {threads}
+        --jvmheap {params.mem_mb}m
+        --steps {params.steps}
+        {params.opts}
+        {input.bam} 
+        2>&1 | tee -a {log}
+        """)
+
+# Symlink preprocessed sv.bam directories
+
+rule _gridss_symlink_preprocessed_normal: 
+    input: 
+        workdir = str(rules._gridss_preprocess_unmatched_normal.output.workdir)
+    output: 
+        workdir = temp(directory(CFG["dirs"]["gridss"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/{sample_id}.bam.gridss.working"))
+    priority: 0
+    wildcard_constraints: 
+        sample_id = "|".join(unmatched_normal_ids)
+    run: 
+        op.absolute_symlink(input.workdir, output.workdir)
+
+# Preprocess all other bams as part of the group job
+rule _gridss_preprocess:
+    input:
+        bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam",
+        fasta = str(rules._gridss_input_references.output.genome_fa),
+        fasta_img = str(rules._gridss_setup_references.output.genome_img)
+    output:
+        workdir = temp(directory(CFG["dirs"]["gridss"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/{sample_id}.bam.gridss.working"))
+    log: CFG["logs"]["preprocess"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/{sample_id}/preprocess.log"
+    params:
+        opts = CFG["options"]["gridss"], 
+        steps = "preprocess", 
+        mem_mb = lambda wildcards, resources: int(resources.mem_mb * 0.8) 
+    conda:
+        CFG["conda_envs"]["gridss"]
+    threads:
+        CFG["threads"]["preprocess"]
+    resources:
+        **CFG["resources"]["preprocess"]
+    group: "enormous_bam"
+    wildcard_constraints: 
+        sample_id = "|".join(all_other_ids)
+    shell:
+        op.as_one_line("""
+        gridss
+        --reference {input.fasta}
+        --workingdir $(dirname {output.workdir}) 
+        --threads {threads}
+        --jvmheap {params.mem_mb}m
+        --steps {params.steps}
+        {params.opts}
+        {input.bam} 
+        2>&1 | tee -a {log}
+        """)
+
+def get_input_per_patient(wildcards):
+    CFG = config['lcr-modules']['gridss']
+    PATIENT = op.filter_samples(CFG["runs"], tumour_patient_id = wildcards.patient_id)
+    if wildcards.pair_status in ["matched", "unmatched"]:
+        SAMPLES = PATIENT['normal_sample_id'].unique().tolist() + PATIENT['tumour_sample_id'].tolist()
+        bams = expand(
+            [
+                str(rules._gridss_input_bam.output.sample_bam)
+            ], 
+            zip, 
+            sample_id = SAMPLES, 
+            allow_missing = True
+        )
+        preproc = expand(
+            [
+                str(rules._gridss_preprocess.output.workdir)
+            ], 
+            zip, 
+            sample_id = SAMPLES, 
+            allow_missing = True
+        )
+    elif wildcards.pair_status == "no_normal": 
+        bams = expand(
+            [
+                str(rules._gridss_input_bam.output.sample_bam)
+            ], 
+            zip, 
+            sample_id = PATIENT["tumour_sample_id"], 
+            allow_missing = True
+        )
+        preproc = expand(
+            [
+                str(rules._gridss_preprocess.output.workdir)
+            ], 
+            zip, 
+            sample_id = PATIENT["tumour_sample_id"], 
+            allow_missing = True
+        )
+    return {'bams': bams, 'preproc': preproc}
+
+def get_input_sample_ids(wildcards):
+    CFG = config['lcr-modules']['gridss']
+    PATIENT = op.filter_samples(CFG["runs"], tumour_patient_id = wildcards.patient_id)
+    if wildcards.pair_status in ["matched", "unmatched"]:
+        ids = ",".join([",".join(PATIENT['normal_sample_id'].unique().tolist()), ",".join(PATIENT['tumour_sample_id'].tolist())])
+    elif wildcards.pair_status == "no_normal": 
+        ids = ",".join(PATIENT['tumour_sample_id'])
+    return ids
+
+# Run GRIDSS assembly and calling jointly on all of a patient's BAMs (with or without a normal)
+rule _gridss_run:
+    input:
+        unpack(get_input_per_patient),
+        fasta = str(rules._gridss_input_references.output.genome_fa),
+        fasta_img = str(rules._gridss_setup_references.output.genome_img), 
+        blacklist = reference_files("genomes/{genome_build}/encode/encode-blacklist.{genome_build}.bed") 
+    output:
+        vcf = temp(CFG["dirs"]["gridss"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/gridss_raw.vcf.gz"),
+        assembly = temp(CFG["dirs"]["gridss"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/assembly.bam"), 
+        assembly_dir = temp(directory(CFG["dirs"]["gridss"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/assembly.bam.gridss.working")), 
+        vcf_dir = temp(directory(CFG["dirs"]["gridss"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/gridss_raw.vcf.gz.gridss.working"))
+    log: CFG["logs"]["gridss"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/gridss.log"
+    params:
+        ids = lambda wildcards: get_input_sample_ids(wildcards),
+        opts = CFG["options"]["gridss"], 
+        steps = "assemble,call", 
+        mem_mb = lambda wildcards, resources: int(resources.mem_mb * 0.8) 
+    conda:
+        CFG["conda_envs"]["gridss"]
+    threads:
+        CFG["threads"]["gridss"]
+    resources:
+        **CFG["resources"]["gridss"]
+    group: "enormous_bam"
+    shell:
+        op.as_one_line("""
+        gridss
+        --reference {input.fasta}
+        --output {output.vcf}
+        --workingdir `dirname {output.vcf}`
+        --assembly {output.assembly}
+        --blacklist {input.blacklist}
+        --threads {threads}
+        --jvmheap {params.mem_mb}m
+        --labels "{params.ids}"
+        --steps {params.steps}
+        {params.opts}
+        {input.bams} 
+        2>&1 | tee -a {log}
+        """)
+   
+# Annotate GRIDSS VCF with Repeatmasker
+rule _gridss_annotate_repeatmasker: 
+    input: 
+        vcf = str(rules._gridss_run.output.vcf)
+    output: 
+        vcf = temp(CFG["dirs"]["repeatmasker"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/gridss_repeatmasker.vcf.gz"), 
+        tbi = temp(CFG["dirs"]["repeatmasker"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/gridss_repeatmasker.vcf.gz.tbi")
+    log: CFG["logs"]["repeatmasker"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/gridss_repeatmasker.log"
+    params:
+        mem_mb = lambda wildcards, resources: int(resources.mem_mb * 0.8) 
+    conda:
+        CFG["conda_envs"]["gridss"]
+    threads:
+        CFG["threads"]["repeatmasker"]
+    resources:
+        **CFG["resources"]["repeatmasker"]
+    shell:
+        op.as_one_line("""
+        gridss_annotate_vcf_repeatmasker 
+        -o {output.vcf}
+        -t {threads} 
+        -w $(dirname {output.vcf})
+        {input.vcf} 
+        > {log} 2>&1 
+        """)
+
+def get_split_ids(wildcards): 
+    CFG = config['lcr-modules']['gridss']
+    if wildcards.normal_id == "None": 
+        return wildcards.tumour_id
+    else: 
+        return wildcards.normal_id + "," + wildcards.tumour_id
+
+rule _gridss_split_vcf: 
+    input: 
+        vcf = str(rules._gridss_run.output.vcf)
+    output: 
+        vcf = temp(CFG['dirs']['repeatmasker'] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/{tumour_id}--{normal_id}--{pair_status}.gridss_split.vcf.gz"), 
+        tbi = temp(CFG['dirs']['repeatmasker'] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/{tumour_id}--{normal_id}--{pair_status}.gridss_split.vcf.gz.tbi")
+    log: CFG["logs"]["repeatmasker"] + "{seq_type}--{genome_build}/{patient_id}--{pair_status}/{tumour_id}--{normal_id}--{pair_status}.gridss_split_vcf.log"
+    params: 
+        ids = lambda wildcards: get_split_ids(wildcards),
+    conda: 
+        CFG["conda_envs"]["bcftools"]
+    threads: CFG['threads']['split']
+    resources: 
+        **CFG['resources']['split']
+    shell: 
+        op.as_one_line("""
+        bcftools view -s {params.ids} -Oz -o {output.vcf} {input.vcf} 2> {log} && 
+        tabix -p vcf {output.vcf}
+        """)
+
+def get_split_vcf(wildcards): 
+    CFG = config['lcr-modules']['gridss']
+    TUMOUR = op.filter_samples(CFG['runs'], tumour_sample_id = wildcards.tumour_id)
+    vcf = expand(
+        str(rules._gridss_split_vcf.output.vcf), 
+        patient_id = TUMOUR['tumour_patient_id'], 
+        allow_missing = True
+    )
+    return {'vcf': vcf}
+
+def get_gripss_sample_id_cli(wildcards):
+    CFG = config['lcr-modules']['gridss']
+    TUMOUR = op.filter_samples(CFG["runs"], tumour_sample_id = wildcards.tumour_id)
+    if wildcards.pair_status in ["matched", "unmatched"]:
+        return "-tumor " + str("".join(TUMOUR['tumour_sample_id'])) + " -reference " + str("".join(TUMOUR['normal_sample_id']))
+    elif wildcards.pair_status == "no_normal": 
+        return "-tumor " + str("".join(TUMOUR['tumour_sample_id']))
+
+# Perform somatic filtering of the per-sample split VCF against the panel of normals
+rule _gridss_run_gripss: 
+    input: 
+        unpack(get_split_vcf), 
+        fasta = str(rules._gridss_input_references.output.genome_fa),
+        pon_breakend = str(rules._gridss_get_pon.output.pon_breakend), 
+        pon_breakpoint = str(rules._gridss_get_pon.output.pon_breakpoint), 
+        known_pairs = str(rules._gridss_get_pon.output.known_pairs)
+    output: 
+        vcf = CFG["dirs"]["gripss"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gridss_somatic.vcf.gz", 
+        tbi = CFG["dirs"]["gripss"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gridss_somatic.vcf.gz.tbi"
+    log: CFG["logs"]["gripss"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gripss.log"
+    resources: 
+        **CFG["resources"]["gripss"]
+    params:
+        cli = lambda wildcards: get_gripss_sample_id_cli(wildcards),
+        opts = CFG["options"]["gripss"], 
+        mem_mb = lambda wildcards, resources: int(resources.mem_mb * 0.8)  # -Xmx gets 80% of the reserved mem_mb
+    conda: 
+        CFG["conda_envs"]["gripss"]
+    threads: 
+        CFG["threads"]["gripss"]
+    shell: 
+        op.as_one_line(""" 
+        gripss -Xms4G -Xmx{params.mem_mb}m 
+        -ref_genome {input.fasta} 
+        -breakend_pon {input.pon_breakend} 
+        -breakpoint_pon {input.pon_breakpoint} 
+        -breakpoint_hotspot {input.known_pairs}  
+        -input_vcf {input.vcf} 
+        -output_vcf {output.vcf} 
+        {params.cli} 
+        {params.opts} 
+        2>&1 | tee -a {log} 
+        """)
+    
+rule _gridss_filter_gripss: 
+    input: 
+        vcf = str(rules._gridss_run_gripss.output.vcf)
+    output: 
+        vcf = CFG["dirs"]["gripss"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gridss_somatic_filtered.vcf.gz", 
+        tbi = CFG["dirs"]["gripss"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gridss_somatic_filtered.vcf.gz.tbi"
+    conda: 
+        CFG["conda_envs"]["bcftools"]
+    shell: 
+        op.as_one_line("""
+        zcat {input.vcf} | 
+            awk '$7 == "PASS" || $1 ~ /^#/ ' | 
+            bcftools view -Oz -o {output.vcf} && 
+        tabix -p vcf {output.vcf}
+        """)
+
+rule _gridss_gripss_to_bedpe: 
+    input: 
+        vcf = str(rules._gridss_filter_gripss.output.vcf)
+    output: 
+        bedpe = CFG["dirs"]["gripss"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gridss_somatic_filtered.bedpe"
+    conda: 
+        CFG["conda_envs"]["svtools"]
+    shell: 
+        op.as_one_line("""
+        zcat {input.vcf} | 
+            awk '$1 ~ /^#/ || $5 ~ /:/' | 
+            svtools vcftobedpe | grep -v "##" > {output.bedpe}
+        """)
+     
+
+# Symlink the final output files into the module results directory (under '99-outputs/')
+rule _gridss_output_somatic_vcf:
+    input:
+        filtered = str(rules._gridss_filter_gripss.output.vcf), 
+        filtered_tbi = str(rules._gridss_filter_gripss.output.tbi), 
+        somatic = str(rules._gridss_run_gripss.output.vcf), 
+        somatic_tbi = str(rules._gridss_run_gripss.output.tbi),
+        bedpe = str(rules._gridss_gripss_to_bedpe.output.bedpe)
+    output:
+        somatic = CFG["dirs"]["outputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.gridss_somatic.vcf.gz", 
+        somatic_tbi = CFG["dirs"]["outputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.gridss_somatic.vcf.gz.tbi", 
+        filtered = CFG["dirs"]["outputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.gridss_somatic_filtered.vcf.gz", 
+        filtered_tbi = CFG["dirs"]["outputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.gridss_somatic_filtered.vcf.gz.tbi", 
+        bedpe = CFG["dirs"]["outputs"] + "bedpe/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.gridss_somatic_filtered.bedpe"
+    run:
+        op.relative_symlink(input.somatic, output.somatic, in_module=True)
+        op.relative_symlink(input.somatic_tbi, output.somatic_tbi, in_module=True)
+        op.relative_symlink(input.filtered, output.filtered, in_module=True)
+        op.relative_symlink(input.filtered_tbi, output.filtered_tbi, in_module=True)
+        op.relative_symlink(input.bedpe, output.bedpe, in_module=True)
+
+
+
+
+# Generates the target sentinels for each run, which generate the symlinks
+rule _gridss_all:
+    input:
+        expand(
+            [
+                str(rules._gridss_output_somatic_vcf.output.filtered), 
+                str(rules._gridss_output_somatic_vcf.output.filtered_tbi), 
+                str(rules._gridss_output_somatic_vcf.output.somatic), 
+                str(rules._gridss_output_somatic_vcf.output.somatic_tbi), 
+                str(rules._gridss_output_somatic_vcf.output.bedpe)
+            ],
+            zip,  # Run expand() with zip(), not product()
+            seq_type=CFG["runs"]["tumour_seq_type"],
+            genome_build=CFG["runs"]["tumour_genome_build"],
+            tumour_id=CFG["runs"]["tumour_sample_id"],
+            normal_id=CFG["runs"]["normal_sample_id"],
+            pair_status=CFG["runs"]["pair_status"]
+        )
+
+
+##### CLEANUP #####
+
+
+# Perform some clean-up tasks, including storing the module-specific
+# configuration on disk and deleting the `CFG` variable
+op.cleanup_module(CFG)
diff --git a/modules/gridss/2.0/schemas/base-1.0.yaml b/modules/gridss/2.0/schemas/base-1.0.yaml
new file mode 120000
index 000000000..0a69d1ceb
--- /dev/null
+++ b/modules/gridss/2.0/schemas/base-1.0.yaml
@@ -0,0 +1 @@
+../../../../schemas/base/base-1.0.yaml
\ No newline at end of file
diff --git a/modules/gridss/CHANGELOG.md b/modules/gridss/CHANGELOG.md
index f8be466c5..b260775b4 100644
--- a/modules/gridss/CHANGELOG.md
+++ b/modules/gridss/CHANGELOG.md
@@ -5,6 +5,11 @@ All notable changes to the `gridss` module will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [2.0] - 2021-12-29
+This release was authored by Laura Hilton. 
+- Implementing joint calling per patient for multi-timepoint samples. 
+- Module updates enable CRAM support. 
+
 ## [1.1] - 2020-10-09
 This release was authored by Laura Hilton. See the [GRIDSS man page](https://github.com/PapenfussLab/gridss) for extensive documentation. 
 - Add automatic reference file downloading from files hosted at the BCGSC [downloads page](https://bcgsc.ca/downloads/morinlab/hmftools-references/gridss/).
diff --git a/modules/hmftools/1.0/hmftools.smk b/modules/hmftools/1.0/hmftools.smk
index f8163d269..352ba8934 100644
--- a/modules/hmftools/1.0/hmftools.smk
+++ b/modules/hmftools/1.0/hmftools.smk
@@ -15,6 +15,26 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["hmftools"]`
 CFG = op.setup_module(
@@ -40,23 +60,19 @@ localrules:
     _hmftools_all
 
 
-VERSION_MAP = {
+HMFTOOLS_VERSION_MAP = {
     "grch37": "hg19",
     "hs37d5": "hg19",
     "hg38": "hg38"
 }
 
-possible_genome_builds = VERSION_MAP.keys()
+possible_genome_builds = HMFTOOLS_VERSION_MAP.keys()
 for genome_build in CFG["runs"]["tumour_genome_build"]:
     assert genome_build in possible_genome_builds, (
         "Samples table includes genome builds not yet compatible with this module. "
         "This module is currently only compatible with {possible_genome_builds}. "
     )
 
-wildcard_constraints: 
-    genome_build = "|".join(VERSION_MAP.keys()), 
-    pair_status = "matched|unmatched"
-
 
 ##### RULES #####
 
@@ -69,9 +85,12 @@ rule _hmftools_input_bam:
     output:
         bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam", 
         bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bai", 
+    wildcard_constraints:
+        genome_build = "|".join(HMFTOOLS_VERSION_MAP.keys()),
+        pair_status = "matched|unmatched"
     run:
-        op.relative_symlink(input.bam, output.bam)
-        op.relative_symlink(input.bai, output.bai)
+        op.absolute_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bai, output.bai)
 
 rule _hmftools_input_strelka: 
     input: 
@@ -79,7 +98,7 @@ rule _hmftools_input_strelka:
     output: 
         strelka_vcf = CFG["dirs"]["inputs"] + "strelka_vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/somatic.combined.vcf.gz" 
     run: 
-        op.relative_symlink(input.strelka_vcf, output.strelka_vcf)
+        op.absolute_symlink(input.strelka_vcf, output.strelka_vcf)
 
 rule _hmftools_input_gridss: 
     input: 
@@ -93,10 +112,10 @@ rule _hmftools_input_gridss:
         gridss_filtered_vcf = CFG["dirs"]["inputs"] + "gridss_vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gridss_somatic_filtered.vcf.gz", 
         gridss_filtered_tbi = CFG["dirs"]["inputs"] + "gridss_vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gridss_somatic_filtered.vcf.gz.tbi"
     run: 
-        op.relative_symlink(input.gridss_somatic_vcf, output.gridss_somatic_vcf)
-        op.relative_symlink(input.gridss_somatic_tbi, output.gridss_somatic_tbi)
-        op.relative_symlink(input.gridss_filtered_vcf, output.gridss_filtered_vcf)
-        op.relative_symlink(input.gridss_filtered_tbi, output.gridss_filtered_tbi)
+        op.absolute_symlink(input.gridss_somatic_vcf, output.gridss_somatic_vcf)
+        op.absolute_symlink(input.gridss_somatic_tbi, output.gridss_somatic_tbi)
+        op.absolute_symlink(input.gridss_filtered_vcf, output.gridss_filtered_vcf)
+        op.absolute_symlink(input.gridss_filtered_tbi, output.gridss_filtered_tbi)
 
 # Rules to download and setup reference files
 
@@ -121,7 +140,7 @@ rule _hmftools_get_cobalt_gc:
         gc = CFG["dirs"]["inputs"] + "references/{genome_build}/cobalt/GC_profile.1000bp.cnp"
     params: 
         url = "www.bcgsc.ca/downloads/morinlab/hmftools-references/cobalt",
-        alt_build = lambda w: VERSION_MAP[w.genome_build]
+        alt_build = lambda w: HMFTOOLS_VERSION_MAP[w.genome_build]
     conda: 
         CFG["conda_envs"]["wget"]
     shell: 
@@ -133,7 +152,7 @@ rule _hmftools_get_amber_snps:
         snpcheck = CFG["dirs"]["inputs"] + "references/{genome_build}/amber/GermlineHetPon.snpcheck.vcf.gz"
     params: 
         url = "www.bcgsc.ca/downloads/morinlab/hmftools-references/amber",
-        alt_build = lambda w: VERSION_MAP[w.genome_build]
+        alt_build = lambda w: HMFTOOLS_VERSION_MAP[w.genome_build]
     conda: 
         CFG["conda_envs"]["wget"]
     shell: 
@@ -146,7 +165,7 @@ rule _hmftools_get_purple_drivers:
         gene_panel = CFG["dirs"]["inputs"] + "references/{genome_build}/purple/DriverGenePanel.tsv"
     params: 
         url = "www.bcgsc.ca/downloads/morinlab/hmftools-references/purple",
-        alt_build = lambda w: VERSION_MAP[w.genome_build]
+        alt_build = lambda w: HMFTOOLS_VERSION_MAP[w.genome_build]
     conda: 
         CFG["conda_envs"]["wget"]
     shell: 
@@ -493,7 +512,7 @@ rule _hmftools_linx:
     resources: 
         **CFG["resources"]["linx"]
     params: 
-      alt_build = lambda w: VERSION_MAP[w.genome_build], 
+      alt_build = lambda w: HMFTOOLS_VERSION_MAP[w.genome_build],
       ensembl_build = lambda w: {
           "grch37": "HG37",
           "hs37d5": "HG37", 
@@ -628,7 +647,7 @@ rule _hmftools_purple_output:
     output:
         files = CFG["dirs"]["outputs"] + "purple_output/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.purple.{out_file}" 
     run:
-        op.relative_symlink(input.files, output.files)
+        op.relative_symlink(input.files, output.files, in_module=True)
 
 rule _hmftools_purple_plots: 
     input:
@@ -636,7 +655,7 @@ rule _hmftools_purple_plots:
     output: 
         plots = CFG["dirs"]["outputs"] + "purple_plots/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{plot_name}.png"
     run: 
-        op.relative_symlink(input.plots, output.plots)
+        op.relative_symlink(input.plots, output.plots, in_module=True)
 
 
 rule _hmftools_linx_plots: 
diff --git a/modules/hmftools/1.1/config/default.yaml b/modules/hmftools/1.1/config/default.yaml
new file mode 100644
index 000000000..5df6e6685
--- /dev/null
+++ b/modules/hmftools/1.1/config/default.yaml
@@ -0,0 +1,101 @@
+lcr-modules:
+    
+    hmftools:
+
+        # Input file paths; every "__UPDATE__" placeholder must be replaced by the user
+        inputs:
+            # Available wildcards: {seq_type} {genome_build} {sample_id}
+            sample_bam: "__UPDATE__"
+            sample_bai: "__UPDATE__"
+            # Available wildcards: {seq_type} {genome_build} {tumour_id} {normal_id} {pair_status}
+            # Note: SLMS-3 outputs are recommended.
+            # The pipeline will take any VCF where the samples are labeled 'TUMOR' and 'NORMAL',
+            # and where the VCF is annotated with "AD" and "DP" fields.
+            # Must be bgzip-compressed with a `.vcf.gz` extension.
+            slms3_vcf: "__UPDATE__"
+            # Available wildcards: {seq_type} {genome_build} {tumour_id} {normal_id} {pair_status}
+            # Note: These are output by the gripss somatic filtering step of the gridss module
+            gridss_somatic: "__UPDATE__"  # Output of GRIPSS
+            gridss_somatic_tbi: "__UPDATE__"  # Tabix index of the GRIPSS output
+            gridss_somatic_filtered: "__UPDATE__"  # Filtered output of GRIPSS
+            gridss_somatic_filtered_tbi: "__UPDATE__"  # Tabix index of the filtered output
+
+        scratch_subdirectories: []
+
+        switches:
+            ensembl_url:  # one Ensembl core database URL per reference version
+                "37": "mysql://ensembldb.ensembl.org:3337/homo_sapiens_core_89_37"
+                "38": "mysql://ensembldb.ensembl.org:3306/homo_sapiens_core_98_38"
+
+        options:
+            use_masked_ref: false  # true selects the "<genome_build>_masked" reference files
+            amber: >-  # extra command-line arguments appended to the AMBER call
+                -validation_stringency SILENT
+            cobalt: >-  # extra command-line arguments appended to the COBALT call
+                -validation_stringency SILENT
+            purple: ""
+            linx: ""
+            linx_viz: >-  # folded into one space-separated argument string
+                -fusion_legend_height_per_row 70
+                -segment_relative_size 0.5
+                -outer_radius 0.85
+                -min_line_size 4
+                -max_line_size 18
+                -min_label_size 45
+                -max_label_size 50
+                -glyph_size 25
+                -exon_rank_radius 0.04
+                -max_gene_characters 15
+            linx_viz_annotate: >-  # folded into one space-separated argument string
+                -fusion_legend_height_per_row 70
+                -segment_relative_size 0.5
+                -outer_radius 0.85
+                -min_line_size 4
+                -max_line_size 18
+                -min_label_size 45
+                -max_label_size 50
+                -glyph_size 25
+                -exon_rank_radius 0.04
+                -max_gene_characters 15
+
+        conda_envs:
+            samtools: "{MODSDIR}/envs/samtools-1.9.yaml"
+            wget: "{MODSDIR}/envs/wget-1.20.1.yaml"
+            bcftools: "{MODSDIR}/envs/bcftools-1.10.2.yaml"
+            amber: "{MODSDIR}/envs/hmftools-amber-3.5.yaml"
+            cobalt: "{MODSDIR}/envs/hmftools-cobalt-1.11.yaml"
+            purple: "{MODSDIR}/envs/hmftools-purple-2.54.yaml"
+            linx: "{MODSDIR}/envs/hmftools-linx-1.15.yaml"
+            linx_annotate: "{MODSDIR}/envs/hmftools-linx-1.15.yaml"  # same environment as linx
+            snpeff: "{MODSDIR}/envs/snpeff-4.3.1t.yaml"
+
+        threads:
+            vcf_sample_names: 1
+            snpeff: 4
+            amber: 16
+            cobalt: 16
+            purple: 8
+            linx: 2
+            linx_viz: 8
+
+        resources:
+            vcf_sample_names:
+                mem_mb: 1000
+            snpeff:
+                mem_mb: 5000  # the module passes 80% of this to snpEff as -Xmx
+            amber:
+                mem_mb: 36000  # the module passes 80% of this to the JVM as -Xmx
+            cobalt:
+                mem_mb: 20000  # the module passes 80% of this to the JVM as -Xmx
+            purple:
+                mem_mb: 20000  # the module passes 90% of this to the JVM as -Xmx
+            linx:
+                mem_mb: 10000  # the module passes 80% of this to the JVM as -Xmx
+            linx_viz:
+                mem_mb: 20000  # the module passes 80% of this to the JVM as -Xmx
+
+        pairing_config:
+            genome:
+                run_paired_tumours: true
+                run_unpaired_tumours_with: "unmatched_normal"
+                run_paired_tumours_as_unpaired: false
diff --git a/modules/hmftools/1.1/envs/bcftools-1.10.2.yaml b/modules/hmftools/1.1/envs/bcftools-1.10.2.yaml
new file mode 120000
index 000000000..72959e7bb
--- /dev/null
+++ b/modules/hmftools/1.1/envs/bcftools-1.10.2.yaml
@@ -0,0 +1 @@
+../../../../envs/bcftools/bcftools-1.10.2.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-amber-3.4.yaml b/modules/hmftools/1.1/envs/hmftools-amber-3.4.yaml
new file mode 120000
index 000000000..fac6fa12b
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-amber-3.4.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-amber-3.4.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-amber-3.5.yaml b/modules/hmftools/1.1/envs/hmftools-amber-3.5.yaml
new file mode 120000
index 000000000..71dfe9fb6
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-amber-3.5.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-amber-3.5.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-cobalt-1.11.yaml b/modules/hmftools/1.1/envs/hmftools-cobalt-1.11.yaml
new file mode 120000
index 000000000..d671910e9
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-cobalt-1.11.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-cobalt-1.11.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-cobalt-1.8.yaml b/modules/hmftools/1.1/envs/hmftools-cobalt-1.8.yaml
new file mode 120000
index 000000000..eb143618e
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-cobalt-1.8.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-cobalt-1.8.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-cobalt-1.9.yaml b/modules/hmftools/1.1/envs/hmftools-cobalt-1.9.yaml
new file mode 120000
index 000000000..8c4af7acd
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-cobalt-1.9.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-cobalt-1.9.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-linx-1.10.yaml b/modules/hmftools/1.1/envs/hmftools-linx-1.10.yaml
new file mode 120000
index 000000000..6383f8839
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-linx-1.10.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-linx-1.10.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-linx-1.11.yaml b/modules/hmftools/1.1/envs/hmftools-linx-1.11.yaml
new file mode 120000
index 000000000..09c92d668
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-linx-1.11.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-linx-1.11.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-linx-1.15.yaml b/modules/hmftools/1.1/envs/hmftools-linx-1.15.yaml
new file mode 120000
index 000000000..f789beede
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-linx-1.15.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-linx-1.15.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-purple-2.44.yaml b/modules/hmftools/1.1/envs/hmftools-purple-2.44.yaml
new file mode 120000
index 000000000..43d663aa7
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-purple-2.44.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-purple-2.44.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-purple-2.45.yaml b/modules/hmftools/1.1/envs/hmftools-purple-2.45.yaml
new file mode 120000
index 000000000..dd0a26c59
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-purple-2.45.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-purple-2.45.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-purple-2.48.yaml b/modules/hmftools/1.1/envs/hmftools-purple-2.48.yaml
new file mode 120000
index 000000000..bb5438531
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-purple-2.48.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-purple-2.48.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/hmftools-purple-2.54.yaml b/modules/hmftools/1.1/envs/hmftools-purple-2.54.yaml
new file mode 120000
index 000000000..66e18c157
--- /dev/null
+++ b/modules/hmftools/1.1/envs/hmftools-purple-2.54.yaml
@@ -0,0 +1 @@
+../../../../envs/hmftools/hmftools-purple-2.54.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/samtools-1.9.yaml b/modules/hmftools/1.1/envs/samtools-1.9.yaml
new file mode 120000
index 000000000..ab29288bb
--- /dev/null
+++ b/modules/hmftools/1.1/envs/samtools-1.9.yaml
@@ -0,0 +1 @@
+../../../../envs/samtools/samtools-1.9.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/snpeff-4.3.1t.yaml b/modules/hmftools/1.1/envs/snpeff-4.3.1t.yaml
new file mode 120000
index 000000000..c452e525b
--- /dev/null
+++ b/modules/hmftools/1.1/envs/snpeff-4.3.1t.yaml
@@ -0,0 +1 @@
+../../../../envs/snpeff/snpeff-4.3.1t.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/envs/wget-1.20.1.yaml b/modules/hmftools/1.1/envs/wget-1.20.1.yaml
new file mode 120000
index 000000000..86501e72a
--- /dev/null
+++ b/modules/hmftools/1.1/envs/wget-1.20.1.yaml
@@ -0,0 +1 @@
+../../../../envs/wget/wget-1.20.1.yaml
\ No newline at end of file
diff --git a/modules/hmftools/1.1/hmftools.smk b/modules/hmftools/1.1/hmftools.smk
new file mode 100644
index 000000000..db45ff9e3
--- /dev/null
+++ b/modules/hmftools/1.1/hmftools.smk
@@ -0,0 +1,629 @@
+#!/usr/bin/env snakemake
+
+
+##### ATTRIBUTION #####
+
+
+# Original Author:  Laura Hilton
+# Module Author:    Laura Hilton
+# Contributors:     N/A
+
+
+##### SETUP #####
+
+
+# Import package with useful functions for developing analysis modules
+import oncopipe as op
+
+# Abort early unless the installed oncopipe is recent enough for this module. Copy this whole section into any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# The try/except above could be dropped if the "packaging" module were declared as a dependency of LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section. NOTE(review): `sys` and `logger` are not imported in this file; presumably Snakemake provides them - confirm
+
+# Register this module with oncopipe and keep its module-specific configuration in `CFG`,
+# which is a shortcut to `config["lcr-modules"]["hmftools"]`
+CFG = op.setup_module(
+    name="hmftools",
+    version="1.1",
+    subdirectories=["inputs", "prepare_slms3", "amber", "cobalt", "purple", "linx", "outputs"],
+)
+
+# Rules executed on the submitting host instead of being sent to the compute cluster
+localrules:
+    _hmftools_input_bam,
+    _hmftools_input_slms3,
+    _hmftools_input_gridss,
+    _hmftools_slms3_sample_names,
+    _hmftools_input_references,
+    _hmftools_get_cobalt_gc,
+    _hmftools_get_cobalt_bed,
+    _hmftools_get_amber_snps,
+    _hmftools_get_purple_drivers,
+    _hmftools_get_linx_db,
+    _hmftools_get_ensembl_cache,
+    _hmftools_purple_output,
+    _hmftools_purple_plots,
+    _hmftools_all
+
+
+VERSION_MAP_HMFTOOLS = {  # genome build -> reference version label used in hmftools resource names
+    "grch37": "37",
+    "hs37d5": "37",
+    "hg38": "38"
+}
+
+possible_genome_builds = VERSION_MAP_HMFTOOLS.keys()
+for genome_build in CFG["runs"]["tumour_genome_build"]:  # runs while the Snakefile is loaded, before any job starts
+    assert genome_build in possible_genome_builds, (
+        "Samples table includes genome builds not yet compatible with this module. "
+        f"This module is currently only compatible with {', '.join(possible_genome_builds)}. "
+    )
+
+
+masked_string = (  # suffix added to the genome build in reference paths; empty unless use_masked_ref is set
+    "_masked" if CFG["options"]["use_masked_ref"] else ""
+)
+
+
+##### RULES #####
+
+
+# Link each sample's BAM and its index into the module's inputs directory
+rule _hmftools_input_bam:
+    input:
+        bam = CFG["inputs"]["sample_bam"],
+        bai = CFG["inputs"]["sample_bai"]
+    output:
+        bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam",
+        bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bai"
+    wildcard_constraints:
+        genome_build = "|".join(VERSION_MAP_HMFTOOLS),  # only the builds listed in VERSION_MAP_HMFTOOLS
+        pair_status = "matched|unmatched"
+    group: "input_and_vcf"
+    run:
+        for link_name in ("bam", "bai"):  # each input is linked to the output of the same name
+            op.absolute_symlink(getattr(input, link_name), getattr(output, link_name))
+
+rule _hmftools_input_slms3:
+    input:
+        vcf = CFG["inputs"]["slms3_vcf"]  # somatic VCF with samples labelled TUMOR and NORMAL
+    output:
+        vcf = CFG["dirs"]["inputs"] + "slms3_vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/slms3.vcf.gz"
+    group: "input_and_vcf"
+    run:
+        op.absolute_symlink(input.vcf, output.vcf)  # same helper as the BAM and GRIDSS input rules
+
+rule _hmftools_input_gridss:
+    input:
+        gridss_somatic_vcf = CFG["inputs"]["gridss_somatic"],  # GRIPSS output and its index
+        gridss_somatic_tbi = CFG["inputs"]["gridss_somatic_tbi"],
+        gridss_filtered_vcf = CFG["inputs"]["gridss_somatic_filtered"],  # filtered GRIPSS output and its index
+        gridss_filtered_tbi = CFG["inputs"]["gridss_somatic_filtered_tbi"]
+    output:
+        gridss_somatic_vcf = CFG["dirs"]["inputs"] + "gridss_vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gridss_somatic.vcf.gz",
+        gridss_somatic_tbi = CFG["dirs"]["inputs"] + "gridss_vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gridss_somatic.vcf.gz.tbi",
+        gridss_filtered_vcf = CFG["dirs"]["inputs"] + "gridss_vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gridss_somatic_filtered.vcf.gz",
+        gridss_filtered_tbi = CFG["dirs"]["inputs"] + "gridss_vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/gridss_somatic_filtered.vcf.gz.tbi"
+    group: "input_and_vcf"
+    run:
+        # Each input is linked to the output that carries the same name
+        for key in ("gridss_somatic_vcf", "gridss_somatic_tbi",
+                    "gridss_filtered_vcf", "gridss_filtered_tbi"):
+            op.absolute_symlink(getattr(input, key), getattr(output, key))
+
+# Rules to download and setup reference files
+
+rule _hmftools_input_references: 
+    input: 
+        genome_fa = reference_files("genomes/{genome_build}" + masked_string + "/genome_fasta/genome.fa"),  # masked_string is "_masked" or empty
+        genome_fai = reference_files("genomes/{genome_build}" + masked_string + "/genome_fasta/genome.fa.fai"),
+        genome_dict = reference_files("genomes/{genome_build}" + masked_string + "/genome_fasta/genome.dict")
+    output: 
+        genome_fa = CFG["dirs"]["inputs"] + "references/{genome_build}" + masked_string + "/genome_fa/genome.fa",  # FASTA, index and dict are linked side by side in one directory
+        genome_fai = CFG["dirs"]["inputs"] + "references/{genome_build}" + masked_string + "/genome_fa/genome.fa.fai", 
+        genome_dict = CFG["dirs"]["inputs"] + "references/{genome_build}" + masked_string + "/genome_fa/genome.dict"  # NOTE(review): ln -s stores the source path as given; presumably reference_files() yields absolute paths - confirm
+    shell: 
+        op.as_one_line("""
+        ln -s {input.genome_fa} {output.genome_fa} &&
+        ln -s {input.genome_fai} {output.genome_fai} &&
+        ln -s {input.genome_dict} {output.genome_dict}
+        """)
+
+rule _hmftools_get_cobalt_gc:
+    output:
+        gc = CFG["dirs"]["inputs"] + "references/{genome_build}/cobalt/GC_profile.1000bp.cnp"  # GC profile downloaded for the requested build
+    conda:
+        CFG["conda_envs"]["wget"]
+    params:
+        alt_build = lambda wildcards: VERSION_MAP_HMFTOOLS[wildcards.genome_build],  # "37" or "38", part of the remote file name
+        url = "www.bcgsc.ca/downloads/morinlab/hmftools-references/cobalt"
+    shell:
+        'wget -O {output.gc} {params.url}/GC_profile.1000bp.{params.alt_build}.cnp'
+
+rule _hmftools_get_cobalt_bed:
+    output:
+        bed = CFG["dirs"]["inputs"] + "references/{genome_build}/cobalt/DiploidRegions.bed"  # diploid regions BED downloaded for the requested build
+    conda:
+        CFG["conda_envs"]["wget"]
+    params:
+        alt_build = lambda wildcards: VERSION_MAP_HMFTOOLS[wildcards.genome_build],  # "37" or "38", part of the remote file name
+        url = "www.bcgsc.ca/downloads/morinlab/hmftools-references/cobalt"
+    shell:
+        'wget -O {output.bed} {params.url}/DiploidRegions.{params.alt_build}.bed'
+
+rule _hmftools_get_amber_snps: 
+    output: 
+        vcf = CFG["dirs"]["inputs"] + "references/{genome_build}/amber/GermlineHetPon.vcf.gz",  # loci file later passed to AMBER via -loci
+        snpcheck = CFG["dirs"]["inputs"] + "references/{genome_build}/amber/Amber.snpcheck.vcf.gz"  # NOTE(review): fetched from a URL ending in ".vcf" but saved as ".vcf.gz" - confirm the remote file is compressed
+    params: 
+        url = "www.bcgsc.ca/downloads/morinlab/hmftools-references/amber",
+        alt_build = lambda w: VERSION_MAP_HMFTOOLS[w.genome_build]  # "37" or "38", part of the remote file names
+    conda: 
+        CFG["conda_envs"]["wget"]
+    shell: 
+        'wget -O {output.vcf} {params.url}/GermlineHetPon.{params.alt_build}.vcf.gz; '
+        'wget -O {output.snpcheck} {params.url}/Amber.snpcheck.{params.alt_build}.vcf'
+
+rule _hmftools_get_purple_drivers: 
+    output: 
+        hotspots = CFG["dirs"]["inputs"] + "references/{genome_build}/purple/KnownHotspots.vcf.gz",  # the command also writes "<hotspots>.tbi" beside it, which is not declared as an output
+        gene_panel = CFG["dirs"]["inputs"] + "references/{genome_build}/purple/DriverGenePanel.tsv"  # later passed to PURPLE via -driver_gene_panel
+    params: 
+        url = "www.bcgsc.ca/downloads/morinlab/hmftools-references/purple",
+        alt_build = lambda w: VERSION_MAP_HMFTOOLS[w.genome_build]  # "37" or "38", part of the remote file names
+    conda: 
+        CFG["conda_envs"]["wget"]
+    shell: 
+        'wget -O {output.hotspots} {params.url}/KnownHotspots.somatic.{params.alt_build}.vcf.gz && '
+        'wget -O {output.hotspots}.tbi {params.url}/KnownHotspots.somatic.{params.alt_build}.vcf.gz.tbi && '
+        'wget -O {output.gene_panel} {params.url}/DriverGenePanel.{params.alt_build}.tsv'
+
+rule _hmftools_get_linx_db:
+    output:
+        directory(CFG["dirs"]["inputs"] + "references/{genome_build}/linx_db")  # filled with the .bed/.csv files fetched below
+    conda:
+        CFG["conda_envs"]["wget"]
+    params:
+        alt_build = lambda wildcards: VERSION_MAP_HMFTOOLS[wildcards.genome_build],  # "37" or "38", the remote subdirectory
+        url = "www.bcgsc.ca/downloads/morinlab/hmftools-references/linx/Linx"
+    shell:
+        'wget -r -np -nd -P {output} -A .bed,.csv {params.url}/{params.alt_build}  && '
+        'wget -O {output}/viral_host_ref.csv {params.url}/viral_host_ref.csv'
+
+rule _hmftools_get_ensembl_cache: 
+    output: 
+        cache = directory(CFG["dirs"]["inputs"] + "references/{genome_build}/ensembl_cache/"),  # receives the downloaded zip and its unpacked contents
+        complete = touch(CFG["dirs"]["inputs"] + "references/{genome_build}/ensembl_cache/cache.complete")  # marker file touched by Snakemake; it lives inside the cache directory
+    params: 
+        url = "www.bcgsc.ca/downloads/morinlab/hmftools-references/ensembl_data_cache",
+        alt_build = lambda w: VERSION_MAP_HMFTOOLS[w.genome_build]  # "37" or "38", the name of the remote zip
+    conda: 
+        CFG["conda_envs"]["wget"]  # NOTE(review): the command also calls unzip; presumably it is available in this environment - confirm
+    shell: 
+        'wget -O {output.cache}/{params.alt_build}.zip {params.url}/{params.alt_build}.zip && '
+        'unzip -d {output.cache} {output.cache}/{params.alt_build}.zip'
+
+# Prepare SLMS-3 VCF files for use with PURPLE: first replace the TUMOR/NORMAL labels with the real sample IDs,
+# then annotate with SnpEff, which enables PURPLE's driver discovery logic
+
+rule _hmftools_slms3_sample_names: 
+    input: 
+        vcf = rules._hmftools_input_slms3.output.vcf
+    output: 
+        vcf = temp(CFG["dirs"]["prepare_slms3"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/tmp.slms3.vcf")  # uncompressed intermediate, removed once consumed
+    log: CFG["logs"]["prepare_slms3"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/vcf_sample_names.log"  # kept under logs/ like the SnpEff rule; receives bcftools stderr
+    conda: 
+        CFG["conda_envs"]["bcftools"]
+    threads: CFG["threads"]["vcf_sample_names"]
+    resources: 
+        **CFG["resources"]["vcf_sample_names"]
+    group: "input_and_vcf"
+    shell: 
+        op.as_one_line("""
+        bcftools view -Ov {input.vcf} 2> {log} | 
+        sed 's/TUMOR/{wildcards.tumour_id}/g' | 
+        sed 's/NORMAL/{wildcards.normal_id}/g'  
+        > {output.vcf}
+        """)
+
+rule _hmftools_snpeff_vcf: 
+    input: 
+        vcf = str(rules._hmftools_slms3_sample_names.output.vcf)
+    output: 
+        sample_key = temp(CFG["dirs"]["prepare_slms3"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/sample_key.txt"),  # "normal<TAB>tumour" line written by printf below
+        vcf = temp(CFG["dirs"]["prepare_slms3"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/slms3.snpeff.vcf.gz")  # bgzipped and tabix-indexed by bcftools
+    resources: 
+        **CFG["resources"]["snpeff"]
+    params: 
+        snpeff_build = lambda w: {
+            "grch37": "GRCh37.75", 
+            "hs37d5": "GRCh37.75", 
+            "hg38": "hg38"
+        }[w.genome_build],  # snpEff database name for each supported genome build
+        config = "$(readlink -e $(which snpEff)).config",  # expanded by the shell: the .config file next to the resolved snpEff executable
+        mem_mb = lambda wildcards, resources: int(resources.mem_mb * 0.8)  # 80% of the requested memory goes to the JVM heap
+    log: 
+        CFG["logs"]["prepare_slms3"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/snpeff_slms3.log"
+    conda: 
+        CFG["conda_envs"]["snpeff"]
+    threads: 
+        CFG["threads"]["snpeff"]  # the backslashes in the printf format below are doubled so that printf, not Python, expands the tab and newline
+    shell: 
+        op.as_one_line("""
+        printf "{wildcards.normal_id}\\t{wildcards.tumour_id}\\n" > {output.sample_key} && 
+        snpEff -Xmx{params.mem_mb}m   
+        -c {params.config} -noStats
+        -cancer -cancerSamples {output.sample_key} 
+        {params.snpeff_build} {input.vcf} | 
+        bcftools view -Oz -o {output.vcf} - && 
+        bcftools index -t {output.vcf}
+        """)
+
+
+# AMBER computes B-allele frequencies; this variant handles tumours paired with their matched normal
+rule _hmftools_amber_matched:
+    input:
+        tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam",
+        normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam",
+        snps = str(rules._hmftools_get_amber_snps.output.vcf),
+        fasta = str(rules._hmftools_input_references.output.genome_fa)
+    output:
+        vcf = CFG["dirs"]["amber"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.amber.baf.vcf.gz"
+    wildcard_constraints:
+        pair_status = "matched"
+    log: CFG["logs"]["amber"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/amber.log"
+    conda:
+        CFG["conda_envs"]["amber"]
+    threads:
+        CFG["threads"]["amber"]
+    resources:
+        **CFG["resources"]["amber"]
+    params:
+        jvmheap = lambda wildcards, resources: int(resources.mem_mb * 0.8),  # 80% of the requested memory goes to the JVM heap
+        options = CFG["options"]["amber"]
+    shell: 
+        op.as_one_line("""
+        AMBER -Xmx{params.jvmheap}m
+        -reference {wildcards.normal_id} -reference_bam {input.normal_bam}
+        -tumor {wildcards.tumour_id} -tumor_bam {input.tumour_bam}
+        -output_dir `dirname {output.vcf}`
+        -threads {threads}
+        -loci {input.snps}
+        -ref_genome {input.fasta} 
+        {params.options}
+        2>&1 | tee -a {log} 
+        """)
+
+rule _hmftools_amber_unmatched:
+    input:
+        tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam",
+        normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam",  # required as an input, yet the -tumor_only command below never references it
+        snps = str(rules._hmftools_get_amber_snps.output.vcf),
+        fasta = str(rules._hmftools_input_references.output.genome_fa)
+    output:
+        vcf = CFG["dirs"]["amber"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.amber.baf.vcf.gz"
+    wildcard_constraints:
+        pair_status = "unmatched"
+    log: CFG["logs"]["amber"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/amber.log"
+    conda:
+        CFG["conda_envs"]["amber"]
+    threads:
+        CFG["threads"]["amber"]
+    resources:
+        **CFG["resources"]["amber"]
+    params:
+        jvmheap = lambda wildcards, resources: int(resources.mem_mb * 0.8),  # 80% of the requested memory goes to the JVM heap
+        options = CFG["options"]["amber"]
+    shell: 
+        op.as_one_line("""
+        AMBER -Xmx{params.jvmheap}m
+        -tumor_only 
+        -tumor {wildcards.tumour_id} -tumor_bam {input.tumour_bam} 
+        -output_dir `dirname {output.vcf}`
+        -threads {threads}
+        -loci {input.snps}
+        -ref_genome {input.fasta}
+        {params.options}
+        2>&1 | tee -a {log} 
+        """)
+
+# Run COBALT on the tumour/normal BAM pair to estimate read depth across the genome
+rule _hmftools_cobalt: 
+    input: 
+        tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam",
+        normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam", 
+        gc_profile = str(rules._hmftools_get_cobalt_gc.output.gc), 
+        fasta = str(rules._hmftools_input_references.output.genome_fa)
+    output: 
+        tumour_ratio = CFG["dirs"]["cobalt"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.cobalt.ratio.pcf",  # its directory is what COBALT receives as -output_dir
+        normal_ratio = CFG["dirs"]["cobalt"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{normal_id}.cobalt.ratio.pcf", 
+        tumour_tsv = temp(CFG["dirs"]["cobalt"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.cobalt.ratio.tsv"),  # temp(): deleted once downstream rules have used it
+    log: ratio = CFG["logs"]["cobalt"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/cobalt.log"  # the only log file, so {log} in the command expands to this path
+    resources: 
+        **CFG["resources"]["cobalt"]
+    params:
+        options = CFG["options"]["cobalt"], 
+        jvmheap = lambda wildcards, resources: int(resources.mem_mb * 0.8)  # 80% of the requested memory goes to the JVM heap
+    wildcard_constraints: 
+        pair_status = "matched|unmatched"  # a single rule serves both pairing modes; a normal BAM is always supplied
+    conda: 
+        CFG["conda_envs"]["cobalt"]
+    threads: 
+        CFG["threads"]["cobalt"]
+    shell: 
+        op.as_one_line("""
+        COBALT -Xmx{params.jvmheap}m
+        -reference {wildcards.normal_id} -reference_bam {input.normal_bam} 
+        -tumor {wildcards.tumour_id} -tumor_bam {input.tumour_bam} 
+        -ref_genome {input.fasta} 
+        -output_dir `dirname {output.tumour_ratio}` 
+        -threads {threads} 
+        -gc_profile {input.gc_profile} 
+        {params.options} 
+        2>&1 | tee -a {log} 
+        """)
+
+
+# Run PURPLE for final CNV calling 
+
+# Name suffixes of the PURPLE result files and plots that the rules below declare as outputs
+purple_out = [
+    "purity.tsv",
+    "purity.range.tsv",
+    "cnv.gene.tsv",
+    "sv.vcf.gz"
+]
+purple_plots = [
+    "circos",
+    "input",
+    "map",
+    "purity.range",
+    "segment",
+]
+
+rule _hmftools_purple_matched:
+    input: 
+        amber = CFG["dirs"]["amber"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.amber.baf.vcf.gz",  # written as a path so either AMBER rule can supply it; only its directory is passed to PURPLE
+        cobalt_tumour = str(rules._hmftools_cobalt.output.tumour_ratio),  # only its directory is passed to PURPLE
+        cobalt_normal = str(rules._hmftools_cobalt.output.normal_ratio),
+        cobalt_tumour_tsv = str(rules._hmftools_cobalt.output.tumour_tsv),  # never named in the command; presumably read by PURPLE from the COBALT directory - confirm
+        slms3_vcf = str(rules._hmftools_snpeff_vcf.output.vcf), 
+        gridss_somatic_vcf = str(rules._hmftools_input_gridss.output.gridss_somatic_vcf),
+        gridss_filtered_vcf = str(rules._hmftools_input_gridss.output.gridss_filtered_vcf),
+        reference_fa = str(rules._hmftools_input_references.output.genome_fa), 
+        gene_panel = str(rules._hmftools_get_purple_drivers.output.gene_panel), 
+        hotspots = str(rules._hmftools_get_purple_drivers.output.hotspots), 
+        gc_profile = str(rules._hmftools_get_cobalt_gc.output.gc)
+    output: 
+        files = expand(CFG["dirs"]["purple"] + "{{seq_type}}--{{genome_build}}/{{tumour_id}}--{{normal_id}}--{{pair_status}}/{{tumour_id}}.purple.{out_file}",  # doubled braces stay wildcards; only out_file is expanded
+            out_file = purple_out), 
+        plots = expand(CFG["dirs"]["purple"] + "{{seq_type}}--{{genome_build}}/{{tumour_id}}--{{normal_id}}--{{pair_status}}/plot/{{tumour_id}}.{plot_name}.png",  # doubled braces stay wildcards; only plot_name is expanded
+            plot_name = purple_plots)
+    log: CFG["logs"]["purple"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/purple.log"
+    resources: 
+        **CFG["resources"]["purple"]
+    params: 
+        outdir = CFG["dirs"]["purple"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}",  # the directory that holds the declared outputs
+        options = CFG["options"]["purple"],
+        circos = "`which circos`",  # resolved by the shell when the job runs
+        jvmheap = lambda wildcards, resources: int(resources.mem_mb * 0.9)  # 90% of the requested memory goes to the JVM heap
+    wildcard_constraints: 
+        pair_status = "matched|unmatched",  # NOTE(review): despite the rule name, unmatched pairs are accepted as well
+        out_file = "|".join(purple_out),  # NOTE(review): out_file and plot_name are filled in by expand() above, so these two constraints appear to have no effect here - confirm
+        plot_name = "|".join(purple_plots)
+    conda: 
+        CFG["conda_envs"]["purple"]
+    threads: 
+        CFG["threads"]["purple"]
+    shell: 
+        op.as_one_line("""
+        PURPLE -Xmx{params.jvmheap}m -driver_catalog 
+            -reference {wildcards.normal_id} 
+            -tumor {wildcards.tumour_id} 
+            -output_dir {params.outdir} 
+            -amber `dirname {input.amber}` 
+            -cobalt `dirname {input.cobalt_tumour}` 
+            -gc_profile {input.gc_profile} 
+            -ref_genome {input.reference_fa}
+            -somatic_hotspots {input.hotspots} 
+            -driver_gene_panel {input.gene_panel}  
+            -somatic_vcf {input.slms3_vcf} 
+            -structural_vcf {input.gridss_filtered_vcf} 
+            -sv_recovery_vcf {input.gridss_somatic_vcf} 
+            -circos {params.circos} 
+            {params.options}
+            -threads {threads}
+            2>&1 | tee -a {log}
+        """)
+
+
+
+
+# Run LINX on the PURPLE structural-variant VCF; it also writes the vis_* data used for plotting
+rule _hmftools_linx: 
+    input: 
+        purple_vcf = CFG["dirs"]["purple"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.purple.sv.vcf.gz",  # its directory doubles as -purple_dir
+        ensembl_cache = str(rules._hmftools_get_ensembl_cache.output.cache), 
+        linx_db = str(rules._hmftools_get_linx_db.output)  # directory with the fragile-site, LINE-element, viral-host and fusion CSV files
+    output: 
+        clusters = CFG["dirs"]["linx"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.linx.vis_sv_data.tsv", 
+        svs = CFG["dirs"]["linx"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.linx.svs.tsv"
+    log: CFG["logs"]["linx"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/linx.log"  # kept under logs/ like the other tool rules in this module
+    resources: 
+        **CFG["resources"]["linx"]
+    params: 
+      ref_genome_version = lambda w: VERSION_MAP_HMFTOOLS[w.genome_build],  # "37" or "38"; also part of the LINX resource file names
+      jvmheap = lambda wildcards, resources: int(resources.mem_mb * 0.8),  # 80% of the requested memory goes to the JVM heap
+      options = CFG["options"]["linx"], 
+      cache_subdir = lambda w: config["lcr-modules"]["hmftools"]["dirs"]["inputs"] + "references/" + w.genome_build + "/ensembl_cache/" + VERSION_MAP_HMFTOOLS[w.genome_build]  # version-named subdirectory of the Ensembl cache; read via `config`, presumably because `CFG` may not be available when the lambda runs - confirm
+    conda: 
+        CFG["conda_envs"]["linx"]
+    threads: 
+        CFG["threads"]["linx"]
+    shell: 
+        op.as_one_line("""
+        linx -Xmx{params.jvmheap}m 
+            -sample {wildcards.tumour_id} 
+            -ref_genome_version {params.ref_genome_version} 
+            -sv_vcf {input.purple_vcf} 
+            -purple_dir `dirname {input.purple_vcf}` 
+            -output_dir `dirname {output.clusters}`  
+            -gene_transcripts_dir {params.cache_subdir} 
+            -fragile_site_file {input.linx_db}/fragile_sites_hmf.{params.ref_genome_version}.csv 
+            -line_element_file {input.linx_db}/line_elements.{params.ref_genome_version}.csv 
+            -viral_hosts_file {input.linx_db}/viral_host_ref.csv 
+            -known_fusion_file {input.linx_db}/known_fusion_data.{params.ref_genome_version}.csv 
+            -check_fusions 
+            -check_drivers 
+            -write_vis_data 
+            {params.options} 
+            2>&1 | tee -a {log}
+        """)
+
+# Draws LINX plots with SvVisualiser and appends all tool output to the rule log.
+# Two passes are made over the LINX results:
+#   1. one plot per cluster ID, skipped (with a logged message) when 50 or more IDs qualify;
+#   2. one plot per distinct value found in column 8 of the clusters table.
+rule _hmftools_linx_viz: 
+    input:
+        clusters = rules._hmftools_linx.output.clusters,
+        svs = rules._hmftools_linx.output.svs,
+        ensembl_cache = str(rules._hmftools_get_ensembl_cache.output.cache)
+    output:
+        plots = directory(CFG["dirs"]["linx"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/plot"),
+        data = directory(CFG["dirs"]["linx"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/data")
+    log: CFG["logs"]["linx"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/linx_viz.log"
+    resources:
+        **CFG["resources"]["linx_viz"]
+    params: 
+        # Expanded by the shell at run time: the *.jar located next to the resolved `linx` executable
+        linx_jar = "$(ls $(dirname $(readlink -e $(which linx)))/*.jar)", 
+        circos = "$(which circos)", 
+        # Java heap size (MB) is set to 80% of the job's mem_mb resource
+        jvmheap = lambda wildcards, resources: int(resources.mem_mb * 0.8), 
+        # NOTE(review): `options` is declared here but not referenced in the shell command below
+        options = CFG["options"]["linx_viz"], 
+        cache_subdir = lambda w: config["lcr-modules"]["hmftools"]["dirs"]["inputs"] + "references/" + w.genome_build + "/ensembl_cache/" + VERSION_MAP_HMFTOOLS[w.genome_build], 
+        alt_build = lambda w: VERSION_MAP_HMFTOOLS[w.genome_build]
+    conda:
+        CFG["conda_envs"]["linx"]
+    threads: 
+        CFG["threads"]["linx_viz"]
+    # Cluster IDs are read from column 3 of the SV table, keeping rows whose column 4 is non-empty.
+    # The awk field separators are assigned in a BEGIN block so that the first data row is
+    # split on tabs too; an assignment made in a main action only applies from the next record.
+    shell:
+        op.as_one_line("""
+        to_plot=$(dirname {input.svs})/to_plot.tsv;
+        tail -n +2 {input.svs} | awk 'BEGIN {{FS=OFS="\\t"}} $4 != "" {{print $3}}' | sort | uniq > $to_plot; 
+        if [[ $(cat $to_plot | wc -l) -lt 50 ]]; then
+            cat $to_plot | while read cluster; do 
+                java -Xmx{params.jvmheap}m -cp {params.linx_jar} com.hartwig.hmftools.linx.visualiser.SvVisualiser 
+                    -sample {wildcards.tumour_id} 
+                    -ref_genome_version V{params.alt_build}
+                    -gene_transcripts_dir {params.cache_subdir} 
+                    -plot_out {output.plots} 
+                    -data_out {output.data} 
+                    -vis_file_dir $(dirname {input.clusters})
+                    -circos {params.circos} 
+                    -threads {threads}  
+                    -clusterId $cluster
+                    -plot_cluster_genes         
+                    2>&1 | tee -a {log};
+            done; 
+        else 
+            echo "Too many clusters to plot for {wildcards.tumour_id}--{wildcards.normal_id}--{wildcards.pair_status}. See chromosome outputs and consider manually selecting clusters to plot. " 2>&1 | tee -a {log}; 
+        fi;
+        for chrom in $(tail -n +2 {input.clusters} | cut -f8 | sort | uniq); do 
+            java -Xmx{params.jvmheap}m -cp {params.linx_jar} com.hartwig.hmftools.linx.visualiser.SvVisualiser 
+                -sample {wildcards.tumour_id} 
+                -ref_genome_version V{params.alt_build}
+                -gene_transcripts_dir {params.cache_subdir} 
+                -plot_out {output.plots} 
+                -data_out {output.data} 
+                -vis_file_dir $(dirname {input.clusters})
+                -circos {params.circos} 
+                -threads {threads}  
+                -chromosome ${{chrom}}
+                2>&1 | tee -a {log}; 
+        done
+        """) 
+        
+
+
+
+# Symlinks the final output files into the module results directory (under '99-outputs/')
+
+# Creates one relative symlink per PURPLE result file in the module outputs directory.
+# The {out_file} wildcard is restricted to the entries of `purple_out` (defined elsewhere in this file).
+rule _hmftools_purple_output:
+    input:
+        files = CFG["dirs"]["purple"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.purple.{out_file}" 
+    output:
+        files = CFG["dirs"]["outputs"] + "purple_output/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.purple.{out_file}" 
+    wildcard_constraints: 
+        # Alternation regex built from the list, so only those names can match
+        out_file = "|".join(purple_out) 
+    run:
+        op.relative_symlink(input.files, output.files, in_module=True)
+
+# Creates one relative symlink per PURPLE plot (PNG) in the module outputs directory.
+# The {plot_name} wildcard is restricted to the entries of `purple_plots` (defined elsewhere in this file).
+rule _hmftools_purple_plots: 
+    input:
+        plots = CFG["dirs"]["purple"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/plot/{tumour_id}.{plot_name}.png"
+    output: 
+        plots = CFG["dirs"]["outputs"] + "purple_plots/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{plot_name}.png"
+    wildcard_constraints: 
+        # Alternation regex built from the list, so only those names can match
+        plot_name = "|".join(purple_plots)
+    run: 
+        op.relative_symlink(input.plots, output.plots, in_module=True)
+
+
+# Links every PNG found under the LINX plot directory into the outputs directory and
+# records completion with a `.symlinked` sentinel file (the number of plots is not known in advance).
+# Steps: remember the working directory, cd into the sentinel's directory, `cp -s` (symlink)
+# each *.png there, touch the sentinel, then cd back.
+# NOTE(review): `$workdir/{input.plots}` and `$workdir/{output.plots}` assume both paths are
+# relative to the working directory - confirm CFG["dirs"] never holds absolute paths.
+rule _hmftools_linx_plots: 
+    input:
+        plots = CFG["dirs"]["linx"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/plot", 
+    output: 
+        plots = CFG["dirs"]["outputs"] + "linx_plots/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.symlinked"
+    shell: 
+        op.as_one_line("""
+        workdir=$PWD &&
+        cd `dirname {output.plots}` && 
+        find $workdir/{input.plots} -type f -name "*.png" -exec cp -s {{}} . \; && 
+        touch $workdir/{output.plots} && 
+        cd $workdir
+        """)
+
+# Aggregation rule for a single tumour/normal run: requires every PURPLE output symlink
+# (one per entry of `purple_out`), every PURPLE plot symlink (one per entry of `purple_plots`)
+# and the LINX plot sentinel, then touches a `.dispatched` marker file.
+# Doubled braces keep the run wildcards unexpanded so only {out_file}/{plot_name} are filled by expand().
+rule _hmftools_dispatch: 
+    input: 
+        files = expand(CFG["dirs"]["outputs"] + "purple_output/{{seq_type}}--{{genome_build}}/{{tumour_id}}--{{normal_id}}--{{pair_status}}.purple.{out_file}", 
+            out_file = purple_out), 
+        plots = expand(CFG["dirs"]["outputs"] + "purple_plots/{{seq_type}}--{{genome_build}}/{{tumour_id}}--{{normal_id}}--{{pair_status}}.{plot_name}.png", 
+            plot_name = purple_plots),
+        linx = rules._hmftools_linx_plots.output.plots
+    output: 
+        dispatched = touch(CFG["dirs"]["outputs"] + "dispatched/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.dispatched")
+
+
+# Final target rule: requests the `.dispatched` sentinel of every run listed in CFG["runs"].
+# Requesting a sentinel triggers the dispatch rule, which in turn pulls in all output symlinks.
+# The run columns are combined element-wise (zip), i.e. one target per row of the runs table.
+rule _hmftools_all:
+    input:
+        expand(
+            [
+                str(rules._hmftools_dispatch.output.dispatched),
+            ],
+            zip,  # Run expand() with zip(), not product()
+            seq_type=CFG["runs"]["tumour_seq_type"],
+            genome_build=CFG["runs"]["tumour_genome_build"],
+            tumour_id=CFG["runs"]["tumour_sample_id"],
+            normal_id=CFG["runs"]["normal_sample_id"],
+            pair_status=CFG["runs"]["pair_status"])
+
+
+##### CLEANUP #####
+
+
+# Perform some clean-up tasks, including storing the module-specific
+# configuration on disk and deleting the `CFG` variable
+op.cleanup_module(CFG)
diff --git a/modules/hmftools/1.1/schemas/base-1.0.yaml b/modules/hmftools/1.1/schemas/base-1.0.yaml
new file mode 120000
index 000000000..0a69d1ceb
--- /dev/null
+++ b/modules/hmftools/1.1/schemas/base-1.0.yaml
@@ -0,0 +1 @@
+../../../../schemas/base/base-1.0.yaml
\ No newline at end of file
diff --git a/modules/hmftools/CHANGELOG.md b/modules/hmftools/CHANGELOG.md
index 7239dbcac..1c038d336 100644
--- a/modules/hmftools/CHANGELOG.md
+++ b/modules/hmftools/CHANGELOG.md
@@ -5,6 +5,10 @@ All notable changes to the `hmftools` module will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.1] - 2021-12-29
+
+- Updates to the GRIDSS-PURPLE-LINX pipeline, incorporating new and better ways of handling unmatched tumours. 
+
 ## [1.0] - 2020-07-29
 
 This release was authored by Laura Hilton.
diff --git a/modules/ichorcna/1.0/ichorcna.smk b/modules/ichorcna/1.0/ichorcna.smk
index 74c54f7de..85750346b 100644
--- a/modules/ichorcna/1.0/ichorcna.smk
+++ b/modules/ichorcna/1.0/ichorcna.smk
@@ -70,7 +70,7 @@ rule _install_ichorcna:
         CFG["conda_envs"]["ichorcna"]
     shell:
         op.as_one_line("""
-        git clone git@github.com:broadinstitute/ichorCNA.git {params.outdir} &&            
+        git clone git://github.com/broadinstitute/ichorCNA.git {params.outdir} &&            
         touch {output.complete}""")
 
 # This defines the script/extdata directory used by ichorCNA in the subsequent rules:
@@ -216,14 +216,13 @@ rule _ichorcna_read_counter:
         "readCounter {input.bam} -c {params.chrs} -w {params.binSize} -q {params.qual} > {output} 2> {log}"
 
 
-# This function will return a comma-separated list of chromosomes to include in readCounter
+# This function will return a comma-separated list of chromosomes to include in runIchorCNA
 def get_chromosomes_R(wildcards):
     chromosomesR=[]
     stringStart="c('"
     for i in range(1,23):
         chromosomesR.append(str(i))
     chromosomesR.append("X")
-    chromosomesR.append("Y")
     if "38" in str(wildcards.genome_build):
         chromosomesR = ["chr" + x for x in chromosomesR]
     chromosomesR= "','".join(chromosomesR)
diff --git a/modules/ichorcna/1.1/config/default.yaml b/modules/ichorcna/1.1/config/default.yaml
new file mode 100644
index 000000000..4844ba015
--- /dev/null
+++ b/modules/ichorcna/1.1/config/default.yaml
@@ -0,0 +1,115 @@
+lcr-modules:
+    
+    ichorcna:
+
+        inputs:
+            # Available wildcards: {seq_type} {genome_build} {sample_id}
+            sample_bam: "__UPDATE__"
+            sample_bai: "__UPDATE__"
+
+
+        scratch_subdirectories: []
+
+        options:
+            deeptools:
+                qual: 20 # passed to bamCoverage --minMappingQuality: only reads with mapping quality of at least 20 are counted
+                binSize:  1000000 # set window size to compute coverage 
+                # available binSizes are: 1000000, 500000, 50000, 10000
+                flagExclude: 1028
+                opt: " --ignoreDuplicates --extendReads "
+            run:
+                ichorCNA_libdir: ""
+                ichorCNA_rscript:  "{MODSDIR}/src/runIchorCNA.R"
+                # use panel matching same bin size (optional)
+                ichorCNA_normalPanel: 
+                    "1000000": "inst/extdata/HD_ULP_PoN_{genome_build}_1Mb_median_normAutosome_median.rds"
+                    "500000": "inst/extdata/HD_ULP_PoN_{genome_build}_500kb_median_normAutosome_median.rds"
+                # must use gc wig file corresponding to same binSize (required)
+                ichorCNA_gcWig:
+                    "1000000": "inst/extdata/gc_{genome_build}_1000kb.wig"
+                    "500000": "inst/extdata/gc_{genome_build}_500kb.wig"
+                    "50000": "inst/extdata/gc_{genome_build}_50kb.wig"
+                    "10000": "inst/extdata/gc_{genome_build}_10kb.wig"
+                # must use map wig file corresponding to same binSize (required)
+                ichorCNA_mapWig:  
+                    "1000000": "inst/extdata/map_{genome_build}_1000kb.wig"
+                    "500000": "inst/extdata/map_{genome_build}_500kb.wig"
+                    "50000": "inst/extdata/map_{genome_build}_50kb.wig"
+                    "10000": "inst/extdata/map_{genome_build}_10kb.wig"
+                # use bed file if sample has targeted regions, eg. exome data (optional)
+                ichorCNA_exons:  NULL
+                ichorCNA_centromere:  
+                    grch37: "inst/extdata/GRCh37.p13_centromere_UCSC-gapTable.txt"
+                    hg19: "inst/extdata/GRCh37.p13_centromere_UCSC-gapTable.txt"
+                    hs37d5: "inst/extdata/GRCh37.p13_centromere_UCSC-gapTable.txt"
+                    grch38: "inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt"
+                    hg38: "inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt"
+                ichorCNA_minMapScore: 0.75
+                ichorCNA_fracReadsInChrYForMale: 0.002 # Threshold for fraction of reads in chrY to assign as male
+                ichorCNA_genomeStyle:  # can set this to UCSC or NCBI
+                    grch37: "NCBI"
+                    hg19: "NCBI"
+                    hs37d5: "NCBI"
+                    grch38: "UCSC"
+                    hg38: "UCSC"
+                # chrs used for training ichorCNA parameters, e.g. tumor fraction. 
+                ichorCNA_chrTrain:  
+                    grch37: "c(1:22)"
+                    hg19: "c(1:22)"
+                    hs37d5: "c(1:22)"
+                    grch38: "paste0('chr', c(1:22))"
+                    hg38: "paste0('chr', c(1:22))"
+                # non-tumor fraction parameter restart values; higher values should be included for cfDNA
+                ichorCNA_normal:  "c(0.5,0.6,0.7,0.8,0.9,0.95)"
+                # ploidy parameter restart values
+                ichorCNA_ploidy:  "c(2,3,4)"
+                ichorCNA_estimateNormal:  TRUE
+                ichorCNA_estimatePloidy:  TRUE
+                ichorCNA_estimateClonality: TRUE
+                # states to use for subclonal CN
+                ichorCNA_scStates:  "c(1,3)"
+                # set maximum copy number to use
+                ichorCNA_maxCN:  5
+                # TRUE/FALSE to include homozygous deletion state # FALSE for low coverage libraries (ex. 0.1x) ; can turn on for higher coverage data (ex. >10x)
+                ichorCNA_includeHOMD: FALSE
+                # Exclude solutions if total length of subclonal CNAs > this fraction of the genome 
+                ichorCNA_maxFracGenomeSubclone: 0.5
+                # Exclude solutions if total length of subclonal CNAs > this fraction of total CNA length
+                ichorCNA_maxFracCNASubclone: 0.7
+                # control segmentation - higher (e.g. 0.9999999) leads to higher specificity and fewer segments
+                # lower (e.g. 0.99) leads to higher sensitivity and more segments
+                ichorCNA_txnE:  0.9399999
+                # control segmentation - higher (e.g. 10000000) leads to higher specificity and fewer segments
+                # lower (e.g. 100) leads to higher sensitivity and more segments
+                ichorCNA_txnStrength:  10000
+                ichorCNA_plotFileType:  "pdf"
+                ichorCNA_plotYlim:  "c(-2,2)"
+
+
+        conda_envs:
+            ichorcna: "{MODSDIR}/envs/ichorcna.env.yaml"
+            deeptools: "{MODSDIR}/envs/deeptools.env.yaml"
+            bedops_tools: "{MODSDIR}/envs/bedops_tools.env.yaml"
+            ucsc-bigwigtowig: "{MODSDIR}/envs/ucsc-bigwigtowig.env.yaml"
+
+        threads:
+            deeptools: 20
+            ucsc: 4
+            run: 4
+
+        resources:
+            deeptools:
+                mem_mb: 40000
+                bam: 1
+            ucsc:
+                mem_mb: 6000
+                bam: 1
+            run: 
+                mem_mb: 6000
+                bam: 1
+
+        pairing_config:
+            genome:
+                run_paired_tumours: False
+                run_unpaired_tumours_with: "no_normal"
+                run_paired_tumours_as_unpaired: True
diff --git a/modules/ichorcna/1.1/envs/bedops_tools.env.yaml b/modules/ichorcna/1.1/envs/bedops_tools.env.yaml
new file mode 100644
index 000000000..2052a2e05
--- /dev/null
+++ b/modules/ichorcna/1.1/envs/bedops_tools.env.yaml
@@ -0,0 +1,29 @@
+name: null
+channels:
+  - bioconda
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1
+  - _openmp_mutex=4.5
+  - bedops=2.4.39
+  - bedtools=2.30.0
+  - bzip2=1.0.8
+  - c-ares=1.17.1
+  - ca-certificates=2021.10.26
+  - curl=7.80.0
+  - krb5=1.19.2
+  - libcurl=7.80.0
+  - libedit=3.1.20210910
+  - libev=4.33
+  - libgcc=7.2.0
+  - libgcc-ng=9.3.0
+  - libgomp=9.3.0
+  - libnghttp2=1.46.0
+  - libssh2=1.9.0
+  - libstdcxx-ng=9.3.0
+  - ncurses=6.3
+  - openssl=1.1.1l
+  - samtools=1.7
+  - xz=5.2.5
+  - zlib=1.2.11
+prefix: /projects/rmorin/projects/tumour_contam/envs/bedops_tools
diff --git a/modules/ichorcna/1.1/envs/deeptools.env.yaml b/modules/ichorcna/1.1/envs/deeptools.env.yaml
new file mode 100644
index 000000000..fab883742
--- /dev/null
+++ b/modules/ichorcna/1.1/envs/deeptools.env.yaml
@@ -0,0 +1,76 @@
+name: null
+channels:
+  - bioconda
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1
+  - _openmp_mutex=4.5
+  - blas=1.0
+  - brotli=1.0.9
+  - bzip2=1.0.8
+  - c-ares=1.17.1
+  - ca-certificates=2021.10.26
+  - certifi=2021.10.8
+  - curl=7.80.0
+  - cycler=0.11.0
+  - deeptools=3.5.1
+  - deeptoolsintervals=0.1.9
+  - fonttools=4.25.0
+  - freetype=2.11.0
+  - giflib=5.2.1
+  - intel-openmp=2021.4.0
+  - jpeg=9d
+  - kiwisolver=1.3.1
+  - krb5=1.19.2
+  - lcms2=2.12
+  - ld_impl_linux-64=2.35.1
+  - libcurl=7.80.0
+  - libdeflate=1.0
+  - libedit=3.1.20210910
+  - libev=4.33
+  - libffi=3.3
+  - libgcc-ng=9.3.0
+  - libgfortran-ng=7.5.0
+  - libgfortran4=7.5.0
+  - libgomp=9.3.0
+  - libnghttp2=1.46.0
+  - libpng=1.6.37
+  - libssh2=1.9.0
+  - libstdcxx-ng=9.3.0
+  - libtiff=4.2.0
+  - libwebp=1.2.0
+  - libwebp-base=1.2.0
+  - lz4-c=1.9.3
+  - matplotlib-base=3.5.0
+  - mkl=2021.4.0
+  - mkl-service=2.4.0
+  - mkl_fft=1.3.1
+  - mkl_random=1.2.2
+  - munkres=1.0.7
+  - ncurses=6.3
+  - numpy=1.21.2
+  - numpy-base=1.21.2
+  - olefile=0.46
+  - openssl=1.1.1l
+  - packaging=21.3
+  - pillow=8.4.0
+  - pip=21.2.2
+  - plotly=4.14.3
+  - py2bit=0.3.0
+  - pybigwig=0.3.17
+  - pyparsing=3.0.4
+  - pysam=0.15.3
+  - python=3.7.11
+  - python-dateutil=2.8.2
+  - readline=8.1
+  - retrying=1.3.3
+  - scipy=1.7.1
+  - setuptools=58.0.4
+  - six=1.16.0
+  - sqlite=3.37.0
+  - tk=8.6.11
+  - wheel=0.37.0
+  - xz=5.2.5
+  - zlib=1.2.11
+  - zstd=1.4.9
+prefix: /projects/rmorin/projects/tumour_contam/envs/deeptools
diff --git a/modules/ichorcna/1.1/envs/ichorcna.env.yaml b/modules/ichorcna/1.1/envs/ichorcna.env.yaml
new file mode 100644
index 000000000..208fd501c
--- /dev/null
+++ b/modules/ichorcna/1.1/envs/ichorcna.env.yaml
@@ -0,0 +1,108 @@
+name: null
+channels:
+  - conda-forge
+  - dranew
+  - bioconda
+  - defaults
+  - r
+dependencies:
+  - _libgcc_mutex=0.1
+  - _openmp_mutex=4.5
+  - _r-mutex=1.0.1
+  - binutils_impl_linux-64=2.35.1
+  - binutils_linux-64=2.35
+  - bioconductor-biocgenerics=0.36.0
+  - bioconductor-genomeinfodb=1.26.0
+  - bioconductor-genomeinfodbdata=1.2.4
+  - bioconductor-genomicranges=1.42.0
+  - bioconductor-hmmcopy=1.32.0
+  - bioconductor-iranges=2.24.0
+  - bioconductor-s4vectors=0.28.0
+  - bioconductor-xvector=0.30.0
+  - bioconductor-zlibbioc=1.36.0
+  - bwidget=1.9.14
+  - bzip2=1.0.8
+  - ca-certificates=2020.12.5
+  - cairo=1.16.0
+  - curl=7.71.1
+  - fontconfig=2.13.1
+  - freetype=2.10.4
+  - fribidi=1.0.10
+  - gcc_impl_linux-64=9.3.0
+  - gcc_linux-64=9.3.0
+  - gettext=0.19.8.1
+  - gfortran_impl_linux-64=9.3.0
+  - gfortran_linux-64=9.3.0
+  - graphite2=1.3.13
+  - gsl=2.6
+  - gxx_impl_linux-64=9.3.0
+  - gxx_linux-64=9.3.0
+  - harfbuzz=2.8.0
+  - hmmcopy_utils=0.0.1
+  - icu=68.1
+  - jpeg=9d
+  - kernel-headers_linux-64=2.6.32
+  - krb5=1.17.2
+  - ld_impl_linux-64=2.35.1
+  - libblas=3.8.0
+  - libcblas=3.8.0
+  - libcurl=7.71.1
+  - libedit=3.1.20191231
+  - libffi=3.3
+  - libgcc-devel_linux-64=9.3.0
+  - libgcc-ng=9.3.0
+  - libgfortran-ng=9.3.0
+  - libgfortran5=9.3.0
+  - libglib=2.66.7
+  - libgomp=9.3.0
+  - libiconv=1.16
+  - liblapack=3.8.0
+  - libopenblas=0.3.10
+  - libpng=1.6.37
+  - libssh2=1.9.0
+  - libstdcxx-devel_linux-64=9.3.0
+  - libstdcxx-ng=9.3.0
+  - libtiff=4.2.0
+  - libuuid=2.32.1
+  - libwebp-base=1.2.0
+  - libxcb=1.13
+  - libxml2=2.9.10
+  - lz4-c=1.9.3
+  - make=4.3
+  - ncurses=6.2
+  - openssl=1.1.1j
+  - pango=1.42.4
+  - pcre=8.44
+  - pcre2=10.36
+  - pixman=0.40.0
+  - pthread-stubs=0.4
+  - r-base=4.0.3
+  - r-bitops=1.0_6
+  - r-data.table=1.14.0
+  - r-getopt=1.20.3
+  - r-ichorcna=0.2.0
+  - r-optparse=1.6.6
+  - r-plyr=1.8.6
+  - r-rcpp=1.0.6
+  - r-rcurl=1.98_1.2
+  - readline=8.0
+  - sed=4.8
+  - sysroot_linux-64=2.12
+  - tk=8.6.10
+  - tktable=2.10
+  - xorg-kbproto=1.0.7
+  - xorg-libice=1.0.10
+  - xorg-libsm=1.2.3
+  - xorg-libx11=1.7.0
+  - xorg-libxau=1.0.9
+  - xorg-libxdmcp=1.1.3
+  - xorg-libxext=1.3.4
+  - xorg-libxrender=0.9.10
+  - xorg-libxt=1.2.1
+  - xorg-renderproto=0.11.1
+  - xorg-xextproto=7.3.0
+  - xorg-xproto=7.0.31
+  - xz=5.2.5
+  - zlib=1.2.11
+  - zstd=1.4.9
+prefix: /projects/rmorin/projects/tumour_contam/envs/ichorcna
diff --git a/modules/ichorcna/1.1/envs/ucsc-bigwigtowig.env.yaml b/modules/ichorcna/1.1/envs/ucsc-bigwigtowig.env.yaml
new file mode 100644
index 000000000..4d035cb87
--- /dev/null
+++ b/modules/ichorcna/1.1/envs/ucsc-bigwigtowig.env.yaml
@@ -0,0 +1,19 @@
+name: null
+channels:
+  - bioconda
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1
+  - _openmp_mutex=4.5
+  - ca-certificates=2021.10.26
+  - libgcc=7.2.0
+  - libgcc-ng=9.3.0
+  - libgomp=9.3.0
+  - libpng=1.6.37
+  - libstdcxx-ng=9.3.0
+  - libuuid=1.0.3
+  - mysql-connector-c=6.1.6
+  - openssl=1.0.2u
+  - ucsc-bigwigtowig=366
+  - zlib=1.2.11
+prefix: /projects/rmorin/projects/tumour_contam/envs/ucsc-bigwigtowig
diff --git a/modules/ichorcna/1.1/ichorcna.smk b/modules/ichorcna/1.1/ichorcna.smk
new file mode 100644
index 000000000..8391dee52
--- /dev/null
+++ b/modules/ichorcna/1.1/ichorcna.smk
@@ -0,0 +1,431 @@
+#!/usr/bin/env snakemake
+
+
+# ---------------------------------------------------------------------------- #
+##### ATTRIBUTION #####
+# ---------------------------------------------------------------------------- #
+
+# Original snakemake author: Jasper Wong
+# Module author: Jasper Wong
+# Additional contributors: N/A
+
+
+# ---------------------------------------------------------------------------- #
+##### SETUP #####
+# ---------------------------------------------------------------------------- #
+
+### Modules ###
+
+import pandas as pd
+import numpy as np
+import oncopipe as op
+import glob
+import os
+
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+# The workflow is aborted at parse time if `packaging` is missing or if the installed
+# oncopipe is older than `min_oncopipe_version`.
+# NOTE(review): `sys` and `logger` are not imported in this file; presumably they are
+# provided by Snakemake's Snakefile namespace - confirm.
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    # `packaging.version` is used for the version comparison below
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+# Version string of the oncopipe distribution installed in the current environment
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    # The escape sequences wrap the message in ANSI colour codes and reset them afterwards
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
+### Directories ###
+# Setup module and store module-specific configuration in `CFG`.
+# `version` names this module's own release: this file lives in modules/ichorcna/1.1/.
+# oncopipe presumably uses it to resolve `{MODSDIR}` (conda envs, src/ scripts) and the
+# versioned results directory, so it must not point at the 1.0 release - TODO confirm.
+CFG = op.setup_module(
+    name = "ichorcna", 
+    version = "1.1",
+    subdirectories = ["inputs", "readDepth", "seg", "outputs"]
+)
+
+# Rules listed here are executed on the host running Snakemake instead of being
+# submitted as cluster jobs.
+localrules:
+    _ichorcna_input_bam,
+    _ichorcna_output,
+    _ichorcna_all
+
+# ---------------------------------------------------------------------------- #
+##### RULES #####
+# ---------------------------------------------------------------------------- #
+
+### Set-up dependencies and packages ###
+# Download github and all external files for ichorCNA: (needed since their extdata is not complete for all genome builds)
+# Clones the ichorCNA repository into the module inputs directory and touches a
+# `.success` sentinel once the clone has finished.
+# The clone uses HTTPS: GitHub no longer serves the unauthenticated git:// protocol,
+# and HTTPS needs no SSH key on the machine running the job.
+rule _install_ichorcna:
+    output:
+        complete = CFG["dirs"]["inputs"] + "ichorcna_dependencies_installed.success"
+    params:
+        outdir = CFG["dirs"]["inputs"] + "ichorCNA/"
+    conda:
+        CFG["conda_envs"]["ichorcna"]
+    shell:
+        op.as_one_line("""
+        git clone https://github.com/broadinstitute/ichorCNA.git {params.outdir} &&            
+        touch {output.complete}""")
+
+# This defines the script/extdata directory used by ichorCNA in the subsequent rules:
+ichorDir = CFG["dirs"]["inputs"] + "ichorCNA/inst/extdata/" 
+
+# Symlinks the extdata appropriately
+# Creates genome-build-specific aliases (symlinks) for the reference files shipped in
+# ichorCNA's inst/extdata directory, so that file names can be built from {genome_build}:
+#   - hg19 panel-of-normals (PoN) files are linked under hg19, grch37 and hs37d5 names;
+#   - hg38 PoN files are linked under grch38 names;
+#   - hg19 GC/mappability wigs are linked under grch37 and hs37d5 names;
+#   - hg38 GC/mappability wigs are linked under grch38 names.
+# Every PoN alias follows the pattern used by `ichorCNA_normalPanel` in the module config:
+#   HD_ULP_PoN_{genome_build}_{1Mb|500kb}_median_normAutosome_median.rds
+# The source files are listed under `params` (not `input`); they are expected to exist once
+# the clone sentinel in `input.complete` is present.
+rule _setup_ichorcna_extdata:
+    input:
+        complete = CFG["dirs"]["inputs"] + "ichorcna_dependencies_installed.success"
+    params:
+        hg19_1Mb_rds = ichorDir + "HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds",
+        hg19_500kb_rds = ichorDir + "HD_ULP_PoN_500kb_median_normAutosome_mapScoreFiltered_median.rds",
+        hg38_1Mb_rds = ichorDir + "HD_ULP_PoN_hg38_1Mb_median_normAutosome_median.rds",
+        hg38_500kb_rds = ichorDir + "HD_ULP_PoN_hg38_500kb_median_normAutosome_median.rds",
+        hg19_1000kb_gc = ichorDir + "gc_hg19_1000kb.wig",
+        hg19_500kb_gc = ichorDir + "gc_hg19_500kb.wig",
+        hg19_50kb_gc = ichorDir + "gc_hg19_50kb.wig",
+        hg19_10kb_gc = ichorDir + "gc_hg19_10kb.wig",
+        hg38_1000kb_gc = ichorDir + "gc_hg38_1000kb.wig",
+        hg38_500kb_gc = ichorDir + "gc_hg38_500kb.wig",
+        hg38_50kb_gc = ichorDir + "gc_hg38_50kb.wig",
+        hg38_10kb_gc = ichorDir + "gc_hg38_10kb.wig",
+        hg19_1000kb_map = ichorDir + "map_hg19_1000kb.wig",
+        hg19_500kb_map = ichorDir + "map_hg19_500kb.wig",
+        hg19_50kb_map = ichorDir + "map_hg19_50kb.wig",
+        hg19_10kb_map = ichorDir + "map_hg19_10kb.wig",
+        hg38_1000kb_map = ichorDir + "map_hg38_1000kb.wig",
+        hg38_500kb_map = ichorDir + "map_hg38_500kb.wig",
+        hg38_50kb_map = ichorDir + "map_hg38_50kb.wig",
+        hg38_10kb_map = ichorDir + "map_hg38_10kb.wig",
+    output:
+        hg19_1Mb_rds = ichorDir + "HD_ULP_PoN_hg19_1Mb_median_normAutosome_median.rds",
+        hg19_500kb_rds = ichorDir + "HD_ULP_PoN_hg19_500kb_median_normAutosome_median.rds",
+        grch37_1Mb_rds = ichorDir + "HD_ULP_PoN_grch37_1Mb_median_normAutosome_median.rds",
+        # 500kb aliases carry the same `_median_normAutosome_median` suffix as the 1Mb ones
+        grch37_500kb_rds = ichorDir + "HD_ULP_PoN_grch37_500kb_median_normAutosome_median.rds",
+        hs37d5_1Mb_rds = ichorDir + "HD_ULP_PoN_hs37d5_1Mb_median_normAutosome_median.rds",
+        hs37d5_500kb_rds = ichorDir + "HD_ULP_PoN_hs37d5_500kb_median_normAutosome_median.rds",
+        grch38_1Mb_rds = ichorDir + "HD_ULP_PoN_grch38_1Mb_median_normAutosome_median.rds",
+        grch38_500kb_rds = ichorDir + "HD_ULP_PoN_grch38_500kb_median_normAutosome_median.rds",
+        grch37_1000kb_gc = ichorDir + "gc_grch37_1000kb.wig",
+        grch37_500kb_gc = ichorDir + "gc_grch37_500kb.wig",
+        grch37_50kb_gc = ichorDir + "gc_grch37_50kb.wig",
+        grch37_10kb_gc = ichorDir + "gc_grch37_10kb.wig",
+        hs37d5_1000kb_gc = ichorDir + "gc_hs37d5_1000kb.wig",
+        hs37d5_500kb_gc = ichorDir + "gc_hs37d5_500kb.wig",
+        hs37d5_50kb_gc = ichorDir + "gc_hs37d5_50kb.wig",
+        hs37d5_10kb_gc = ichorDir + "gc_hs37d5_10kb.wig",
+        grch38_1000kb_gc = ichorDir + "gc_grch38_1000kb.wig",
+        grch38_500kb_gc = ichorDir + "gc_grch38_500kb.wig",
+        grch38_50kb_gc = ichorDir + "gc_grch38_50kb.wig",
+        grch38_10kb_gc = ichorDir + "gc_grch38_10kb.wig",
+        grch37_1000kb_map = ichorDir + "map_grch37_1000kb.wig",
+        grch37_500kb_map = ichorDir + "map_grch37_500kb.wig",
+        grch37_50kb_map = ichorDir + "map_grch37_50kb.wig",
+        grch37_10kb_map = ichorDir + "map_grch37_10kb.wig",
+        hs37d5_1000kb_map = ichorDir + "map_hs37d5_1000kb.wig",
+        hs37d5_500kb_map = ichorDir + "map_hs37d5_500kb.wig",
+        hs37d5_50kb_map = ichorDir + "map_hs37d5_50kb.wig",
+        hs37d5_10kb_map = ichorDir + "map_hs37d5_10kb.wig",
+        grch38_1000kb_map = ichorDir + "map_grch38_1000kb.wig",
+        grch38_500kb_map = ichorDir + "map_grch38_500kb.wig",
+        grch38_50kb_map = ichorDir + "map_grch38_50kb.wig",
+        grch38_10kb_map = ichorDir + "map_grch38_10kb.wig",
+        # Sentinel that downstream rules depend on instead of listing every alias
+        complete = touch(ichorDir + "symlink.done")
+    run:
+        # Panel-of-normals aliases
+        op.relative_symlink(params.hg19_1Mb_rds, output.hg19_1Mb_rds)
+        op.relative_symlink(params.hg19_500kb_rds, output.hg19_500kb_rds)
+        op.relative_symlink(params.hg19_1Mb_rds, output.grch37_1Mb_rds)
+        op.relative_symlink(params.hg19_1Mb_rds, output.hs37d5_1Mb_rds)
+        op.relative_symlink(params.hg19_500kb_rds, output.grch37_500kb_rds)
+        op.relative_symlink(params.hg19_500kb_rds, output.hs37d5_500kb_rds)
+        op.relative_symlink(params.hg38_1Mb_rds, output.grch38_1Mb_rds)
+        op.relative_symlink(params.hg38_500kb_rds, output.grch38_500kb_rds)
+        # GC-content wig aliases
+        op.relative_symlink(params.hg19_1000kb_gc, output.grch37_1000kb_gc)
+        op.relative_symlink(params.hg19_500kb_gc, output.grch37_500kb_gc)
+        op.relative_symlink(params.hg19_50kb_gc, output.grch37_50kb_gc)
+        op.relative_symlink(params.hg19_10kb_gc, output.grch37_10kb_gc)
+        op.relative_symlink(params.hg19_1000kb_gc, output.hs37d5_1000kb_gc)
+        op.relative_symlink(params.hg19_500kb_gc, output.hs37d5_500kb_gc)
+        op.relative_symlink(params.hg19_50kb_gc, output.hs37d5_50kb_gc)
+        op.relative_symlink(params.hg19_10kb_gc, output.hs37d5_10kb_gc)
+        op.relative_symlink(params.hg38_1000kb_gc, output.grch38_1000kb_gc)
+        op.relative_symlink(params.hg38_500kb_gc, output.grch38_500kb_gc)
+        op.relative_symlink(params.hg38_50kb_gc, output.grch38_50kb_gc)
+        op.relative_symlink(params.hg38_10kb_gc, output.grch38_10kb_gc)
+        # Mappability wig aliases
+        op.relative_symlink(params.hg19_1000kb_map, output.grch37_1000kb_map)
+        op.relative_symlink(params.hg19_500kb_map, output.grch37_500kb_map)
+        op.relative_symlink(params.hg19_50kb_map, output.grch37_50kb_map)
+        op.relative_symlink(params.hg19_10kb_map, output.grch37_10kb_map)
+        op.relative_symlink(params.hg19_1000kb_map, output.hs37d5_1000kb_map)
+        op.relative_symlink(params.hg19_500kb_map, output.hs37d5_500kb_map)
+        op.relative_symlink(params.hg19_50kb_map, output.hs37d5_50kb_map)
+        op.relative_symlink(params.hg19_10kb_map, output.hs37d5_10kb_map)
+        op.relative_symlink(params.hg38_1000kb_map, output.grch38_1000kb_map)
+        op.relative_symlink(params.hg38_500kb_map, output.grch38_500kb_map)
+        op.relative_symlink(params.hg38_50kb_map, output.grch38_50kb_map)
+        op.relative_symlink(params.hg38_10kb_map, output.grch38_10kb_map)
+
+### Run ichorCNA ###
+# Symlinks the input files into the module results directory (under '00-inputs/')
+# Links the configured alignment file and its index into the module inputs directory
+# using absolute symlinks.
+# The same index file is linked twice, once with a `.bam.bai` and once with a `.bam.crai`
+# suffix - presumably so that the index is found whether the input is really BAM or CRAM
+# (TODO confirm).
+rule _ichorcna_input_bam:
+    input:
+        bam = CFG["inputs"]["sample_bam"],
+        bai = CFG["inputs"]["sample_bai"]
+    output:
+        bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam",
+        bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai", 
+        crai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.crai"
+    run:
+        op.absolute_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bai, output.bai)
+        op.absolute_symlink(input.bai, output.crai)
+     
+
+# set-up for CRAM files (readCounter does not work with CRAM)
+# deeptools to get .bw from .bam and .cram
+# Computes binned read coverage with deepTools `bamCoverage` and writes it as a bigWig file.
+# Besides the alignment and its index links, the rule waits for the ichorCNA clone and the
+# extdata symlink sentinels. Everything bamCoverage prints is captured in the rule's log file.
+# NOTE(review): the bin size passed to bamCoverage comes from the config (`params.binSize`),
+# while the output path uses the {binSize} wildcard; the two are assumed to be equal.
+rule _ichorcna_bamCoverage:
+    input:
+        bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam",
+        bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai",
+        crai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.crai",
+        ichorcna_package = CFG["dirs"]["inputs"] + "ichorcna_dependencies_installed.success",
+        symlink_complete = ichorDir + "symlink.done"
+    output:
+        bw = CFG["dirs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/bw/{sample_id}.bin{binSize}.bw"
+    params:
+        binSize = CFG["options"]["deeptools"]["binSize"],
+        qual = CFG["options"]["deeptools"]["qual"],
+        excludeFlag = CFG["options"]["deeptools"]["flagExclude"],
+        opt = CFG["options"]["deeptools"]["opt"],
+        dirOut = CFG["dirs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/bw/"
+    conda: CFG["conda_envs"]["deeptools"]
+    threads: CFG["threads"]["deeptools"]
+    resources:
+        **CFG["resources"]["deeptools"]
+    log:
+        CFG["logs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/bw/{sample_id}.bin{binSize}.log"
+    shell:
+        """
+            mkdir -p {params.dirOut}; 
+            bamCoverage -b {input.bam} --binSize {params.binSize} --minMappingQuality {params.qual} --samFlagExclude {params.excludeFlag} {params.opt} -o {output.bw} -p {threads} > {log} 2>&1
+        """
+
+
+# Converts bigWig to Wig
+# Extracts a single chromosome from the sample's bigWig into a temporary wig file with
+# UCSC `bigWigToWig`; messages from the tool are captured in the rule's log file.
+# The {chrom} constraint rejects values ending in `--fixed`, which keeps this rule from
+# matching the `--fixed.wig` files produced downstream.
+# NOTE(review): the output name has no separator between {binSize} and {chrom}
+# (`bin{binSize}{chrom}.wig`), unlike the log name; the consuming rule uses the same
+# pattern, so both must be changed together if a `.` is ever added.
+rule _ichorcna_bigwigToWig:
+    input:
+        bw = CFG["dirs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/bw/{sample_id}.bin{binSize}.bw"
+    output:
+        wig_int = temp(CFG["dirs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/wig/{sample_id}.bin{binSize}{chrom}.wig"),
+    conda: CFG["conda_envs"]["ucsc-bigwigtowig"]
+    threads: CFG["threads"]["ucsc"]
+    resources:
+        **CFG["resources"]["ucsc"]
+    wildcard_constraints:
+        chrom = ".+(?<!--fixed)"
+    log:
+        CFG["logs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/wig/{sample_id}.bin{binSize}.{chrom}.log"
+    shell:
+        """
+            bigWigToWig {input.bw} {output.wig_int} -chrom={wildcards.chrom} > {log} 2>&1
+        """
+
+
+# This function will reformat the wig file to one that can be used for ichorCNA
+rule _ichorcna_spread_centromeres:
+    input:
+        wig_int = CFG["dirs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/wig/{sample_id}.bin{binSize}{chrom}.wig",
+    output:
+        wig = temp(CFG["dirs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/wig/{sample_id}.bin{binSize}.{chrom}--fixed.wig")
+    conda: CFG["conda_envs"]["bedops_tools"]
+    threads: CFG["threads"]["ucsc"]
+    resources:
+        **CFG["resources"]["ucsc"]
+    wildcard_constraints:
+        chrom = ".+(?<!--fixed)"
+    log:
+        CFG["logs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/wig/{sample_id}.bin{binSize}.{chrom}.bedops.log"
+    shell:
+        """
+            echo -e "fixedStep chrom={wildcards.chrom} start=1 step={wildcards.binSize} span={wildcards.binSize} " > {output.wig} &&
+            intersectBed -a <( bedops --chop {wildcards.binSize} --header  {input.wig_int} ) -b {input.wig_int} -wa -wb | awk '{{print $7}}' >> {output.wig} 2>> {log}
+        """
+
+
+# Input function for _ichorcna_wigCompile: lists the per-chromosome "--fixed" wigs that will be stitched together
+def get_chrom_wigs(wildcards):
+    """Return one `--fixed.wig` path per line of main_chromosomes_withY.txt
+    for the genome build in `wildcards`, in file order. The double-braced
+    fields are left as `{wildcard}` placeholders by expand()."""
+    CFG = config["lcr-modules"]["ichorcna"]
+    chrom_list = reference_files("genomes/" + wildcards.genome_build + "/genome_fasta/main_chromosomes_withY.txt")
+    with open(chrom_list) as handle:
+        chroms = handle.read().rstrip("\n").split("\n")
+    wig_template = CFG["dirs"]["readDepth"] + "{{seq_type}}--{{genome_build}}/{{binSize}}/wig/{{sample_id}}.bin{{binSize}}.{chrom}--fixed.wig"
+    return expand(wig_template, chrom = chroms)
+
+# Concatenates the per-chromosome "--fixed" wigs returned by get_chrom_wigs into one wig per sample and bin size
+rule _ichorcna_wigCompile:
+    input:
+        get_chrom_wigs
+    output:
+        wig = CFG["dirs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/wig/{sample_id}.bin{binSize}.wig"
+    shell:
+        """
+            cat {input} > {output.wig}
+        """
+
+
+
+# Params function: builds the R vector expression of chromosomes that is passed to runIchorCNA as --chrs
+def get_chromosomes_R(wildcards):
+    """Return autosomes 1-22 plus X as an R `c('...')` expression string.
+
+    Every name gets a "chr" prefix when the genome build name contains "38";
+    otherwise the bare names ("1" ... "22", "X") are used.
+    """
+    prefix = "chr" if "38" in str(wildcards.genome_build) else ""
+    autosomes = [str(number) for number in range(1, 23)]
+    names = [prefix + name for name in autosomes + ["X"]]
+    quoted = "','".join(names)
+    return "c('" + quoted + "')"
+# Runs the R script configured as ichorCNA_rscript on the compiled tumour wig; --NORMWIG is not passed, only --normalPanel
+rule _run_ichorcna:
+    input:
+        tum = CFG["dirs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/wig/{tumour_id}.bin{binSize}.wig",
+    output:
+        corrDepth = CFG["dirs"]["seg"] + "{seq_type}--{genome_build}/{binSize}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.correctedDepth.txt",
+        param = CFG["dirs"]["seg"] + "{seq_type}--{genome_build}/{binSize}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.params.txt",
+        cna = CFG["dirs"]["seg"] + "{seq_type}--{genome_build}/{binSize}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.cna.seg",
+        segTxt = CFG["dirs"]["seg"] + "{seq_type}--{genome_build}/{binSize}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.seg.txt",
+        seg = CFG["dirs"]["seg"] + "{seq_type}--{genome_build}/{binSize}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}.seg",
+        plot = CFG["dirs"]["seg"] + "{seq_type}--{genome_build}/{binSize}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}/{tumour_id}_genomeWide.pdf",
+    params:
+        ichorDir = CFG["dirs"]["inputs"] + "ichorCNA/",
+        outDir = CFG["dirs"]["seg"] + "{seq_type}--{genome_build}/{binSize}/{tumour_id}--{normal_id}--{pair_status}/",
+        rscript = CFG["options"]["run"]["ichorCNA_rscript"],
+        name = "{tumour_id}",
+        ploidy = CFG["options"]["run"]["ichorCNA_ploidy"],
+        normal = CFG["options"]["run"]["ichorCNA_normal"],
+        gcwig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_gcWig"]),
+        mapwig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_mapWig"]),
+        normalpanel = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_normalPanel"]),
+        estimateNormal = CFG["options"]["run"]["ichorCNA_estimateNormal"],
+        estimatePloidy = CFG["options"]["run"]["ichorCNA_estimatePloidy"],
+        estimateClonality = CFG["options"]["run"]["ichorCNA_estimateClonality"],
+        scStates = CFG["options"]["run"]["ichorCNA_scStates"],
+        maxCN = CFG["options"]["run"]["ichorCNA_maxCN"],
+        includeHOMD = CFG["options"]["run"]["ichorCNA_includeHOMD"],
+        chrs = get_chromosomes_R,
+        chrTrain = op.switch_on_wildcard("genome_build", CFG["options"]["run"]["ichorCNA_chrTrain"]),
+        genomeBuild = "{genome_build}",
+        genomeStyle = op.switch_on_wildcard("genome_build", CFG["options"]["run"]["ichorCNA_genomeStyle"]),
+        centromere = op.switch_on_wildcard("genome_build", CFG["options"]["run"]["ichorCNA_centromere"]),
+        fracReadsChrYMale = CFG["options"]["run"]["ichorCNA_fracReadsInChrYForMale"],
+        minMapScore = CFG["options"]["run"]["ichorCNA_minMapScore"],
+        maxFracGenomeSubclone = CFG["options"]["run"]["ichorCNA_maxFracGenomeSubclone"],
+        maxFracCNASubclone = CFG["options"]["run"]["ichorCNA_maxFracCNASubclone"],
+        exons = CFG["options"]["run"]["ichorCNA_exons"],
+        txnE = CFG["options"]["run"]["ichorCNA_txnE"],
+        txnStrength = CFG["options"]["run"]["ichorCNA_txnStrength"],
+        plotFileType = CFG["options"]["run"]["ichorCNA_plotFileType"],  # NOTE(review): output.plot is declared as ".pdf", so this presumably has to stay "pdf" - confirm
+        plotYlim = CFG["options"]["run"]["ichorCNA_plotYlim"],
+        libdir = CFG["dirs"]["inputs"] + "ichorCNA/" + CFG["options"]["run"]["ichorCNA_libdir"]
+    conda: CFG["conda_envs"]["ichorcna"]
+    threads: CFG["threads"]["run"]
+    resources:
+        **CFG["resources"]["run"]
+    log:
+        stdout = CFG["logs"]["seg"] + "{seq_type}--{genome_build}/{binSize}/{tumour_id}--{normal_id}--{pair_status}.stdout.log",
+        stderr = CFG["logs"]["seg"] + "{seq_type}--{genome_build}/{binSize}/{tumour_id}--{normal_id}--{pair_status}.stderr.log"
+    shell:
+         op.as_one_line("""
+            Rscript {params.rscript} 
+            --id {params.name} 
+            --libdir {params.libdir} 
+            --WIG {input.tum} 
+            --gcWig {params.ichorDir}{params.gcwig} 
+            --mapWig {params.ichorDir}{params.mapwig} 
+            --normalPanel {params.ichorDir}{params.normalpanel} 
+            --ploidy \"{params.ploidy}\" 
+            --normal \"{params.normal}\" 
+            --maxCN {params.maxCN} 
+            --includeHOMD {params.includeHOMD} 
+            --chrs \"{params.chrs}\" 
+            --chrTrain \"{params.chrTrain}\" 
+            --genomeStyle {params.genomeStyle} 
+            --genomeBuild {params.genomeBuild} 
+            --estimateNormal {params.estimateNormal} 
+            --estimatePloidy {params.estimatePloidy} 
+            --estimateScPrevalence {params.estimateClonality} 
+            --scStates \"{params.scStates}\" 
+            --centromere {params.ichorDir}{params.centromere} 
+            --exons.bed {params.exons} 
+            --txnE {params.txnE} 
+            --txnStrength {params.txnStrength} 
+            --minMapScore {params.minMapScore} 
+            --fracReadsInChrYForMale {params.fracReadsChrYMale} 
+            --maxFracGenomeSubclone {params.maxFracGenomeSubclone} 
+            --maxFracCNASubclone {params.maxFracCNASubclone} 
+            --plotFileType {params.plotFileType} 
+            --plotYLim \"{params.plotYlim}\" 
+            --outDir {params.outDir} > {log.stdout} 2> {log.stderr}
+        """)
+
+
+# Exposes the ichorCNA results by symlinking them into the module results directory (under '99-outputs/')
+rule _ichorcna_output:
+    input:
+        corrDepth = str(rules._run_ichorcna.output.corrDepth),
+        param = str(rules._run_ichorcna.output.param),
+        cna = str(rules._run_ichorcna.output.cna),
+        segTxt = str(rules._run_ichorcna.output.segTxt),
+        seg = str(rules._run_ichorcna.output.seg),
+        plot = str(rules._run_ichorcna.output.plot)
+    output:
+        corrDepth = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/corrDepth/{binSize}/{tumour_id}--{normal_id}--{pair_status}.corrDepth.txt",
+        param = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/param/{binSize}/{tumour_id}--{normal_id}--{pair_status}.param.txt",
+        cna = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/binCNA/{binSize}/{tumour_id}--{normal_id}--{pair_status}.cna.seg",
+        segTxt = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/seg_txt/{binSize}/{tumour_id}--{normal_id}--{pair_status}.seg.txt",
+        seg = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/seg/{binSize}/{tumour_id}--{normal_id}--{pair_status}.seg",
+        plot = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/plot/{binSize}/{tumour_id}--{normal_id}--{pair_status}_genomeWide.pdf"
+    run:
+        # Link every named input to the output of the same name, one
+        # op.relative_symlink() call per file, in declaration order
+        for key in ("corrDepth", "param", "cna", "segTxt", "seg", "plot"):
+            source_path = getattr(input, key)
+            link_path = getattr(output, key)
+            op.relative_symlink(source_path, link_path, in_module=True)
+
+# Final target rule: requests all six symlinked outputs of _ichorcna_output for every run listed in CFG["runs"]
+rule _ichorcna_all:
+    input:
+        expand(
+            [
+                str(rules._ichorcna_output.output.corrDepth),
+                str(rules._ichorcna_output.output.param),
+                str(rules._ichorcna_output.output.cna),
+                str(rules._ichorcna_output.output.segTxt),
+                str(rules._ichorcna_output.output.seg),
+                str(rules._ichorcna_output.output.plot)
+            ],
+            zip,  # Run expand() with zip(), not product()
+            seq_type=CFG["runs"]["tumour_seq_type"],
+            genome_build=CFG["runs"]["tumour_genome_build"],
+            pair_status=CFG["runs"]["pair_status"],
+            tumour_id=CFG["runs"]["tumour_sample_id"],
+            normal_id=CFG["runs"]["normal_sample_id"],
+            binSize=[CFG["options"]["deeptools"]["binSize"]] * len(CFG["runs"]["tumour_sample_id"]))  # the configured bin size repeated once per run, so zip() has a value for each
+
+
+
+##### CLEANUP #####
+
+
+# Perform some clean-up tasks, including storing the module-specific
+# configuration on disk and deleting the `CFG` variable
+op.cleanup_module(CFG)
diff --git a/modules/ichorcna/1.1/schemas/base-1.0.yaml b/modules/ichorcna/1.1/schemas/base-1.0.yaml
new file mode 120000
index 000000000..0a69d1ceb
--- /dev/null
+++ b/modules/ichorcna/1.1/schemas/base-1.0.yaml
@@ -0,0 +1 @@
+../../../../schemas/base/base-1.0.yaml
\ No newline at end of file
diff --git a/modules/ichorcna/1.1/src/runIchorCNA.R b/modules/ichorcna/1.1/src/runIchorCNA.R
new file mode 100755
index 000000000..ec1d7384f
--- /dev/null
+++ b/modules/ichorcna/1.1/src/runIchorCNA.R
@@ -0,0 +1,423 @@
+# file:   ichorCNA.R
+# authors: Gavin Ha, Ph.D.
+#          Fred Hutch
+# contact: <gha@fredhutch.org>
+#
+#         Justin Rhoades
+#          Broad Institute
+# contact: <rhoades@broadinstitute.org>
+
+# ichorCNA: https://github.com/broadinstitute/ichorCNA
+# date:   July 24, 2019
+# description: Hidden Markov model (HMM) to analyze Ultra-low pass whole genome sequencing (ULP-WGS) data.
+# This script is the main script to run the HMM.
+
+library(optparse)
+
+option_list <- list(
+  make_option(c("--WIG"), type = "character", help = "Path to tumor WIG file. Required."),
+  make_option(c("--NORMWIG"), type = "character", default=NULL, help = "Path to normal WIG file. Default: [%default]"),
+  make_option(c("--gcWig"), type = "character", help = "Path to GC-content WIG file; Required"),
+  make_option(c("--mapWig"), type = "character", default=NULL, help = "Path to mappability score WIG file. Default: [%default]"),
+  make_option(c("--normalPanel"), type="character", default=NULL, help="Median corrected depth from panel of normals. Default: [%default]"),
+  make_option(c("--exons.bed"), type = "character", default=NULL, help = "Path to bed file containing exon regions. Default: [%default]"),
+  make_option(c("--id"), type = "character", default="test", help = "Patient ID. Default: [%default]"),
+  make_option(c("--centromere"), type="character", default=NULL, help = "File containing Centromere locations; if not provided then will use hg19 version from ichorCNA package. Default: [%default]"),
+  make_option(c("--minMapScore"), type = "numeric", default=0.9, help="Include bins with a minimum mappability score of this value. Default: [%default]."),
+  make_option(c("--rmCentromereFlankLength"), type="numeric", default=1e5, help="Length of region flanking centromere to remove. Default: [%default]"),
+  make_option(c("--normal"), type="character", default="0.5", help = "Initial normal contamination; can be more than one value if additional normal initializations are desired. Default: [%default]"),
+  make_option(c("--scStates"), type="character", default="NULL", help = "Subclonal states to consider. Default: [%default]"),
+  make_option(c("--coverage"), type="numeric", default=NULL, help = "PICARD sequencing coverage. Default: [%default]"),
+  make_option(c("--lambda"), type="character", default="NULL", help="Initial Student's t precision; must contain 4 values (e.g. c(1500,1500,1500,1500)); if not provided then will automatically use based on variance of data. Default: [%default]"),
+  make_option(c("--lambdaScaleHyperParam"), type="numeric", default=3, help="Hyperparameter (scale) for Gamma prior on Student's-t precision. Default: [%default]"),
+  #	make_option(c("--kappa"), type="character", default=50, help="Initial state distribution"),
+  make_option(c("--ploidy"), type="character", default="2", help = "Initial tumour ploidy; can be more than one value if additional ploidy initializations are desired. Default: [%default]"),
+  make_option(c("--maxCN"), type="numeric", default=7, help = "Total clonal CN states. Default: [%default]"),
+  make_option(c("--estimateNormal"), type="logical", default=TRUE, help = "Estimate normal. Default: [%default]"),
+  make_option(c("--estimateScPrevalence"), type="logical", default=TRUE, help = "Estimate subclonal prevalence. Default: [%default]"),
+  make_option(c("--estimatePloidy"), type="logical", default=TRUE, help = "Estimate tumour ploidy. Default: [%default]"),
+  make_option(c("--maxFracCNASubclone"), type="numeric", default=0.7, help="Exclude solutions with fraction of subclonal events greater than this value. Default: [%default]"),
+  make_option(c("--maxFracGenomeSubclone"), type="numeric", default=0.5, help="Exclude solutions with subclonal genome fraction greater than this value. Default: [%default]"),
+  make_option(c("--minSegmentBins"), type="numeric", default=50, help="Minimum number of bins for largest segment threshold required to estimate tumor fraction; if below this threshold, then will be assigned zero tumor fraction."),
+  make_option(c("--altFracThreshold"), type="numeric", default=0.05, help="Minimum proportion of bins altered required to estimate tumor fraction; if below this threshold, then will be assigned zero tumor fraction. Default: [%default]"),
+  make_option(c("--chrNormalize"), type="character", default="c(1:22)", help = "Specify chromosomes to normalize GC/mappability biases. Default: [%default]"),
+  make_option(c("--chrTrain"), type="character", default="c(1:22)", help = "Specify chromosomes to estimate params. Default: [%default]"),
+  make_option(c("--chrs"), type="character", default="c(1:22,\"X\")", help = "Specify chromosomes to analyze. Default: [%default]"),
+  make_option(c("--genomeBuild"), type="character", default="hg19", help="Geome build. Default: [%default]"),
+  make_option(c("--genomeStyle"), type = "character", default = "NCBI", help = "NCBI or UCSC chromosome naming convention; use UCSC if desired output is to have \"chr\" string. [Default: %default]"),
+  make_option(c("--normalizeMaleX"), type="logical", default=TRUE, help = "If male, then normalize chrX by median. Default: [%default]"),
+  make_option(c("--minTumFracToCorrect"), type="numeric", default=0.1, help = "Tumor-fraction correction of bin and segment-level CNA if sample has minimum estimated tumor fraction. [Default: %default]"), 
+  make_option(c("--fracReadsInChrYForMale"), type="numeric", default=0.001, help = "Threshold for fraction of reads in chrY to assign as male. Default: [%default]"),
+  make_option(c("--includeHOMD"), type="logical", default=FALSE, help="If FALSE, then exclude HOMD state. Useful when using large bins (e.g. 1Mb). Default: [%default]"),
+  make_option(c("--txnE"), type="numeric", default=0.9999999, help = "Self-transition probability. Increase to decrease number of segments. Default: [%default]"),
+  make_option(c("--txnStrength"), type="numeric", default=1e7, help = "Transition pseudo-counts. Exponent should be the same as the number of decimal places of --txnE. Default: [%default]"),
+  make_option(c("--plotFileType"), type="character", default="pdf", help = "File format for output plots. Default: [%default]"),
+	make_option(c("--plotYLim"), type="character", default="c(-2,2)", help = "ylim to use for chromosome plots. Default: [%default]"),
+  make_option(c("--outDir"), type="character", default="./", help = "Output Directory. Default: [%default]"),
+  make_option(c("--libdir"), type = "character", default=NULL, help = "Script library path. Usually exclude this argument unless custom modifications have been made to the ichorCNA R package code and the user would like to source those R files. Default: [%default]")
+)
+parseobj <- OptionParser(option_list=option_list)
+opt <- parse_args(parseobj)
+print(opt)
+options(scipen=0, stringsAsFactors=F)
+
+library(HMMcopy)
+library(GenomicRanges)
+library(GenomeInfoDb)
+options(stringsAsFactors=FALSE)
+options(bitmapType='cairo')
+
+patientID <- opt$id  # used below to name the .RData image and the params file
+tumour_file <- opt$WIG  # either a single path ending in "wig", or a tab-delimited table of sample id / wig path rows
+normal_file <- opt$NORMWIG  # optional; NULL, "None" or "NULL" means no matched normal is loaded
+gcWig <- opt$gcWig  # required; the script stops if it is NULL, "None" or "NULL"
+mapWig <- opt$mapWig  # optional; NULL, "None" or "NULL" skips the mappability wig
+normal_panel <- opt$normalPanel  # optional; "None"/"NULL" are reset to NULL below
+exons.bed <- opt$exons.bed  # NULL, "None" or "NULL" means no exon bed file is read
+centromere <- opt$centromere  # NULL, "None" or "NULL" falls back to the GRCh37 table shipped with the ichorCNA package
+minMapScore <- opt$minMapScore
+flankLength <- opt$rmCentromereFlankLength
+normal <- eval(parse(text = opt$normal))
+scStates <- eval(parse(text = opt$scStates))
+lambda <- eval(parse(text = opt$lambda))
+lambdaScaleHyperParam <- opt$lambdaScaleHyperParam
+estimateNormal <- opt$estimateNormal
+estimatePloidy <- opt$estimatePloidy
+estimateScPrevalence <- opt$estimateScPrevalence
+maxFracCNASubclone <- opt$maxFracCNASubclone
+maxFracGenomeSubclone <- opt$maxFracGenomeSubclone
+minSegmentBins <- opt$minSegmentBins
+altFracThreshold <- opt$altFracThreshold
+ploidy <- eval(parse(text = opt$ploidy))
+coverage <- opt$coverage
+maxCN <- opt$maxCN
+txnE <- opt$txnE
+txnStrength <- opt$txnStrength
+normalizeMaleX <- as.logical(opt$normalizeMaleX)
+includeHOMD <- as.logical(opt$includeHOMD)
+minTumFracToCorrect <- opt$minTumFracToCorrect
+fracReadsInChrYForMale <- opt$fracReadsInChrYForMale
+chrXMedianForMale <- -0.1
+outDir <- opt$outDir
+libdir <- opt$libdir
+plotFileType <- opt$plotFileType
+plotYLim <- eval(parse(text=opt$plotYLim))
+gender <- NULL
+outImage <- paste0(outDir,"/", patientID,".RData")
+genomeBuild <- opt$genomeBuild
+genomeStyle <- opt$genomeStyle
+chrs <- as.character(eval(parse(text = opt$chrs)))
+chrTrain <- as.character(eval(parse(text=opt$chrTrain))); 
+chrNormalize <- as.character(eval(parse(text=opt$chrNormalize))); 
+seqlevelsStyle(chrs) <- genomeStyle
+seqlevelsStyle(chrNormalize) <- genomeStyle
+seqlevelsStyle(chrTrain) <- genomeStyle
+
+## load ichorCNA library or source R scripts
+if (!is.null(libdir) && libdir != "None"){
+	source(paste0(libdir,"/R/utils.R"))
+	source(paste0(libdir,"/R/segmentation.R"))
+	source(paste0(libdir,"/R/EM.R"))
+	source(paste0(libdir,"/R/output.R"))
+	source(paste0(libdir,"/R/plotting.R"))
+} else {
+    library(ichorCNA)
+}
+
+## load seqinfo 
+# seqinfo <- getSeqInfo(genomeBuild, genomeStyle)
+seqinfo <- NULL
+
+if (substr(tumour_file,nchar(tumour_file)-2,nchar(tumour_file)) == "wig") {
+  wigFiles <- data.frame(cbind(patientID, tumour_file))
+} else {
+  wigFiles <- read.delim(tumour_file, header=F, as.is=T)
+}
+
+## FILTER BY EXONS IF PROVIDED ##
+## add gc and map to GRanges object ##
+if (is.null(exons.bed) || exons.bed == "None" || exons.bed == "NULL"){
+  targetedSequences <- NULL
+}else{
+  targetedSequences <- read.delim(exons.bed, header=T, sep="\t")  
+}
+
+## load PoN
+if (is.null(normal_panel) || normal_panel == "None" || normal_panel == "NULL"){
+	normal_panel <- NULL
+}
+
+if (is.null(centromere) || centromere == "None" || centromere == "NULL"){ # no centromere file provided
+	centromere <- system.file("extdata", "GRCh37.p13_centromere_UCSC-gapTable.txt", 
+			package = "ichorCNA")
+}
+centromere <- read.delim(centromere,header=T,stringsAsFactors=F,sep="\t")
+save.image(outImage)
+## LOAD IN WIG FILES ##
+numSamples <- nrow(wigFiles)
+
+tumour_copy <- list()
+for (i in 1:numSamples) {
+  id <- wigFiles[i,1]
+  ## create output directories for each sample ##
+  dir.create(paste0(outDir, "/", id, "/"), recursive = TRUE)
+  ### LOAD TUMOUR AND NORMAL FILES ###
+  message("Loading tumour file:", wigFiles[i,1])
+  tumour_reads <- wigToGRanges(wigFiles[i,2])
+  
+  ## LOAD GC/MAP WIG FILES ###
+  # find the bin size and load corresponding wig files #
+  binSize <- as.data.frame(tumour_reads[1,])$width 
+  message("Reading GC and mappability files")
+  if (is.null(gcWig) || gcWig == "None" || gcWig == "NULL"){
+      stop("GC wig file is required")
+  }
+  gc <- wigToGRanges(gcWig)
+  if (is.null(mapWig) || mapWig == "None" || mapWig == "NULL"){
+      message("No mappability wig file input, excluding from correction")
+      map <- NULL
+  } else {
+      map <- wigToGRanges(mapWig)
+  }
+  message("Correcting Tumour")
+  
+  counts <- loadReadCountsFromWig(tumour_reads, chrs = chrs, gc = gc, map = map, 
+                                       centromere = centromere, flankLength = flankLength, 
+                                       targetedSequences = targetedSequences, chrXMedianForMale = chrXMedianForMale,
+                                       genomeStyle = genomeStyle, fracReadsInChrYForMale = fracReadsInChrYForMale,
+                                       chrNormalize = chrNormalize, mapScoreThres = minMapScore)
+  tumour_copy[[id]] <- counts$counts #as(counts$counts, "GRanges")
+  gender <- counts$gender
+  ## load in normal file if provided 
+  if (!is.null(normal_file) && normal_file != "None" && normal_file != "NULL"){
+	message("Loading normal file:", normal_file)
+	normal_reads <- wigToGRanges(normal_file)
+	message("Correcting Normal")
+	counts <- loadReadCountsFromWig(normal_reads, chrs=chrs, gc=gc, map=map, 
+			centromere=centromere, flankLength = flankLength, targetedSequences=targetedSequences,
+			genomeStyle = genomeStyle, chrNormalize = chrNormalize, mapScoreThres = minMapScore)
+	normal_copy <- counts$counts #as(counts$counts, "GRanges")
+	gender.normal <- counts$gender
+  }else{
+	normal_copy <- NULL
+  }
+
+  ### DETERMINE GENDER ###
+  ## if normal file not given, use chrY, else use chrX
+  message("Determining gender...", appendLF = FALSE)
+  gender.mismatch <- FALSE
+  if (!is.null(normal_copy)){
+	if (gender$gender != gender.normal$gender){ #use tumour # use normal if given
+	# check if normal is same gender as tumour
+	  gender.mismatch <- TRUE
+	}
+  }
+  message("Gender ", gender$gender)
+
+  ## NORMALIZE GENOME-WIDE BY MATCHED NORMAL OR NORMAL PANEL (MEDIAN) ##
+  tumour_copy[[id]] <- normalizeByPanelOrMatchedNormal(tumour_copy[[id]], chrs = chrs, 
+      normal_panel = normal_panel, normal_copy = normal_copy, 
+      gender = gender$gender, normalizeMaleX = normalizeMaleX)
+	
+	### OUTPUT FILE ###
+	### PUTTING TOGETHER THE COLUMNS IN THE OUTPUT ###
+	outMat <- as.data.frame(tumour_copy[[id]])
+	#outMat <- outMat[,c(1,2,3,12)]
+	outMat <- outMat[,c("seqnames","start","end","copy")]
+	colnames(outMat) <- c("chr","start","end","log2_TNratio_corrected")
+	outFile <- paste0(outDir,"/",id,".correctedDepth.txt")
+	message(paste("Outputting to:", outFile))
+	write.table(outMat, file=outFile, row.names=F, col.names=T, quote=F, sep="\t")
+
+} ## end of for each sample
+
+chrInd <- as.character(seqnames(tumour_copy[[1]])) %in% chrTrain
+## get positions that are valid
+valid <- tumour_copy[[1]]$valid
+if (length(tumour_copy) >= 2) {
+  for (i in 2:length(tumour_copy)){ 
+    valid <- valid & tumour_copy[[i]]$valid 
+  } 
+}
+save.image(outImage)
+
+### RUN HMM ###
+## store the results for different normal and ploidy solutions ##
+ptmTotalSolutions <- proc.time() # start total timer
+results <- list()
+loglik <- as.data.frame(matrix(NA, nrow = length(normal) * length(ploidy), ncol = 7, 
+                 dimnames = list(c(), c("init", "n_est", "phi_est", "BIC", 
+                 												"Frac_genome_subclonal", "Frac_CNA_subclonal", "loglik"))))
+counter <- 1
+compNames <- rep(NA, nrow(loglik))
+mainName <- rep(NA, length(normal) * length(ploidy))
+#### restart for purity and ploidy values ####
+for (n in normal){
+  for (p in ploidy){
+    if (n == 0.95 & p != 2) {
+        next
+    }
+    logR <- as.data.frame(lapply(tumour_copy, function(x) { x$copy })) # NEED TO EXCLUDE CHR X #
+    param <- getDefaultParameters(logR[valid & chrInd, , drop=F], maxCN = maxCN, includeHOMD = includeHOMD, 
+                ct.sc=scStates, ploidy = floor(p), e=txnE, e.same = 50, strength=txnStrength)
+    param$phi_0 <- rep(p, numSamples)
+    param$n_0 <- rep(n, numSamples)
+    
+    ############################################
+    ######## CUSTOM PARAMETER SETTINGS #########
+    ############################################
+    # 0.1x cfDNA #
+    if (is.null(lambda)){
+			logR.var <- 1 / ((apply(logR, 2, sd, na.rm = TRUE) / sqrt(length(param$ct))) ^ 2)
+			param$lambda <- rep(logR.var, length(param$ct))
+			param$lambda[param$ct %in% c(2)] <- logR.var 
+			param$lambda[param$ct %in% c(1,3)] <- logR.var 
+			param$lambda[param$ct >= 4] <- logR.var / 5
+			param$lambda[param$ct == max(param$ct)] <- logR.var / 15
+			param$lambda[param$ct.sc.status] <- logR.var / 10
+    }else{
+			param$lambda[param$ct %in% c(2)] <- lambda[2]
+			param$lambda[param$ct %in% c(1)] <- lambda[1]
+			param$lambda[param$ct %in% c(3)] <- lambda[3]
+			param$lambda[param$ct >= 4] <- lambda[4]
+			param$lambda[param$ct == max(param$ct)] <- lambda[2] / 15
+			param$lambda[param$ct.sc.status] <- lambda[2] / 10
+		}
+		param$alphaLambda <- rep(lambdaScaleHyperParam, length(param$ct))  
+    # 1x bulk tumors #
+    #param$lambda[param$ct %in% c(2)] <- 2000
+    #param$lambda[param$ct %in% c(1)] <- 1750
+    #param$lambda[param$ct %in% c(3)] <- 1750
+    #param$lambda[param$ct >= 4] <- 1500
+    #param$lambda[param$ct == max(param$ct)] <- 1000 / 25
+		#param$lambda[param$ct.sc.status] <- 1000 / 75
+		#param$alphaLambda[param$ct.sc.status] <- 4
+		#param$alphaLambda[param$ct %in% c(1,3)] <- 5
+		#param$alphaLambda[param$ct %in% c(2)] <- 5
+		#param$alphaLambda[param$ct == max(param$ct)] <- 4
+				
+		#############################################
+		################ RUN HMM ####################
+		#############################################
+    hmmResults.cor <- HMMsegment(tumour_copy, valid, dataType = "copy", 
+                                 param = param, chrTrain = chrTrain, maxiter = 50,
+                                 estimateNormal = estimateNormal, estimatePloidy = estimatePloidy,
+                                 estimateSubclone = estimateScPrevalence, verbose = TRUE)
+                                     
+    for (s in 1:numSamples){
+  		iter <- hmmResults.cor$results$iter
+  		id <- names(hmmResults.cor$cna)[s]
+
+  		## convert full diploid solution (of chrs to train) to have 1.0 normal or 0.0 purity
+  		## check if there is an altered segment that has at least a minimum # of bins
+  		segsS <- hmmResults.cor$results$segs[[s]]
+  		segsS <- segsS[segsS$chr %in% chrTrain, ]
+  		segAltInd <- which(segsS$event != "NEUT")
+  		maxBinLength = -Inf
+  		if (sum(segAltInd) > 0){
+  			maxInd <- which.max(segsS$end[segAltInd] - segsS$start[segAltInd] + 1)
+  			maxSegRD <- GRanges(seqnames=segsS$chr[segAltInd[maxInd]], 
+  								ranges=IRanges(start=segsS$start[segAltInd[maxInd]], end=segsS$end[segAltInd[maxInd]]))
+  			hits <- findOverlaps(query=maxSegRD, subject=tumour_copy[[s]][valid, ])
+  			maxBinLength <- length(subjectHits(hits))
+  		}
+  		## check if there are proportion of total bins altered 
+  		# if segment size smaller than minSegmentBins, but altFrac > altFracThreshold, then still estimate TF
+  		cnaS <- hmmResults.cor$cna[[s]]
+  		altInd <- cnaS[cnaS$chr %in% chrTrain, "event"] == "NEUT"
+  		altFrac <- sum(!altInd, na.rm=TRUE) / length(altInd)
+  		if ((maxBinLength <= minSegmentBins) & (altFrac <= altFracThreshold)){
+  			hmmResults.cor$results$n[s, iter] <- 1.0
+  		}
+
+      # correct integer copy number based on estimated purity and ploidy
+      correctedResults <- correctIntegerCN(cn = hmmResults.cor$cna[[s]],
+            segs = hmmResults.cor$results$segs[[s]], 
+            purity = 1 - hmmResults.cor$results$n[s, iter], ploidy = hmmResults.cor$results$phi[s, iter],
+            cellPrev = 1 - hmmResults.cor$results$sp[s, iter], 
+            maxCNtoCorrect.autosomes = maxCN, maxCNtoCorrect.X = maxCN, minPurityToCorrect = minTumFracToCorrect, 
+            gender = gender$gender, chrs = chrs, correctHOMD = includeHOMD)
+      hmmResults.cor$results$segs[[s]] <- correctedResults$segs
+      hmmResults.cor$cna[[s]] <- correctedResults$cn
+
+      	## plot solution ##
+  		outPlotFile <- paste0(outDir, "/", id, "/", id, "_genomeWide_", "n", n, "-p", p)
+  		mainName[counter] <- paste0(id, ", n: ", n, ", p: ", p, ", log likelihood: ", signif(hmmResults.cor$results$loglik[hmmResults.cor$results$iter], digits = 4))
+  		plotGWSolution(hmmResults.cor, s=s, outPlotFile=outPlotFile, plotFileType=plotFileType, 
+            logR.column = "logR", call.column = "Corrected_Call",
+  					 plotYLim=plotYLim, estimateScPrevalence=estimateScPrevalence, seqinfo=seqinfo, main=mainName[counter])
+    }
+    iter <- hmmResults.cor$results$iter
+    results[[counter]] <- hmmResults.cor
+    loglik[counter, "loglik"] <- signif(hmmResults.cor$results$loglik[iter], digits = 4)
+    subClonalBinCount <- unlist(lapply(hmmResults.cor$cna, function(x){ sum(x$subclone.status) }))
+    fracGenomeSub <- subClonalBinCount / unlist(lapply(hmmResults.cor$cna, function(x){ nrow(x) }))
+    fracAltSub <- subClonalBinCount / unlist(lapply(hmmResults.cor$cna, function(x){ sum(x$copy.number != 2) }))
+    fracAltSub <- lapply(fracAltSub, function(x){if (is.na(x)){0}else{x}})
+    loglik[counter, "Frac_genome_subclonal"] <- paste0(signif(fracGenomeSub, digits=2), collapse=",")
+    loglik[counter, "Frac_CNA_subclonal"] <- paste0(signif(as.numeric(fracAltSub), digits=2), collapse=",")
+    loglik[counter, "init"] <- paste0("n", n, "-p", p)
+    loglik[counter, "n_est"] <- paste(signif(hmmResults.cor$results$n[, iter], digits = 2), collapse = ",")
+    loglik[counter, "phi_est"] <- paste(signif(hmmResults.cor$results$phi[, iter], digits = 4), collapse = ",")
+
+    counter <- counter + 1
+  }
+}
+## get total time for all solutions ##
+elapsedTimeSolutions <- proc.time() - ptmTotalSolutions
+message("Total ULP-WGS HMM Runtime: ", format(elapsedTimeSolutions[3] / 60, digits = 2), " min.")
+
+### SAVE R IMAGE ###
+save.image(outImage)
+#save(tumour_copy, results, loglik, file=paste0(outDir,"/",id,".RData"))
+
+### SELECT SOLUTION WITH LARGEST LIKELIHOOD ###
+loglik <- loglik[!is.na(loglik$init), ]
+## The Frac_* columns were stored as comma-joined strings (one value per sample), so parse them
+## back to numbers before thresholding; comparing the strings directly misorders e.g. "1e-04".
+fracWithin <- function(x, maxFrac) {
+  vapply(strsplit(as.character(x), ","), function(v) all(as.numeric(v) <= maxFrac), logical(1))
+}
+ind <- order(as.numeric(loglik[, "loglik"]), decreasing=TRUE) # default: sort by likelihood only
+if (estimateScPrevalence){ ## prefer solutions that do not have too large % subclonal
+  fracInd <- which(fracWithin(loglik[, "Frac_CNA_subclonal"], maxFracCNASubclone) &
+                   fracWithin(loglik[, "Frac_genome_subclonal"], maxFracGenomeSubclone))
+  if (length(fracInd) > 0) ind <- fracInd[order(as.numeric(loglik[fracInd, "loglik"]), decreasing=TRUE)]
+}
+
+#new loop by order of solutions (ind)
+outPlotFile <- paste0(outDir, "/", id, "/", id, "_genomeWide_all_sols")
+for(i in 1:length(ind)) {
+  hmmResults.cor <- results[[ind[i]]]
+  turnDevOff <- FALSE
+  turnDevOn <- FALSE
+  if (i == 1){
+  	turnDevOn <- TRUE
+  }
+  if (i == length(ind)){
+  	turnDevOff <- TRUE
+  }
+  plotGWSolution(hmmResults.cor, s=s, outPlotFile=outPlotFile, plotFileType="pdf", 
+                     logR.column = "logR", call.column = "Corrected_Call",
+                     plotYLim=plotYLim, estimateScPrevalence=estimateScPrevalence, 
+                     seqinfo = seqinfo,
+                     turnDevOn = turnDevOn, turnDevOff = turnDevOff, main=mainName[ind[i]])
+}
+
+hmmResults.cor <- results[[ind[1]]]
+hmmResults.cor$results$loglik <- as.data.frame(loglik)
+hmmResults.cor$results$gender <- gender$gender
+hmmResults.cor$results$chrYCov <- gender$chrYCovRatio
+hmmResults.cor$results$chrXMedian <- gender$chrXMedian
+hmmResults.cor$results$coverage <- coverage
+
+outputHMM(cna = hmmResults.cor$cna, segs = hmmResults.cor$results$segs, 
+                      results = hmmResults.cor$results, patientID = patientID, outDir=outDir)
+outFile <- paste0(outDir, "/", patientID, ".params.txt")
+outputParametersToFile(hmmResults.cor, file = outFile)
+
+## plot solutions for all samples 
+plotSolutions(hmmResults.cor, tumour_copy, chrs, outDir, numSamples=numSamples,
+              logR.column = "logR", call.column = "Corrected_Call",
+              plotFileType=plotFileType, plotYLim=plotYLim, seqinfo = seqinfo,
+              estimateScPrevalence=estimateScPrevalence, maxCN=maxCN)
\ No newline at end of file
diff --git a/modules/ichorcna/CHANGELOG.md b/modules/ichorcna/CHANGELOG.md
index 2a9de0480..f603ff45c 100755
--- a/modules/ichorcna/CHANGELOG.md
+++ b/modules/ichorcna/CHANGELOG.md
@@ -5,6 +5,24 @@ All notable changes to the `ichorcna` module will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.1] - 2022-01-08
+
+This release was authored by jawong.
+
+IchorCNA has been updated to be compatible with .cram files.
+
+Previously, the issue with IchorCNA was its use of hmmcopy_utils readCounter, an old C tool that only works with bam files. In this version, I leverage deepTools bamCoverage to generate bigwig tracks from bam/cram files, then use UCSC bigWigToWig to convert them back to wig files.
+
+Notably, deepTools' bamCoverage stitches together adjacent regions that have identical values (e.g. the centromeres, which are all denoted as 0). This causes issues with ichorCNA since the reference .wig files have a specific number of lines (corresponding to the number of bins of a given size across each reference genome). Also, the wig format used in ichorCNA requires a very specific header notation (it is hard-coded in their software), for example "fixedStep chrom=1 start=1 step=1000000 span=1000000". Every line after that header is just the coverage value of one bin.
+
+The UCSC bigWigToWig command essentially converts the bigwig to a bedGraph format (chr, start, end, coverage). Therefore, I used BEDOPS --chop to slice the bedGraph into windows of binSize (a wildcard), and then used bedtools to intersect the bedGraph with the sliced regions (thus keeping the coverage values of the wig file, which are dropped by BEDOPS).
+
+Note: setting up the ucsc-bigWigToWig conda env may fail on servers with an outdated OS (e.g. numbers), with errors such as:
+(Problem: nothing provides __glibc >=2.17 needed by libgcc-ng-9.3.0-h5101ec6_17
+Problem: nothing provides __glibc >=2.17 needed by libstdcxx-ng-9.3.0-hd4cf53a_17)
+To work around this, first set up the env on a compatible server (e.g. gphost); you can then launch the module on numbers.
+
+
 ## [1.0] - 2021-03-31
 
 This release was authored by jawong.
diff --git a/modules/liftover/1.1/liftover.smk b/modules/liftover/1.1/liftover.smk
index 3392c1255..2b3be5455 100644
--- a/modules/liftover/1.1/liftover.smk
+++ b/modules/liftover/1.1/liftover.smk
@@ -11,11 +11,31 @@
 
 ##### SETUP #####
 
-
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
 
+# Dependency check: stop the workflow unless the installed oncopipe is at least min_oncopipe_version. Copy this whole section into any module that uses new features in oncopipe.
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+# NOTE(review): `sys` and `logger` are not imported in the lines shown here; presumably Snakemake provides them, confirm.
+# TODO: declare "packaging" as a dependency of LCR-modules or oncopipe so that the fallback exit above is never needed.
+
+current_version = pkg_resources.get_distribution("oncopipe").version  # version of the installed oncopipe distribution
+if version.parse(current_version) < version.parse(min_oncopipe_version):  # compare parsed versions, not raw strings
+    logger.warning(  # the two-line message is wrapped in ANSI escape codes ('\x1b[0;31;40m' ... '\x1b[0m')
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section
+
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["liftover"]`
 CFG = op.setup_module(
@@ -36,7 +56,7 @@ rule _liftover_input_seg:
     output:
         seg = CFG["dirs"]["inputs"] + "{genome_build}/seg/{tumour_sample_id}--{normal_sample_id}.{tool}.igv.seg"
     run:
-        op.relative_symlink(input.seg, output.seg)
+        op.absolute_symlink(input.seg, output.seg)
 
 
 # Convert initial seg file into bed format
@@ -121,7 +141,7 @@ rule _liftover_output_seg:
     output:
         seg = CFG["dirs"]["outputs"] + "seg/{tumour_sample_id}--{normal_sample_id}.{tool}.hg19.igv.seg"
     run:
-        op.relative_symlink(input.seg, output.seg)
+        op.relative_symlink(input.seg, output.seg, in_module=True )
 
 
 # Generates the target sentinels for each run, which generate the symlinks
diff --git a/modules/liftover/1.2/config/default.yaml b/modules/liftover/1.2/config/default.yaml
index 6baeff2d7..29c3fcd76 100644
--- a/modules/liftover/1.2/config/default.yaml
+++ b/modules/liftover/1.2/config/default.yaml
@@ -1,20 +1,22 @@
 lcr-modules:
-
     liftover:
-        # Please specify parameters marked as __UPDATE__ using provided examples. 
+        # Please specify parameters marked as __UPDATE__ using provided examples.
         # The "tool" option means the name of the tool used to produce initial .seg files. It will be preserved in the name of the final output file.
-        # The "sample_seg" option points to the initial .seg files to be converted. 
-        # The "min_mismatch" option is a minimum ratio of bases that must remap when transforming coordinates. 
+        # The "sample_seg" option points to the initial .seg files to be converted.
+        # The "min_mismatch" option is a minimum ratio of bases that must remap when transforming coordinates.
         # The "min_mismatch" must be a number with maximum of 1 (100% of bases must match when remapping).
         #tool: "battenberg"
         tool: "__UPDATE__" # Specify the name of the tool that produced the .seg files. For example, "battenberg"
         #you can include the version if it helps simplify the pattern matching, for example sequenza-1.0
-        dirs: 
+        dirs:
             _parent: "__UPDATE__" #example for combining with Battenberg module: "results/battenberg-1.0_liftover-1.1"
         inputs:
             sample_seg: "__UPDATE__"
             # Path to seg files for lifting. For example:
+            # For battenberg only:
             # sample_seg: "data/{tool}/{genome_build}/{tumour_sample_id}--{normal_sample_id}_subclones.igv.seg"
+            # For Sequenza:
+            # sample_seg: "results/{tool}/99-outputs/filtered_seg/{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--matched.igv.seg"
         scratch_subdirectories: "scratch/"
         options:
             chr_colNum: 2
diff --git a/modules/liftover/1.2/liftover.smk b/modules/liftover/1.2/liftover.smk
index f72add764..594452167 100644
--- a/modules/liftover/1.2/liftover.smk
+++ b/modules/liftover/1.2/liftover.smk
@@ -28,8 +28,10 @@ except ModuleNotFoundError:
 
 current_version = pkg_resources.get_distribution("oncopipe").version
 if version.parse(current_version) < version.parse(min_oncopipe_version):
-    print('\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + '\x1b[0m')
-    print('\x1b[0;31;40m' + f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m')
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
     sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
 
 # End of dependency checking section 
@@ -62,9 +64,13 @@ rule _liftover_input_seg:
     input:
         seg = CFG["inputs"]["sample_seg"]
     output:
-        seg = CFG["dirs"]["inputs"] + "{genome_build}/{tumour_sample_id}--{normal_sample_id}.{tool}.seg"
+        seg = CFG["dirs"]["inputs"] + "{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.seg",
+        another_seg = CFG["dirs"]["outputs"] + "from--{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.seg"
+    wildcard_constraints:
+        tool = CFG["tool"]
     run:
         op.relative_symlink(input.seg, output.seg)
+        op.relative_symlink(input.seg, output.another_seg)
 
 
 # Convert initial seg file into bed format
@@ -72,10 +78,10 @@ rule _liftover_seg_2_bed:
     input:
         seg = str(rules._liftover_input_seg.output.seg)
     output:
-        bed = CFG["dirs"]["seg2bed"] + "from--{genome_build}/{tumour_sample_id}--{normal_sample_id}.{tool}.bed",
-        header = temp(CFG["dirs"]["seg2bed"] + "from--{genome_build}/{tumour_sample_id}--{normal_sample_id}.{tool}.bed.header")
+        bed = CFG["dirs"]["seg2bed"] + "from--{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.bed",
+        header = temp(CFG["dirs"]["seg2bed"] + "from--{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.bed.header")
     log:
-        stderr = CFG["logs"]["seg2bed"] + "from--{genome_build}/{tumour_sample_id}--{normal_sample_id}.{tool}.stderr.log"
+        stderr = CFG["logs"]["seg2bed"] + "from--{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.stderr.log"
     params:
         opts = CFG["options"]["seg2bed2seg"],
         chr_colNum = CFG["options"]["chr_colNum"],
@@ -108,10 +114,10 @@ rule _run_liftover:
         native = rules._liftover_seg_2_bed.output.bed,
         chains = get_chain
     output:
-        lifted = CFG["dirs"]["liftover"] + "from--{genome_build}/{tumour_sample_id}--{normal_sample_id}.{tool}.lifted_{chain}.bed",
-        unmapped = CFG["dirs"]["liftover"] + "from--{genome_build}/{tumour_sample_id}--{normal_sample_id}.{tool}.lifted_{chain}.unmapped.bed"
+        lifted = CFG["dirs"]["liftover"] + "from--{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.lifted_{chain}.bed",
+        unmapped = CFG["dirs"]["liftover"] + "from--{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.lifted_{chain}.unmapped.bed"
     log:
-        stderr = CFG["logs"]["liftover"] + "from--{genome_build}/{tumour_sample_id}--{normal_sample_id}.{tool}.lifted_{chain}.stderr.log"
+        stderr = CFG["logs"]["liftover"] + "from--{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.lifted_{chain}.stderr.log"
     params:
         mismatch = CFG["options"]["min_mismatch"]
     conda:
@@ -132,9 +138,9 @@ rule _liftover_sort:
     input:
         lifted = rules._run_liftover.output.lifted
     output:
-        lifted_sorted = CFG["dirs"]["liftover"] + "from--{genome_build}/{tumour_sample_id}--{normal_sample_id}.{tool}.lifted_{chain}.sorted.bed"
+        lifted_sorted = CFG["dirs"]["liftover"] + "from--{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.lifted_{chain}.sorted.bed"
     log:
-        stderr = CFG["logs"]["liftover"] + "from--{genome_build}/{tumour_sample_id}--{normal_sample_id}.{tool}.lifted_{chain}.sorted.stderr.log"
+        stderr = CFG["logs"]["liftover"] + "from--{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.lifted_{chain}.sorted.stderr.log"
     shell:
         op.as_one_line("""
         sort -k1,1 -k2,2n -V {input.lifted} |
@@ -150,9 +156,9 @@ rule _liftover_bed_2_seg:
         lifted_sorted = str(rules._liftover_sort.output.lifted_sorted),
         headers = str(rules._liftover_seg_2_bed.output.header)
     output:
-        seg_lifted = CFG["dirs"]["bed2seg"] + "from--{genome_build}/raw_segments/{tumour_sample_id}--{normal_sample_id}.{tool}.lifted_{chain}.seg"
+        seg_lifted = CFG["dirs"]["bed2seg"] + "from--{seq_type}--{genome_build}/raw_segments/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.lifted_{chain}.seg"
     log:
-        stderr = CFG["logs"]["bed2seg"] + "from--{genome_build}/raw_segments/{tumour_sample_id}--{normal_sample_id}.{tool}.lifted_{chain}.stderr.log"
+        stderr = CFG["logs"]["bed2seg"] + "from--{seq_type}--{genome_build}/raw_segments/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.lifted_{chain}.stderr.log"
     params:
         opts = CFG["options"]["seg2bed2seg"]
     conda:
@@ -172,10 +178,10 @@ rule _liftover_fill_segments:
     input:
         seg_lifted = str(rules._liftover_bed_2_seg.output.seg_lifted)
     output:
-        seg_filled = CFG["dirs"]["bed2seg"] + "from--{genome_build}/filled_segments/{tumour_sample_id}--{normal_sample_id}.{tool}.lifted_{chain}.filled.seg"
+        seg_filled = CFG["dirs"]["bed2seg"] + "from--{seq_type}--{genome_build}/filled_segments/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.lifted_{chain}.filled.seg"
     log:
-        stdout = CFG["logs"]["bed2seg"] + "from--{genome_build}/filled_segments/{tumour_sample_id}--{normal_sample_id}.{tool}.lifted_{chain}.filled.stdout.log",
-        stderr = CFG["logs"]["bed2seg"] + "from--{genome_build}/filled_segments/{tumour_sample_id}--{normal_sample_id}.{tool}.lifted_{chain}.filled.stderr.log"
+        stdout = CFG["logs"]["bed2seg"] + "from--{seq_type}--{genome_build}/filled_segments/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.lifted_{chain}.filled.stdout.log",
+        stderr = CFG["logs"]["bed2seg"] + "from--{seq_type}--{genome_build}/filled_segments/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.lifted_{chain}.filled.stderr.log"
     params:
         script = CFG["options"]["fill_segments"],
         chromArm = op.switch_on_wildcard("chain", CFG["chromArm"])
@@ -197,7 +203,7 @@ rule _liftover_output_seg:
     input:
         seg = str(rules._liftover_fill_segments.output.seg_filled)
     output:
-        seg = CFG["dirs"]["outputs"] + "from--{genome_build}/{tumour_sample_id}--{normal_sample_id}.{tool}.lifted_{chain}.seg"
+        seg = CFG["dirs"]["outputs"] + "from--{seq_type}--{genome_build}/{tumour_sample_id}--{normal_sample_id}--{pair_status}.{tool}.lifted_{chain}.seg"
     run:
         op.relative_symlink(input.seg, output.seg, in_module=True)
 
@@ -207,12 +213,15 @@ rule _liftover_all:
     input:
         expand(
             [
-                str(rules._liftover_output_seg.output.seg)
+                str(rules._liftover_output_seg.output.seg),
+                str(rules._liftover_input_seg.output.another_seg)
             ],
             zip,  # Run expand() with zip(), not product()
             tumour_sample_id=CFG["runs"]["tumour_sample_id"],
             normal_sample_id=CFG["runs"]["normal_sample_id"],
             genome_build = CFG["runs"]["tumour_genome_build"],
+            seq_type=CFG["runs"]["tumour_seq_type"],
+            pair_status=CFG["runs"]["pair_status"],
             #repeat the tool name N times in expand so each pair in run is used
             tool=[CFG["tool"]] * len(CFG["runs"]["tumour_sample_id"]),
             chain=["hg38ToHg19" if "38" in str(x) else "hg19ToHg38" for x in CFG["runs"]["tumour_genome_build"]]
diff --git a/modules/liftover/2.0/config/default.yaml b/modules/liftover/2.0/config/default.yaml
new file mode 100644
index 000000000..bbb176351
--- /dev/null
+++ b/modules/liftover/2.0/config/default.yaml
@@ -0,0 +1,44 @@
+lcr-modules:
+    liftover:
+        # Please specify parameters marked as __UPDATE__ using provided examples.
+        # The "tool" option means the name of the tool used to produce initial .seg files. It will be preserved in the name of the final output file.
+        # The "sample_file" option points to the initial .seg or .bedpe files to be converted.
+        # The "min_mismatch" option is a minimum ratio of bases that must remap when transforming coordinates.
+        # The "min_mismatch" must be a number with maximum of 1 (100% of bases must match when remapping).
+        #tool: "battenberg"
+        tool: "__UPDATE__" # Specify the name of the tool that produced the .seg files. For example, "battenberg"
+        input_type: "__UPDATE__" # seg or bedpe 
+        cnv_tools: "controlfreec|battenberg|sequenza"
+        sv_tools: "manta|gridss|gridss_manta" # Add custom tool names separated by "|"
+        #you can include the version if it helps simplify the pattern matching, for example sequenza-1.0
+        dirs:
+            _parent: "__UPDATE__" #example for combining with Battenberg module: "results/battenberg-1.0_liftover-1.1"
+        inputs:
+            sample_file: "__UPDATE__"
+            # Path to seg or bedpe files for lifting. Use this module's wildcard names ({tumour_id}, {normal_id}). For example:
+            # sample_file: "data/{tool}/{genome_build}/{tumour_id}--{normal_id}_subclones.igv.seg"
+        scratch_subdirectories: ""
+        options:
+            chr_colNum: 2 # These colNum values are ignored for bedpe files
+            start_colNum: 3
+            end_colNum: 4
+            min_mismatch: 0.95
+            #min_mismatch: __UPDATE__ # Minimum ratio of bases that must remap when transforming coordinates. For example, 0.95
+            convert2bed: "{MODSDIR}/src/convert_for_liftover.py"
+            fill_segments: "{SCRIPTSDIR}/fill_segments/1.0/fill_segments.py"
+        conda_envs:
+            liftover-366: "{MODSDIR}/envs/liftover-366.yaml"
+            fill_segments: "{SCRIPTSDIR}/fill_segments/1.0/fill_segments.yaml"
+        # tsv with chromosome coordinates for each genome build
+        chromArm:
+            hg19ToHg38: "{SCRIPTSDIR}/fill_segments/1.0/src/chromArm.hg38.tsv"
+            hg38ToHg19: "{SCRIPTSDIR}/fill_segments/1.0/src/chromArm.hg19.tsv"
+        pairing_config:
+            genome:
+                run_paired_tumours: True
+                run_unpaired_tumours_with: null
+                run_paired_tumours_as_unpaired: False
+            capture:
+                run_paired_tumours: True
+                run_unpaired_tumours_with: null
+                run_paired_tumours_as_unpaired: False
diff --git a/modules/liftover/2.0/envs/liftover-366.yaml b/modules/liftover/2.0/envs/liftover-366.yaml
new file mode 100644
index 000000000..6820fd3ad
--- /dev/null
+++ b/modules/liftover/2.0/envs/liftover-366.yaml
@@ -0,0 +1,45 @@
+name: liftover
+channels:
+  - bioconda
+  - conda-forge
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1
+  - _openmp_mutex=4.5
+  - ca-certificates=2020.6.20
+  - certifi=2020.6.20
+  - ld_impl_linux-64=2.35
+  - libblas=3.8.0
+  - libcblas=3.8.0
+  - libffi=3.2.1
+  - libgcc-ng=9.3.0
+  - libgfortran-ng=9.3.0
+  - libgfortran5=9.3.0
+  - libgomp=9.3.0
+  - liblapack=3.8.0
+  - libopenblas=0.3.10
+  - libpng=1.6.37
+  - libstdcxx-ng=9.3.0
+  - libuuid=2.32.1
+  - mysql-connector-c=6.1.11
+  - ncurses=6.2
+  - numpy=1.19.2
+  - openssl=1.1.1h
+  - pandas=1.1.3
+  - pip=20.2.4
+  - python=3.9.0
+  - python-dateutil=2.8.1
+  - python_abi=3.9
+  - pytz=2020.1
+  - readline=8.0
+  - setuptools=49.6.0
+  - simplejson=3.17.2
+  - six=1.15.0
+  - sqlite=3.33.0
+  - tk=8.6.10
+  - tzdata=2020c
+  - ucsc-liftover=377
+  - wheel=0.35.1
+  - xz=5.2.5
+  - zlib=1.2.11
+prefix: /home/dreval/miniconda3/envs/liftover
diff --git a/modules/liftover/2.0/liftover.smk b/modules/liftover/2.0/liftover.smk
new file mode 100644
index 000000000..6ad2f1722
--- /dev/null
+++ b/modules/liftover/2.0/liftover.smk
@@ -0,0 +1,276 @@
+#!/usr/bin/env snakemake
+
+
+##### ATTRIBUTION #####
+
+
+# Original Author:  Kostiantyn Dreval
+# Module Author:    Kostiantyn Dreval
+# Contributors:     Laura Hilton
+
+
+##### SETUP #####
+
+
+# Import package with useful functions for developing analysis modules
+import oncopipe as op
+
+
+# Dependency check: stop the workflow unless the installed oncopipe is at least min_oncopipe_version. Copy this whole section into any module that uses new features in oncopipe.
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+# NOTE(review): `sys` and `logger` are never imported in this file; presumably Snakemake provides them, confirm.
+# TODO: declare "packaging" as a dependency of LCR-modules or oncopipe so that the fallback exit above is never needed.
+
+current_version = pkg_resources.get_distribution("oncopipe").version  # version of the installed oncopipe distribution
+if version.parse(current_version) < version.parse(min_oncopipe_version):  # compare parsed versions, not raw strings
+    logger.warning(  # the two-line message is wrapped in ANSI escape codes ('\x1b[0;31;40m' ... '\x1b[0m')
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section
+
+
+# Setup module and store module-specific configuration in `CFG`
+# `CFG` is a shortcut to `config["lcr-modules"]["liftover"]`
+CFG = op.setup_module(
+    name = "liftover",
+    version = "2.0",
+    subdirectories = ["inputs", "convert2bed", "liftover", "restore_from_bed", "outputs"])
+
+# Rules to be run locally when using a compute cluster. NOTE(review): _liftover_bed_2_bedpe is not listed although _liftover_bed_2_seg is; confirm this is intended.
+localrules:
+    _liftover_input_file,
+    _liftover_convert_2_bed,
+    _run_liftover,
+    _liftover_sort,
+    _liftover_bed_2_seg,
+    _liftover_fill_segments,
+    _liftover_output,
+    _liftover_all
+
+# "|"-separated tool-name lists from the config, used below as `tool` wildcard constraints for the CNV (seg) and SV (bedpe) rules
+cnv_tools = CFG["cnv_tools"]
+sv_tools = CFG["sv_tools"]
+
+##### RULES #####
+
+
+# Symlinks each input file twice: into the module inputs directory (CFG["dirs"]["inputs"]) and, under its original build, into the outputs directory (CFG["dirs"]["outputs"]) alongside the lifted files
+rule _liftover_input_file:
+    input:
+        tsv = CFG["inputs"]["sample_file"]
+    output:
+        tsv = CFG["dirs"]["inputs"] + "from--{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{tool}." + CFG["input_type"],
+        another_tsv = CFG["dirs"]["outputs"] + "from--{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{tool}." + CFG["input_type"]
+    wildcard_constraints:
+        tool = CFG["tool"]
+    run:
+        op.relative_symlink(input.tsv, output.tsv)
+        op.relative_symlink(input.tsv, output.another_tsv)
+
+# Convert the initial seg or bedpe file into bed format. {type} is "seg", "bedpeA" or "bedpeB" and selects which columns hold chromosome/start/end:
+# the configured column numbers for seg, columns 1-3 for bedpeA and 4-6 for bedpeB. NOTE(review): the lambdas read the global config rather than CFG, presumably because CFG is deleted by op.cleanup_module() before they are evaluated; confirm.
+rule _liftover_convert_2_bed:
+    input:
+        seg = str(rules._liftover_input_file.output.tsv)
+    output:
+        bed = CFG["dirs"]["convert2bed"] + "from--{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{tool}--{type}.bed",
+        header = temp(CFG["dirs"]["convert2bed"] + "from--{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{tool}--{type}.bed.header")
+    log:
+        stderr = CFG["logs"]["convert2bed"] + "from--{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{tool}--{type}.stderr.log"
+    params:
+        script = CFG["options"]["convert2bed"],
+        chr_colNum = lambda w: {"seg": config["lcr-modules"]["liftover"]["options"]["chr_colNum"], "bedpeA": 1, "bedpeB": 4}[w.type],
+        start_colNum = lambda w: {"seg": config["lcr-modules"]["liftover"]["options"]["start_colNum"], "bedpeA": 2, "bedpeB": 5}[w.type],
+        end_colNum = lambda w: {"seg": config["lcr-modules"]["liftover"]["options"]["end_colNum"], "bedpeA": 3, "bedpeB": 6}[w.type],
+    conda:
+        CFG["conda_envs"]["liftover-366"]
+    shell:
+        op.as_one_line("""
+        python {params.script} 
+        --inType {wildcards.type}
+        --input {input.seg} 
+        --output {output.bed} 
+        --chromColnum {params.chr_colNum} 
+        --startColnum {params.start_colNum} 
+        --endColnum {params.end_colNum}
+        2> {log.stderr}
+        """)
+
+
+def get_chain(wildcards):
+    # Pick the chain file from the source build: a build name containing "38" lifts to hg19, anything else lifts to hg38.
+    if "38" in str(wildcards.genome_build):  # was str({...}): an accidental set literal that only worked via the set's repr
+        return reference_files("genomes/{genome_build}/chains/grch38/hg38ToHg19.over.chain")
+    return reference_files("genomes/{genome_build}/chains/grch37/hg19ToHg38.over.chain")
+
+
+# Lift the bed file over to the other build with liftOver, in whichever direction {chain} names (hg38ToHg19 or hg19ToHg38); unmapped intervals go to a separate file
+rule _run_liftover:
+    input:
+        native = rules._liftover_convert_2_bed.output.bed,
+        chains = get_chain
+    output:
+        lifted = temp(CFG["dirs"]["liftover"] + "from--{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{tool}--{type}.lifted_{chain}.bed"),
+        unmapped = CFG["dirs"]["liftover"] + "from--{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{tool}--{type}.lifted_{chain}.unmapped.bed"
+    log:
+        stderr = CFG["logs"]["liftover"] + "from--{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{tool}--{type}.lifted_{chain}.stderr.log"
+    params:
+        mismatch = CFG["options"]["min_mismatch"]
+    conda:
+        CFG["conda_envs"]["liftover-366"]
+    wildcard_constraints:
+        chain = "hg38ToHg19|hg19ToHg38"
+    shell:
+        op.as_one_line("""
+        liftOver -minMatch={params.mismatch}
+        {input.native} {input.chains} 
+        {output.lifted} {output.unmapped}
+        2> {log.stderr}
+        """)
+
+# Sort liftover output (sort -k1,1 -k2,2n -V)
+# The perl filter keeps only rows whose first field is digits and/or "X" (optionally prefixed with "chr"), so Y, M and non-standard contigs are dropped
+rule _liftover_sort:
+    input:
+        lifted = rules._run_liftover.output.lifted
+    output:
+        lifted_sorted = temp(CFG["dirs"]["liftover"] + "from--{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{tool}--{type}.lifted_{chain}.sorted.bed")
+    log:
+        stderr = CFG["logs"]["liftover"] + "from--{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{tool}--{type}.lifted_{chain}.sorted.stderr.log"
+    shell:
+        op.as_one_line("""
+        sort -k1,1 -k2,2n -V {input.lifted} |
+        perl -ne 'print if /^(chr)*[\dX]+\s.+/'
+        > {output.lifted_sorted}
+        2> {log.stderr}
+        """)
+
+
+# Convert the lifted, sorted bed file ({type} = "seg") back into seg format, using the column header saved by _liftover_convert_2_bed; restricted to CNV tools by the `tool` wildcard constraint
+rule _liftover_bed_2_seg:
+    input:
+        lifted_sorted = expand(rules._liftover_sort.output.lifted_sorted, type = "seg", allow_missing = True),
+        headers = expand(rules._liftover_convert_2_bed.output.header, type = "seg", allow_missing = True)
+    output:
+        seg_lifted = CFG["dirs"]["restore_from_bed"] + "from--{seq_type}--{genome_build}/raw_segments/{tumour_id}--{normal_id}--{pair_status}.{tool}.lifted_{chain}.seg"
+    log:
+        stderr = CFG["logs"]["restore_from_bed"] + "from--{seq_type}--{genome_build}/raw_segments/{tumour_id}--{normal_id}--{pair_status}.{tool}.lifted_{chain}.stderr.log"
+    params:
+        script = CFG["options"]["convert2bed"]
+    conda:
+        CFG["conda_envs"]["liftover-366"]
+    wildcard_constraints: 
+        tool = cnv_tools
+    shell:
+        op.as_one_line("""
+        python {params.script} 
+        --inType "bed"
+        --input {input.lifted_sorted}
+        --column-header {input.headers}
+        --output {output.seg_lifted} 
+        2> {log.stderr}
+        """)
+# Rebuild the bedpe file from the lifted bedpeA/bedpeB beds (SV tools only). Both halves are declared as inputs, but only the bedpeA paths are passed: convert_for_liftover.py derives the bedpeB path from the bedpeA one.
+rule _liftover_bed_2_bedpe:
+    input:
+        lifted_sorted = expand(rules._liftover_sort.output.lifted_sorted, type = ["bedpeA", "bedpeB"], allow_missing = True),
+        headers = expand(rules._liftover_convert_2_bed.output.header, type = ["bedpeA", "bedpeB"], allow_missing = True)
+    output:
+        bedpe_lifted = CFG["dirs"]["restore_from_bed"] + "from--{seq_type}--{genome_build}/raw_segments/{tumour_id}--{normal_id}--{pair_status}.{tool}.lifted_{chain}.bedpe"
+    log:
+        stderr = CFG["logs"]["restore_from_bed"] + "from--{seq_type}--{genome_build}/raw_segments/{tumour_id}--{normal_id}--{pair_status}.{tool}.lifted_{chain}.stderr.log"
+    params:
+        script = CFG["options"]["convert2bed"]
+    conda:
+        CFG["conda_envs"]["liftover-366"]
+    wildcard_constraints: 
+        tool = sv_tools
+    shell:
+        op.as_one_line("""
+        python {params.script} 
+        --inType "bedpeA_bedpeB"
+        --input {input.lifted_sorted[0]}
+        --column-header {input.headers[0]}
+        --output {output.bedpe_lifted} 
+        2> {log.stderr}
+        """)
+
+
+# Fill in empty segments after lifting them over (CNV tools only). NOTE(review): the config also defines conda_envs["fill_segments"], but this rule runs in the liftover-366 env; confirm which is intended.
+rule _liftover_fill_segments:
+    input:
+        seg_lifted = str(rules._liftover_bed_2_seg.output.seg_lifted)
+    output:
+        seg_filled = CFG["dirs"]["restore_from_bed"] + "from--{seq_type}--{genome_build}/filled_segments/{tumour_id}--{normal_id}--{pair_status}.{tool}.lifted_{chain}.filled.seg"
+    log:
+        stdout = CFG["logs"]["restore_from_bed"] + "from--{seq_type}--{genome_build}/filled_segments/{tumour_id}--{normal_id}--{pair_status}.{tool}.lifted_{chain}.filled.stdout.log",
+        stderr = CFG["logs"]["restore_from_bed"] + "from--{seq_type}--{genome_build}/filled_segments/{tumour_id}--{normal_id}--{pair_status}.{tool}.lifted_{chain}.filled.stderr.log"
+    params:
+        script = CFG["options"]["fill_segments"],
+        chromArm = op.switch_on_wildcard("chain", CFG["chromArm"])
+    conda:
+        CFG["conda_envs"]["liftover-366"]
+    wildcard_constraints: 
+        tool = cnv_tools
+    shell:
+        op.as_one_line("""
+        python3 {params.script}
+        --input {input.seg_lifted}
+        --output {output.seg_filled}
+        --chromArm {params.chromArm}
+        > {log.stdout}
+        2> {log.stderr}
+        """)
+
+def get_final_output(wildcards):  # Pick the file to publish: lifted bedpe for SV tools, filled seg for CNV tools
+    if wildcards.tool in sv_tools.split("|"):  # exact name match; `in` on the raw "a|b|c" string was a substring test. SV is checked first because it took precedence before.
+        return str(rules._liftover_bed_2_bedpe.output.bedpe_lifted)
+    if wildcards.tool in cnv_tools.split("|"):
+        return str(rules._liftover_fill_segments.output.seg_filled)
+    raise ValueError(f"liftover: tool '{wildcards.tool}' is not listed in cnv_tools ('{cnv_tools}') or sv_tools ('{sv_tools}')")  # previously an UnboundLocalError
+
+# Symlinks the final output file (chosen by get_final_output according to the tool) into the module outputs directory, CFG["dirs"]["outputs"]
+rule _liftover_output:
+    input:
+        get_final_output
+    output:
+        CFG["dirs"]["outputs"] + "from--{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{tool}.lifted_{chain}." + CFG["input_type"]
+    run:
+        op.relative_symlink(input, output, in_module=True)
+
+
+# Target rule: for each run in CFG["runs"], requests the lifted output symlink and the symlink of the original input in the outputs directory; the chain direction is derived from the tumour genome build
+rule _liftover_all:
+    input:
+        expand(
+            [
+                str(rules._liftover_output.output),
+                str(rules._liftover_input_file.output.another_tsv)
+            ],
+            zip,  # Run expand() with zip(), not product()
+            tumour_id=CFG["runs"]["tumour_sample_id"],
+            normal_id=CFG["runs"]["normal_sample_id"],
+            genome_build = CFG["runs"]["tumour_genome_build"],
+            seq_type=CFG["runs"]["tumour_seq_type"],
+            pair_status=CFG["runs"]["pair_status"],
+            #repeat the tool name N times in expand so each pair in run is used
+            tool=[CFG["tool"]] * len(CFG["runs"]["tumour_sample_id"]),
+            chain=["hg38ToHg19" if "38" in str(x) else "hg19ToHg38" for x in CFG["runs"]["tumour_genome_build"]]
+            )
+            
+            
+
+##### CLEANUP #####
+
+
+# Perform some clean-up tasks, including storing the module-specific
+# configuration on disk and deleting the `CFG` variable
+op.cleanup_module(CFG)
diff --git a/modules/liftover/2.0/schemas/base-1.0.yaml b/modules/liftover/2.0/schemas/base-1.0.yaml
new file mode 120000
index 000000000..0a69d1ceb
--- /dev/null
+++ b/modules/liftover/2.0/schemas/base-1.0.yaml
@@ -0,0 +1 @@
+../../../../schemas/base/base-1.0.yaml
\ No newline at end of file
diff --git a/modules/liftover/2.0/src/convert_for_liftover.py b/modules/liftover/2.0/src/convert_for_liftover.py
new file mode 100644
index 000000000..bcc779480
--- /dev/null
+++ b/modules/liftover/2.0/src/convert_for_liftover.py
@@ -0,0 +1,220 @@
+"""
+This script converts segmentation (.seg) and BEDPE files into BED format so that their genomic coordinates can be converted by the liftOver tool.
+In addition, this script can regenerate a segmentation or BEDPE file from the BED file(s) produced by the liftOver conversion.
+"""
+
+
+#!/usr/bin/python
+
+import pandas as pd
+import argparse
+import simplejson
+import sys
+
+
+
+def main():
+    # initiate the parser and handle arguments from command line
+    args = parse_args()
+
+    # path to the file to be converted (--input)
+    input_file = args.input
+    # path to the resulting converted file (--output)
+    output_file = args.output
+    # the declared input type (--inType) selects which conversion is performed
+    input_format = args.inType
+    # .seg and single-sided bedpe inputs are converted to .bed; the 1-based column numbers from the command line become 0-based indices
+    if input_format in ["seg", "bedpeA", "bedpeB"]:
+        check_arguments(args, input_format)
+        chromosome = args.chromColnum-1
+        start = args.startColnum-1
+        end = args.endColnum-1
+        tsv_to_bed(input_file, output_file, chromosome, start, end, input_format)
+    # if input is .bed file, load the column names (JSON) and convert it to the .seg file
+    elif input_format == "bed":
+        check_arguments(args, input_format)
+        column_names_path=args.column_header
+        with open(column_names_path) as f:
+            column_names=simplejson.load(f)
+        bed_to_seg(input_file, column_names, output_file)
+    elif input_format == "bedpeA_bedpeB": 
+        check_arguments(args, input_format)
+        bedpeA = input_file
+        bedpeB = input_file.replace("bedpeA", "bedpeB")  # B-side file path is derived from the A-side path
+        column_names_path=args.column_header
+        with open(column_names_path) as f:
+            column_names=simplejson.load(f)
+        bed_to_bedpe(bedpeA, bedpeB, column_names, output_file)
+    else:
+        sys.exit('Please provide input file in either .seg, .bedpe, or .bed format')
+
+# Counts VCF header lines (lines beginning with "##") for bedpe file parsing; data rows that merely contain "##" are not counted
+def count_header(fname):
+    i=0
+    with open(fname, 'r') as f:
+        for line in f:
+            if line.startswith("##"):
+                i += 1
+    return i
+
+
+def tsv_to_bed(input_file, output_file, chromosome, start, end, inType):
+    # Convert a tab-separated .seg/bedpe file into a 4-column BED file and save its column names to "<output_file>.header"
+    # Some bedpe files start with a VCF header
+    # Since the header is distinguished by "##" and Pandas can only use a single-character comment, 
+    # we must parse the file and count the comment lines and indicate to read_table how many 
+    # lines to skip. 
+
+    if inType in ["bedpeA", "bedpeB"]: 
+        skip_lines = count_header(input_file)
+        print("Skipping " + str(skip_lines) + " lines from the bedpe VCF header. ")
+    else:
+        skip_lines = 0
+    
+    # import the input table (.seg or bedpe)
+    seg = pd.read_table(input_file, skiprows=skip_lines)
+
+    seg.rename(columns={seg.columns[chromosome]: "chrom", seg.columns[start]: "start", seg.columns[end]: "end"}, inplace=True)
+    seg.fillna('NA', inplace=True) 
+
+    # Drop partner coordinates for bedpe so column 4 is identical in both files
+    if(inType == "bedpeA"):
+        seg.drop(seg.columns[[3, 4, 5]], axis = 1, inplace=True)
+    if(inType == "bedpeB"): 
+        seg.drop(seg.columns[[0, 1, 2]], axis = 1, inplace=True)
+
+    # rearrange columns order to have first 3 cols according the BED format
+    bed = seg.loc[:, ['chrom', 'start', 'end']].copy()
+    bed_other = seg.drop(['chrom', 'start', 'end'], axis=1)
+
+    # Create collapsed column name from all non-coordinate colnames
+    other_colnames = "|".join(list(bed_other.columns))
+
+    # Collapse all non-coordinate values of each row into one "|"-delimited string, stored in a
+    # single column named after the collapsed column names. `bed` is an explicit copy of the
+    # coordinate columns, so the assignments below never write into a view of `seg`.
+
+    bed.loc[:, other_colnames] = bed_other.apply(lambda x: '|'.join(x.astype(str).values), axis=1)
+    # shift start position by 1 to the left
+    bed.loc[:, 'start'] = bed['start'].apply(lambda x: int(x-1))
+    # check that chromosomes are prefixed and prefix if they are not
+    chrom = list(bed['chrom'])
+    for i in range(len(chrom)):
+        if not str(chrom[i]).startswith('chr'):
+            chrom[i]='chr'+str(chrom[i])
+    bed.loc[:, 'chrom']=chrom
+
+    # remove all columns with extra information that was just concatenated into a single column
+    bed = bed[['chrom', 'start', 'end', other_colnames]]
+
+    # write resulting data frame to the output file
+    bed.to_csv(output_file, header=False, index=False, sep="\t")
+
+    # save column names in a separate file with the same name and .header
+    # create a list of column names
+    col_names = list(bed.columns.values)
+    # write to a file
+    output_col_names=output_file+'.header'
+    # the context manager closes the header file even if serialisation fails
+    with open(output_col_names, "w") as outF:
+        simplejson.dump(col_names, outF)
+
+
+def restore_columns(df): 
+    # Expand the last column of df (values collapsed with "|" under a "|"-joined column name) back into separate columns
+    # Extract the coordinate columns (everything but the concatenated columns)
+    coord_columns = df.iloc[:,:-1]
+
+    # Get the remaining non-coordinate column
+    other_columns = df.iloc[:,-1:].reset_index()
+    # Create a list of column names from the split colname
+    names = other_columns.columns.values[-1]
+    col_names = list(str(names).split('|'))
+
+    # then split values for each feature in a column
+    other_columns = other_columns.join(other_columns[names].str.split('|', expand=True))
+    other_columns = other_columns.iloc[:, 2:]  # drop the index column added by reset_index and the collapsed column
+    other_columns.columns = col_names
+    # Join the coordinate columns with the other columns
+    filled = coord_columns.join(other_columns)
+
+    return(filled)
+
+    
+def bed_to_seg(input_file, column_names, output_file):
+    # import the .bed file (it has no header row; column names are supplied by the caller)
+    seg = pd.read_table(input_file, index_col=None, header=None, names=column_names)
+    seg.fillna('NA', inplace=True)
+    # shift start position by 1 to the right (undoes the left shift applied in tsv_to_bed)
+    seg['start'] = seg['start'].apply(lambda x: x+1)
+
+    seg = restore_columns(seg)
+
+    # rearrange column order to match the original .seg file (ID column first)
+    ID_column = seg[['ID']]
+    other_columns = seg.drop(['ID'], axis=1)
+    seg = ID_column.join(other_columns)
+
+    # write resulting data frame to the output file
+    seg.to_csv(output_file, header=True, index=False, sep="\t")
+
+
+
+
+def bed_to_bedpe(bedpeA, bedpeB, column_names, output_file):
+    # import .bed files
+    bedpeA = pd.read_table(bedpeA, index_col=None, header=None, names=column_names)
+    bedpeA.fillna('NA', inplace=True)
+    # Rename the coordinate columns so they are labelled as the A side of the pair
+    bedpeA.rename(columns={"chrom": "CHROM_A", "start": "START_A", "end": "END_A"}, inplace=True)
+
+    bedpeB = pd.read_table(bedpeB, index_col=None, header=None, names=column_names)
+    bedpeB.fillna('NA', inplace=True)
+    # Rename the coordinate columns so they are labelled as the B side of the pair
+    bedpeB.rename(columns={"chrom": "CHROM_B", "start": "START_B", "end": "END_B"}, inplace=True)
+
+    # Join bed files to make bedpe, matching rows on the collapsed (last) column
+    # NOTE(review): unlike bed_to_seg, start positions are not shifted back by +1 here although tsv_to_bed shifts them by -1 -- confirm this is intended
+    bedpe = pd.merge(bedpeA, bedpeB, on = column_names[-1])
+    bedpe = bedpe.iloc[:, [0, 1, 2, 4, 5, 6, 3]]
+
+    bedpe = restore_columns(bedpe)
+
+    bedpe.to_csv(output_file, header=True, index=False, sep="\t")
+
+
+def check_arguments(args, input_format):
+    if input_format in ['seg', "bedpeA", "bedpeB"] and not all([args.chromColnum, args.startColnum, args.endColnum]):
+        raise ValueError ('Must specify number of columns in segmentation file containing name of chromosome, starting and ending position of the feature')
+
+    if input_format in ['bed', 'bedpeA_bedpeB'] and not args.column_header:
+        raise ValueError ('Must specify file containing header of the BED file used for liftOver conversion')
+
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--input",
+                        help="Initial file to convert to/from .bed format", required=True)
+    parser.add_argument("--output",
+                        help="Resulting file after conversion", required=True)
+    parser.add_argument("--column-header",
+                        help="When converting from .bed to .seg format, provide file containing column headers")
+    parser.add_argument("--chromColnum", type=int,
+                        help="number of column in a segmentation file that contains information about name of chromosome")
+    parser.add_argument("--startColnum", type=int,
+                        help="number of column in a segmentation file that contains information about start position of the feature")
+    parser.add_argument("--endColnum", type=int,
+                        help="number of column in a segmentation file that contains information about end position of the feature")
+    parser.add_argument("--inType", 
+                        help="Type of input file (bedpeA, bedpeB, seg, bed, or bedpeA_bedpeB)", type=str, required=True)
+
+    args, unknown = parser.parse_known_args()
+
+    return args
+
+
+
+if __name__ == '__main__':
+    main()
diff --git a/modules/liftover/CHANGELOG.md b/modules/liftover/CHANGELOG.md
index b6b5404cc..9b8480ce4 100644
--- a/modules/liftover/CHANGELOG.md
+++ b/modules/liftover/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to the `liftover` module will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [2.0] - 2021-12-29
+
+This release was updated by Laura Hilton
+
+- Add support for bedpe files
+
 ## [1.2] - 2021-01-11
 
 - This version significantly improves module functionality by introduction of the following changes: support of biderectional conversion of genomic coordinates (hg19 -> hg38 and hg38 -> hg19), sorting of converted bed files, filtering converted outputs to include standard chromosomes only, and filling of the gaps with empty segments to ensure compatiability with Level 3 analyses. In addition, chain files is now generated through reference_files workflow and therefore there is no need to include it with the module `src`.
diff --git a/modules/lofreq/1.0/lofreq.smk b/modules/lofreq/1.0/lofreq.smk
index d109b7294..03369bb50 100644
--- a/modules/lofreq/1.0/lofreq.smk
+++ b/modules/lofreq/1.0/lofreq.smk
@@ -26,8 +26,10 @@ except ModuleNotFoundError:
 
 current_version = pkg_resources.get_distribution("oncopipe").version
 if version.parse(current_version) < version.parse(min_oncopipe_version):
-    print('\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + '\x1b[0m')
-    print('\x1b[0;31;40m' + f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m')
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
     sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
 
 # End of dependency checking section 
@@ -63,9 +65,9 @@ rule _lofreq_input_bam:
         bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai",
         crai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.crai"
     run:
-        op.relative_symlink(input.bam, output.bam)
-        op.relative_symlink(input.bai, output.bai)
-        op.relative_symlink(input.bai, output.crai)
+        op.absolute_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bai, output.bai)
+        op.absolute_symlink(input.bai, output.crai)
 
 
 # Run LoFreq in somatic variant calling mode
diff --git a/modules/lofreq/1.1/config/default.yaml b/modules/lofreq/1.1/config/default.yaml
index 948d71887..6eca82147 100644
--- a/modules/lofreq/1.1/config/default.yaml
+++ b/modules/lofreq/1.1/config/default.yaml
@@ -19,8 +19,8 @@ lcr-modules:
             #                      {normal_id} {pair_status}
             regions_bed:
                 _default: ""
-                # Set to "" if you have no capture data
-                capture: "-l __UPDATE__"
+                # Leave both "" if you have no capture data. 
+                capture: ""
 
         conda_envs:
             lofreq: "{MODSDIR}/envs/lofreq-2.1.5.yml"
diff --git a/modules/lofreq/1.1/lofreq.smk b/modules/lofreq/1.1/lofreq.smk
index f7e011d9b..4a8e57c20 100644
--- a/modules/lofreq/1.1/lofreq.smk
+++ b/modules/lofreq/1.1/lofreq.smk
@@ -15,7 +15,7 @@
 import oncopipe as op
 
 # Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
-min_oncopipe_version="1.0.11"
+min_oncopipe_version="1.0.12"
 import pkg_resources
 try:
     from packaging import version
@@ -43,21 +43,42 @@ CFG = op.setup_module(
 SCRIPT_PATH = CFG['inputs']['src_dir']
 #this is used in place of the shell.prefix() because that was not working consistently. This is not ideal. 
 
+#obtain default bed and update config
+bed = str(reference_files("genomes/{genome_build}/genome_fasta/main_chromosomes.bed"))
+CFG['switches']['regions_bed']['_default'] = bed
 
-sample_ids = list(CFG['samples']['sample_id'])
+sample_ids = list(config['lcr-modules']['_shared']['samples']['sample_id'])
 unmatched_normal_ids = list(config["lcr-modules"]["_shared"]["unmatched_normal_ids"].values())
-
 all_other_ids = list(set(sample_ids) - set(unmatched_normal_ids))
 
-
-
-
 # Define rules to be run locally when using a compute cluster
 localrules:
     _lofreq_input_bam,
     _lofreq_output_vcf,
     _lofreq_all,
-
+    _lofreq_link_to_preprocessed
+
+def _lofreq_get_capspace(wildcards):
+    CFG=config["lcr-modules"]["lofreq"]
+    default_bed = reference_files("genomes/{genome_build}/genome_fasta/main_chromosomes.bed")
+    if str(wildcards.seq_type) in CFG['switches']['regions_bed'].keys():
+        custom_bed = CFG['switches']['regions_bed'][wildcards.seq_type]
+    else:
+        custom_bed = default_bed
+    try:
+        if "tumour_id" in wildcards.keys():
+        # Get the appropriate capture space for this sample
+            this_bed = op.get_capture_space(CFG, wildcards.tumour_id, wildcards.genome_build, wildcards.seq_type, "bed")
+        else:
+            this_bed = op.get_capture_space(CFG, wildcards.normal_id, wildcards.genome_build, wildcards.seq_type, "bed")
+        this_bed = reference_files(this_bed)
+    except NameError:
+        # If we are using an older version of the reference workflow, use the same region file as the genome sample 
+        this_bed = custom_bed if custom_bed else default_bed
+    # If this is a genome sample, return a BED file listing all chromosomes
+    if wildcards.seq_type != "capture":
+        return custom_bed if custom_bed else default_bed
+    return this_bed
 
 ##### RULES #####
 
@@ -76,20 +97,19 @@ rule _lofreq_input_bam:
         op.relative_symlink(input.bai, output.bai)
         op.relative_symlink(input.bai, output.crai)
 
-
 # Run LoFreq in somatic variant calling mode on a single unmatched pair to produce normal_relaxed.vcf.gz and normal_stringent vcfs
 # generate an empty file named preprocessing_complete to indicate that the run actually completed and it's safe to symlink to the outputs
 # This rule falsely provides the normal bam as both the tumour and normal to get around the requirement of two bams for input to lofreq somatic
-rule _lofreq_preprocess_normal_unmatched:
+rule _lofreq_preprocess_normal:
     input:
         normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam",
         fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"),
         dbsnp = reference_files("genomes/{genome_build}/variation/dbsnp.common_all-151.vcf.gz"), #in our experience, this filter doesn't remove as many SNPs as one would expect
-        bed = reference_files("genomes/{genome_build}/genome_fasta/main_chromosomes.bed")
+        bed = _lofreq_get_capspace
     output:
-        out_dir = directory(CFG["dirs"]["lofreq_normal"] + "{seq_type}--{genome_build}/{normal_id}/"),
         preprocessing_start = CFG["dirs"]["lofreq_normal"] + "{seq_type}--{genome_build}/{normal_id}/preprocessing.started",
-        vcf_relaxed = temp(CFG["dirs"]["lofreq_normal"] + "{seq_type}--{genome_build}/{normal_id}/normal_relaxed.vcf.gz"),
+        vcf_relaxed = CFG["dirs"]["lofreq_normal"] + "{seq_type}--{genome_build}/{normal_id}/normal_relaxed.vcf.gz",
+        vcf_relaxed_tbi = CFG["dirs"]["lofreq_normal"] + "{seq_type}--{genome_build}/{normal_id}/normal_relaxed.vcf.gz.tbi",
         vcf_indels_stringent = CFG["dirs"]["lofreq_normal"] + "{seq_type}--{genome_build}/{normal_id}/normal_stringent.indels.vcf.gz",
         vcf_snvs_stringent = CFG["dirs"]["lofreq_normal"] + "{seq_type}--{genome_build}/{normal_id}/normal_stringent.snvs.vcf.gz",
         normal_relaxed_log = CFG["dirs"]["lofreq_normal"] + "{seq_type}--{genome_build}/{normal_id}/normal_relaxed.log",
@@ -98,14 +118,15 @@ rule _lofreq_preprocess_normal_unmatched:
         stdout = CFG["logs"]["lofreq_normal"] + "{seq_type}--{genome_build}/{normal_id}/lofreq_pre.stdout.log",
         stderr = CFG["logs"]["lofreq_normal"] + "{seq_type}--{genome_build}/{normal_id}/lofreq_pre.stderr.log"
     params:
-        opts = CFG["options"]["lofreq"],
-        regions = op.switch_on_wildcard("seq_type", CFG["switches"]["regions_bed"]),
+        opts = CFG["options"]["lofreq"]
     conda:
         CFG["conda_envs"]["lofreq"]
     threads:
         CFG["threads"]["lofreq"]
     resources:
         **CFG["resources"]["lofreq"]
+    wildcard_constraints:
+        normal_id="|".join(unmatched_normal_ids)
     shell:
         op.as_one_line("""
         SCRIPT_PATH={SCRIPT_PATH};
@@ -116,12 +137,41 @@ rule _lofreq_preprocess_normal_unmatched:
             touch {output.preprocessing_start}
             && 
             lofreq somatic --normal_only {params.opts} --threads {threads} -t {input.normal_bam} -n {input.normal_bam}
-            -f {input.fasta} -o {output.out_dir}/ -d {input.dbsnp} --bed {input.bed}
+            -f {input.fasta} -o $(dirname {output.vcf_relaxed})/ -d {input.dbsnp} --bed {input.bed}
             > {log.stdout} 2> {log.stderr} && 
             touch {output.preprocessing_complete};
         else echo "WARNING: PATH is not set properly, using $(which lofreq2_call_pparallel.py)"; fi
         """)
 
+# Symlinks the pre-processed normal files (plus the .tbi indexes of both stringent VCFs) into the output directory of each unmatched pair
+rule _lofreq_link_to_preprocessed:
+    input:
+        vcf_relaxed = str(rules._lofreq_preprocess_normal.output.vcf_relaxed),
+        vcf_relaxed_tbi = str(rules._lofreq_preprocess_normal.output.vcf_relaxed_tbi),
+        vcf_indels_stringent = str(rules._lofreq_preprocess_normal.output.vcf_indels_stringent),
+        vcf_snvs_stringent = str(rules._lofreq_preprocess_normal.output.vcf_snvs_stringent),
+        normal_relaxed_log = str(rules._lofreq_preprocess_normal.output.normal_relaxed_log),
+        preprocessing_complete = str(rules._lofreq_preprocess_normal.output.preprocessing_complete)
+    output:
+        vcf_relaxed = CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/normal_relaxed.vcf.gz",
+        vcf_relaxed_tbi = CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/normal_relaxed.vcf.gz.tbi",
+        vcf_indels_stringent = CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/normal_stringent.indels.vcf.gz",
+        vcf_snvs_stringent = CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/normal_stringent.snvs.vcf.gz",
+        normal_relaxed_log = CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/normal_relaxed.log",
+        preprocessing_complete = CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/preprocessing.complete"
+    wildcard_constraints: 
+        pair_status = "unmatched"
+    run:
+        op.relative_symlink(input.vcf_relaxed, output.vcf_relaxed, in_module=True)
+        op.relative_symlink(input.vcf_relaxed_tbi, output.vcf_relaxed_tbi, in_module=True)
+        op.relative_symlink(input.vcf_indels_stringent, output.vcf_indels_stringent, in_module=True)
+        op.relative_symlink(input.vcf_indels_stringent + ".tbi", output.vcf_indels_stringent + ".tbi", in_module=True)
+        op.relative_symlink(input.vcf_snvs_stringent, output.vcf_snvs_stringent, in_module=True)
+        op.relative_symlink(input.vcf_snvs_stringent + ".tbi", output.vcf_snvs_stringent + ".tbi", in_module=True)
+        op.relative_symlink(input.normal_relaxed_log, output.normal_relaxed_log, in_module=True)
+        op.relative_symlink(input.preprocessing_complete, output.preprocessing_complete, in_module=True)
+
+
 # Run LoFreq in somatic variant calling mode for unpaired tumours using precomputed SNV positions from unmatched normal
 # For unmatched tumours we need to symlink all the files from the preprocessing to the pair's output directory
 # Matched and unmatched tumours are handled identically here. The normal is only run once and all unmatched tumours that rely on it
@@ -129,18 +179,14 @@ rule _lofreq_preprocess_normal_unmatched:
 # The creation of symbolic links in this rule could probably be separated out into a rule that uses Oncopipe's relative_symlink
 rule _lofreq_run_tumour_unmatched:
     input:
-        vcf_relaxed = rules._lofreq_preprocess_normal_unmatched.output.vcf_relaxed,
-        vcf_indels_stringent = rules._lofreq_preprocess_normal_unmatched.output.vcf_indels_stringent,
-        vcf_snvs_stringent = rules._lofreq_preprocess_normal_unmatched.output.vcf_snvs_stringent,
-        normal_relaxed_log = rules._lofreq_preprocess_normal_unmatched.output.normal_relaxed_log,
-        preprocessing_complete = rules._lofreq_preprocess_normal_unmatched.output.preprocessing_complete,
+        preprocessing_complete = str(rules._lofreq_link_to_preprocessed.output.preprocessing_complete),
         tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam",
         normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam",
+        vcf_relaxed = str(rules._lofreq_link_to_preprocessed.output.vcf_relaxed),
         fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"),
         dbsnp = reference_files("genomes/{genome_build}/variation/dbsnp.common_all-151.vcf.gz"), #in our experience, this filter doesn't remove as many SNPs as one would expect
-        bed = reference_files("genomes/{genome_build}/genome_fasta/main_chromosomes.bed")
+        bed = _lofreq_get_capspace
     output:
-        out_dir = directory(CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/"),
         vcf_snvs_filtered = CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/somatic_final_minus-dbsnp.snvs.vcf.gz",
         vcf_indels_filtered = CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/somatic_final_minus-dbsnp.indels.vcf.gz",
         vcf_snvs_all = CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/somatic_final.snvs.vcf.gz",
@@ -156,15 +202,10 @@ rule _lofreq_run_tumour_unmatched:
         CFG["threads"]["lofreq"]
     resources:
         **CFG["resources"]["lofreq"]
+    wildcard_constraints: 
+        pair_status = "unmatched" 
     shell:
         op.as_one_line("""
-        ln -f {input.vcf_relaxed} {output.out_dir};
-        ln -f {input.vcf_relaxed}.tbi {output.out_dir};
-        ln -f {input.vcf_indels_stringent} {output.out_dir};
-        ln -f {input.vcf_indels_stringent}.tbi {output.out_dir};
-        ln -f {input.vcf_snvs_stringent} {output.out_dir};
-        ln -f {input.vcf_snvs_stringent}.tbi {output.out_dir};
-        ln -f {input.normal_relaxed_log} {output.out_dir};
         SCRIPT_PATH={SCRIPT_PATH};
         PATH=$SCRIPT_PATH:$PATH;
         SCRIPT="$SCRIPT_PATH/lofreq2_call_pparallel.py";
@@ -172,10 +213,48 @@ rule _lofreq_run_tumour_unmatched:
             echo "using bundled patched script $SCRIPT";
             lofreq somatic --continue {params.opts} --threads {threads} -t {input.tumour_bam} -n {input.normal_bam}
             -f {input.fasta} -o $(dirname {output.vcf_snvs_filtered})/ -d {input.dbsnp} --bed {input.bed}
-            > {log.stdout} 2> {log.stderr} && rm -f {input.vcf_relaxed} && rm -f {output.out_dir}/normal_relaxed.vcf.gz ;
+            > {log.stdout} 2> {log.stderr};
         else echo "WARNING: PATH is not set properly, using $(which lofreq2_call_pparallel.py)"; fi
         """)
 
+rule _lofreq_run_tumour_matched:
+    input:
+        tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam",
+        normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam",
+        fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"),
+        dbsnp = reference_files("genomes/{genome_build}/variation/dbsnp.common_all-151.vcf.gz"), #in our experience, this filter doesn't remove as many SNPs as one would expect
+        bed = _lofreq_get_capspace
+    output:
+        vcf_relaxed = temp(CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/normal_relaxed.vcf.gz"),
+        vcf_snvs_filtered = temp(CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/somatic_final_minus-dbsnp.snvs.vcf.gz"),
+        vcf_indels_filtered = temp(CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/somatic_final_minus-dbsnp.indels.vcf.gz"),
+        vcf_snvs_all = temp(CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/somatic_final.snvs.vcf.gz"),
+        vcf_indels_all = temp(CFG["dirs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/somatic_final.indels.vcf.gz")
+    log:
+        stdout = CFG["logs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/lofreq.stdout.log",
+        stderr = CFG["logs"]["lofreq_somatic"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/lofreq.stderr.log"
+    params:
+        opts = CFG["options"]["lofreq"]
+    conda:
+        CFG["conda_envs"]["lofreq"]
+    threads:
+        CFG["threads"]["lofreq"]
+    resources:
+        **CFG["resources"]["lofreq"]
+    wildcard_constraints: 
+        pair_status = "matched"
+    shell:
+        op.as_one_line("""
+        SCRIPT_PATH={SCRIPT_PATH};
+        PATH=$SCRIPT_PATH:$PATH;
+        SCRIPT="$SCRIPT_PATH/lofreq2_call_pparallel.py";
+        if [[ $(which lofreq2_call_pparallel.py) =~ $SCRIPT ]]; then 
+            echo "using bundled patched script $SCRIPT";
+            lofreq somatic {params.opts} --threads {threads} -t {input.tumour_bam} -n {input.normal_bam}
+            -f {input.fasta} -o $(dirname {output.vcf_snvs_filtered})/ -d {input.dbsnp} --bed {input.bed}
+            > {log.stdout} 2> {log.stderr};
+        else echo "WARNING: PATH is not set properly, using $(which lofreq2_call_pparallel.py)"; fi
+        """)
 
 # indels are not yet called but this rule merges the empty indels file with the snvs file to produce the consistently named "combined" vcf. 
 rule _lofreq_combine_vcf:
@@ -210,8 +289,8 @@ rule _lofreq_combine_vcf:
 
 rule _lofreq_filter_vcf:
     input:
-        vcf_all = rules._lofreq_combine_vcf.output.vcf_all,
-        vcf_all_filtered = rules._lofreq_combine_vcf.output.vcf_all_filtered
+        vcf_all = str(rules._lofreq_combine_vcf.output.vcf_all),
+        vcf_all_filtered = str(rules._lofreq_combine_vcf.output.vcf_all_filtered)
     output:
         vcf_all_clean = CFG["dirs"]["filtered"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/somatic_final.combined.filtered.vcf.gz",
         vcf_all_filtered_clean = CFG["dirs"]["filtered"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/somatic_final_minus-dbsnp.combined.filtered.vcf.gz",
@@ -236,8 +315,8 @@ rule _lofreq_filter_vcf:
 # Symlinks the final output files into the module results directory (under '99-outputs/')
 rule _lofreq_output_vcf:
     input:
-        vcf_all = rules._lofreq_filter_vcf.output.vcf_all_clean,
-        vcf_all_filtered = rules._lofreq_filter_vcf.output.vcf_all_filtered_clean
+        vcf_all = str(rules._lofreq_filter_vcf.output.vcf_all_clean),
+        vcf_all_filtered = str(rules._lofreq_filter_vcf.output.vcf_all_filtered_clean)
     output:
         vcf_all = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.lofreq.snvs.vcf.gz",
         vcf_all_filtered = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}_minus-dbsnp.lofreq.snvs.vcf.gz"
@@ -253,8 +332,8 @@ rule _lofreq_all:
     input:
         expand(
             [
-                rules._lofreq_output_vcf.output.vcf_all,
-                rules._lofreq_output_vcf.output.vcf_all_filtered,
+                str(rules._lofreq_output_vcf.output.vcf_all),
+                str(rules._lofreq_output_vcf.output.vcf_all_filtered),
             ],
             zip,  # Run expand() with zip(), not product()
             seq_type=CFG["runs"]["tumour_seq_type"],
diff --git a/modules/lofreq/1.1/src/lofreq2_call_pparallel.py b/modules/lofreq/1.1/src/lofreq2_call_pparallel.py
index 2fa76c9a4..e3728554e 100755
--- a/modules/lofreq/1.1/src/lofreq2_call_pparallel.py
+++ b/modules/lofreq/1.1/src/lofreq2_call_pparallel.py
@@ -574,7 +574,9 @@ def main():
         # argument
         bam_sqs = set([b[0] for b in bam_bins])
         bed_sqs = set([b[0] for b in bed_bins])
-        if len(bed_bins) > 100*len(bam_bins) and len(bed_sqs) > len(bam_sqs)/10.0:
+        #edited to handle edge case in which a bam file header has tons of contigs.
+        #This can lead to "argument list too long" in a latter step
+        if len(bed_bins) > 50*len(bam_bins) or len(bed_bins) > 200000:
             bed_sqs = set([b[0] for b in bed_bins])
             bins = [b for b in bam_bins if b[0] in bed_sqs]
             lofreq_call_args.extend(['-l', bed_file])
diff --git a/modules/lofreq/CHANGELOG.md b/modules/lofreq/CHANGELOG.md
index 7be5302c8..8a1dd3f1d 100644
--- a/modules/lofreq/CHANGELOG.md
+++ b/modules/lofreq/CHANGELOG.md
@@ -5,6 +5,9 @@ All notable changes to the `lofreq` module will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.1] - 2021-05-12
+- the last revision introduced a bug that caused the pipeline to prematurely delete a file and fail when the missing file was noticed. This bug has been fixed. This revision also fixes an edge case in Lofreq that was causing certain bams to fail. The cause was over 2000 contigs in the bam header due to a non-standard reference genome.
+
 ## [1.1] - 2021-05-04
 - all previous versions ran the pre-processing of the normal and tumour bams in one rule. I've modified two of the lofreq scripts to allow each step to be run separately. 
 - Now, preprocessing of the normal is done once for each normal (i.e. unmatched normal is not processed per pair). This should enhance the efficiency when running many samples in unmatched-normal mode.
diff --git a/modules/lymphgen/1.0/config/default.yaml b/modules/lymphgen/1.0/config/default.yaml
new file mode 100644
index 000000000..53fc68ca9
--- /dev/null
+++ b/modules/lymphgen/1.0/config/default.yaml
@@ -0,0 +1,39 @@
+lcr-modules:
+    
+    lymphgen:
+
+        inputs:
+            # Available wildcards: None (specify your merged input MAF/SEG manually)
+            sample_maf: "__UPDATE__"
+            sample_seg: "__UPDATE__"
+            sample_sv_info: "__UPDATE__"
+            # Path to the directory where LGenIC should be installed
+            lgenic_exec: "__UPDATE__"
+
+        options:
+            outprefix: "my_cohort"
+            reformat_seg:  #  Default columns for Battenberg output
+                Tumor_Sample_Barcode: "ID"
+                chromosome: "chrom"
+                start: "start"
+                end: "end"
+                CN: "log.ratio"
+            add_svs:  # Use "" if N/A (you are not providing SVs)
+                samplecol: "__UPDATE__"
+                bcl2col: "__UPDATE__"
+                # Note any field value not present in either true/false is considered NA
+                bcl2truevalues: "__UPDATE__" # Can use a list here
+                bcl2falsevalues: "__UPDATE__"
+                bcl6col: "__UPDATE__"
+                # Note any field value not present in either true/false is considered NA
+                bcl6truevalues: "__UPDATE__"  # Can use a list
+                bcl6falsevalues: "__UPDATE__"
+            lymphgen_input:
+                seq_type: "genome"  # Available options: exome/genome (they do the same thing) and targeted (for panels)
+                use_log_ratio: "__UPDATE__"
+            lymphgen_run:
+                lymphgen_path: "/projects/rmorin/software/lcr-scripts/LymphGen/Run_Lymphgen.R"
+
+        conda_envs:
+            optparse: "{MODSDIR}/envs/r-optparse-1.6.yaml"
+            sorted_containers: "{MODSDIR}/envs/sortedcontainers-2.4.0.yaml"
diff --git a/modules/lymphgen/1.0/envs/r-optparse-1.6.yaml b/modules/lymphgen/1.0/envs/r-optparse-1.6.yaml
new file mode 100644
index 000000000..4e4ea31dc
--- /dev/null
+++ b/modules/lymphgen/1.0/envs/r-optparse-1.6.yaml
@@ -0,0 +1,6 @@
+name: r-base
+channels:
+    - r
+dependencies:
+    - r-base = 3.6
+    - r-optparse = 1.6
diff --git a/modules/lymphgen/1.0/envs/sortedcontainers-2.4.0.yaml b/modules/lymphgen/1.0/envs/sortedcontainers-2.4.0.yaml
new file mode 100644
index 000000000..ca6e44a01
--- /dev/null
+++ b/modules/lymphgen/1.0/envs/sortedcontainers-2.4.0.yaml
@@ -0,0 +1,7 @@
+name: sortedcontainers
+channels:
+    - anaconda
+    - defaults
+dependencies:
+    - sortedcontainers=2.4.0
+prefix: /home/anaconda3/envs/sortedcontainers/
diff --git a/modules/lymphgen/1.0/lymphgen.smk b/modules/lymphgen/1.0/lymphgen.smk
new file mode 100644
index 000000000..c5e1e06a8
--- /dev/null
+++ b/modules/lymphgen/1.0/lymphgen.smk
@@ -0,0 +1,394 @@
+#!/usr/bin/env snakemake
+import os
+import pandas
+
+##### ATTRIBUTION #####
+
+
+# Original Author:  Chris Rushton
+# Module Author:    Chris Rushton
+# Contributors:     NA
+
+# LymphGen classifier written by George Wright
+# DO NOT DISTRIBUTE THE LYMPHGEN SOURCE CODE WITHOUT GEORGE'S PERMISSION
+# This module has been brought to you by Krysta's Girl Guide cookies. Helping me
+# miss all weight loss goals since 2018
+
+##### SETUP #####
+
+
+# Import package with useful functions for developing analysis modules
+import oncopipe as op
+
+# Setup module and store module-specific configuration in `CFG`
+# `CFG` is a shortcut to `config["lcr-modules"]["lymphgen"]`
+CFG = op.setup_module(
+    name = "lymphgen",
+    version = "1.0",
+    subdirectories = ["inputs", "reformat_seg", "lymphgen_input", "add_svs", "lymphgen_run", "outputs"],
+)
+
+# Define rules to be run locally when using a compute cluster
+# I put everything under this, since these rules don't take very long
+localrules:
+    _install_lgenic,
+    _lymphgen_input_cnv,
+    _lymphgen_input_no_cnv,
+    _lymphgen_add_sv,
+    _lymphgen_add_sv_blank,
+    _lymphgen_run_cnv,
+    _lymphgen_run_no_cnv,
+    _lymphgen_reformat_seg,
+    _lymphgen_output_txt,
+    _lymphgen_all,
+
+
+# Sanitize input
+lgenic_path = CFG["inputs"]["lgenic_exec"]
+if not lgenic_path.endswith(os.sep) and lgenic_path != "":
+    CFG["inputs"]["lgenic_exec"] = CFG["inputs"]["lgenic_exec"] + os.sep
+
+
+##### RULES #####
+
+outprefix = CFG["options"]["outprefix"]
+
+# DOWNLOAD CHRIS'S VERY AWESOME LYMPHGEN CONVERSION SCRIPT. ALL CAPS
+rule _install_lgenic:
+    params:
+        lgenic_dir = CFG["inputs"]["lgenic_exec"]
+    output:
+        lgenic_script = CFG["inputs"]["lgenic_exec"] + "generate_input.py",
+        lymphgen_genes = CFG["inputs"]["lgenic_exec"] + "resources" + os.sep + "lymphgen_genes.txt",
+        hugo2entrez = CFG["inputs"]["lgenic_exec"] + "resources" + os.sep + "hugo2entrez.tsv",
+        gene_coords = CFG["inputs"]["lgenic_exec"] + "resources" + os.sep + "gene_coordinates.GRCh37.bed6",
+        arm_coords = CFG["inputs"]["lgenic_exec"] + "resources" + os.sep + "chrom_arm.hg19.tsv"
+    shell:
+        '''
+        download_url=$(curl --silent "https://api.github.com/repos/ckrushton/LGenIC/releases/latest" | grep 'tarball_url' | sed 's/.*:[ ]//' | sed 's/,$//' | sed 's/"//g');
+        mkdir -p {params.lgenic_dir};
+
+        wget -cO - $download_url > {params.lgenic_dir}/LGenIC.tar.gz && tar -C {params.lgenic_dir} -xf {params.lgenic_dir}/LGenIC.tar.gz && rm {params.lgenic_dir}/LGenIC.tar.gz;
+        mv {params.lgenic_dir}/ckrushton-LGenIC-*/* {params.lgenic_dir}/ && rm -r {params.lgenic_dir}/ckrushton-LGenIC-*/;
+        '''
+
+# STEP 1: INPUT SYMLINKS
+# Symlinks the input files into the module results directory (under '00-inputs/')
+rule _lymphgen_input_maf:
+    input:
+        maf = CFG["inputs"]["sample_maf"]
+    output:
+        maf = CFG["dirs"]["inputs"] + "maf/" + outprefix + ".maf"
+    run:
+        op.relative_symlink(input.maf, output.maf)
+
+rule _lymphgen_input_seg:
+    input:
+        seg = CFG["inputs"]["sample_seg"]
+    output:
+        seg = CFG["dirs"]["inputs"] + "seg/input.seg"
+    run:
+        op.relative_symlink(input.seg, output.seg)
+
+
+# STEP 2: REFORMAT SEG FILE
+# Make sure the SEG columns are consistent
+rule _lymphgen_reformat_seg:
+    input:
+        seg = str(rules._lymphgen_input_seg.output.seg)
+    output:
+        seg = CFG["dirs"]["reformat_seg"] + outprefix + "reformat.seg"
+    params:
+        tumor_sample_barcode_name = CFG["options"]["reformat_seg"]["Tumor_Sample_Barcode"],
+        chromosome_name = CFG["options"]["reformat_seg"]["chromosome"],
+        start_name = CFG["options"]["reformat_seg"]["start"],
+        end_name = CFG["options"]["reformat_seg"]["end"],
+        cn_name = CFG["options"]["reformat_seg"]["CN"]
+    run:
+
+        loaded_seg = pandas.read_csv(input.seg, sep="\t")
+        seg_header = list(loaded_seg.columns)
+        # Rename relevant columns
+        new_cols = {
+            "Tumor_Sample_Barcode": params.tumor_sample_barcode_name,
+            "chromosome": params.chromosome_name,
+            "start": params.start_name,
+            "end": params.end_name,
+            "CN": params.cn_name
+        }
+
+        for new_name, old_name in new_cols.items():
+            try:
+                loc = seg_header.index(old_name)
+            except ValueError as e:
+                raise AttributeError(f"Unable to locate column {old_name} in the SEG file {input.seg}") from e
+            seg_header[loc] = new_name
+
+        # Write out the renamed SEG file
+        loaded_seg.columns = seg_header
+        loaded_seg.to_csv(output.seg, sep="\t", header=True, index=False)
+
+
+# STEP 3: REFORMAT INPUT TO RUN LYMPHGEN
+# Reformats MAF/SEG SNV/CNV calls for LymphGen using my LGenIC script
+
+# With CNVs
+rule _lymphgen_input_cnv:
+    input:
+        maf = str(rules._lymphgen_input_maf.output.maf),
+        seg = str(rules._lymphgen_reformat_seg.output.seg),
+        # Software and resource dependencies from LGenIC
+        lgenic_script = str(rules._install_lgenic.output.lgenic_script),
+        lymphgen_genes = str(rules._install_lgenic.output.lymphgen_genes),
+        hugo2entrez = str(rules._install_lgenic.output.hugo2entrez),
+        gene_coords = str(rules._install_lgenic.output.gene_coords),
+        arm_coords = str(rules._install_lgenic.output.arm_coords),
+    output:
+        sample_annotation = CFG["dirs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}_sample_annotation.tsv",
+        mutation_flat = CFG["dirs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}_mutation_flat.tsv",
+        gene_list = CFG["dirs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}_gene_list.txt",
+        cnv_flat = CFG["dirs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}_cnv_flat.tsv",
+        cnv_arm = CFG["dirs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}_cnv_arm.tsv"
+    log:
+        stdout = CFG["logs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}.LGenIC.stdout.log",
+        stderr = CFG["logs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}.LGenIC.stderr.log"
+    params:
+        seq_type = CFG["options"]["lymphgen_input"]["seq_type"],
+        outprefix = "{outprefix}.{cnvs_wc}",
+        logratio = "--log2" if str(CFG["options"]["lymphgen_input"]["use_log_ratio"]).lower() == "true" else ""  # str(): the YAML value may be a real boolean rather than a quoted string
+    conda:
+        CFG['conda_envs']['sorted_containers']
+    wildcard_constraints:
+        cnvs_wc = "with_cnvs"
+    shell:
+        op.as_one_line("""
+        python {input.lgenic_script} --lymphgen_genes {input.lymphgen_genes} --sequencing_type {params.seq_type} --outdir $(dirname {output.sample_annotation})
+        --outprefix {params.outprefix} -v INFO --maf {input.maf} --entrez_ids {input.hugo2entrez} --cnvs {input.seg} {params.logratio} --genes {input.gene_coords} --arms {input.arm_coords}
+        > {log.stdout} 2> {log.stderr}
+        """)
+
+# No CNVs
+rule _lymphgen_input_no_cnv:
+    input:
+        maf = str(rules._lymphgen_input_maf.output.maf),
+        # Software and resource dependencies from LGenIC
+        lgenic_script = str(rules._install_lgenic.output.lgenic_script),
+        lymphgen_genes = str(rules._install_lgenic.output.lymphgen_genes),
+        hugo2entrez = str(rules._install_lgenic.output.hugo2entrez),
+    output:
+        sample_annotation = CFG["dirs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}_sample_annotation.tsv",
+        mutation_flat = CFG["dirs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}_mutation_flat.tsv",
+        gene_list = CFG["dirs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}_gene_list.txt",
+    log:
+        stdout = CFG["logs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}.LGenIC.stdout.log",
+        stderr = CFG["logs"]["lymphgen_input"] + "{outprefix}.{cnvs_wc}.LGenIC.stderr.log"
+    params:
+        seq_type = CFG["options"]["lymphgen_input"]["seq_type"],
+        outprefix = "{outprefix}.{cnvs_wc}"
+    conda:
+        CFG['conda_envs']['sorted_containers']
+    wildcard_constraints:
+        cnvs_wc = "no_cnvs"
+    shell:
+        op.as_one_line("""
+        python {input.lgenic_script} --lymphgen_genes {input.lymphgen_genes} --sequencing_type {params.seq_type} --outdir $(dirname {output.sample_annotation})
+        --outprefix {params.outprefix} -v INFO --maf {input.maf} --entrez_ids {input.hugo2entrez} > {log.stdout} 2> {log.stderr}
+        """)
+
+# STEP 4: Add SV information (if available)
+
+rule _lymphgen_add_sv:
+    input:
+        sample_annotation = str(rules._lymphgen_input_cnv.output.sample_annotation),
+        bcl2_bcl6_sv = CFG["inputs"]["sample_sv_info"]
+    output:
+        sample_annotation = CFG["dirs"]["add_svs"] + "{outprefix}_sample_annotation.{cnvs_wc}.{sv_wc}.tsv"
+    params:
+        sampleIDcolname = CFG["options"]["add_svs"]["samplecol"],
+        bcl2colname = CFG["options"]["add_svs"]["bcl2col"],
+        bcl2true = CFG["options"]["add_svs"]["bcl2truevalues"],
+        bcl2false = CFG["options"]["add_svs"]["bcl2falsevalues"],
+        bcl6colname = CFG["options"]["add_svs"]["bcl6col"],
+        bcl6true = CFG["options"]["add_svs"]["bcl6truevalues"],
+        bcl6false = CFG["options"]["add_svs"]["bcl6falsevalues"]
+    wildcard_constraints:
+        sv_wc = "with_sv"
+    run:
+
+        # Sanitize input fields
+        # Since the input could be either a string or an iterable, and because python will split a string into individual letters,
+        # let's make sure all input options are a set
+        params.bcl2true = set([params.bcl2true] if isinstance(params.bcl2true, str) else params.bcl2true)
+        params.bcl2false = set([params.bcl2false] if isinstance(params.bcl2false, str) else params.bcl2false)
+        params.bcl6true = set([params.bcl6true] if isinstance(params.bcl6true, str) else params.bcl6true)
+        params.bcl6false = set([params.bcl6false] if isinstance(params.bcl6false, str) else params.bcl6false)
+
+        # Open the SV info file, and load the required columns
+        loaded_sv = pandas.read_csv(input.bcl2_bcl6_sv, sep="\t")  # Pandas claims to auto-detect the file separator, but in my experience it doesn't work
+        # Check that the required columns exist
+        if not params.bcl2colname in loaded_sv.columns:
+            raise AttributeError("Unable to locate column \'%s\' in \'%s\'" % (params.bcl2colname, input.bcl2_bcl6_sv))
+        elif not params.bcl6colname in loaded_sv.columns:
+            raise AttributeError("Unable to locate column \'%s\' in \'%s\'" % (params.bcl6colname,  input.bcl2_bcl6_sv))
+        elif not params.sampleIDcolname in loaded_sv.columns:
+            raise AttributeError("Unable to locate column \'%s\' in \'%s\'" % (params.sampleIDcolname, input.bcl2_bcl6_sv))
+
+        # Add SV info to sample annotation file
+        with open(input.sample_annotation) as f, open(output.sample_annotation, "w") as o:
+            for line in f:
+                line = line.rstrip("\n").rstrip("\r")  # Handle line endings
+                if line.startswith("Sample.ID\tCopy.Number"):  # Header line
+                    o.write(line)
+                    o.write(os.linesep)
+                    continue
+
+                cols = line.split("\t")
+                try:
+                    sampleID = cols[0]
+                    copynum = cols[1]
+                except IndexError as e:
+                    raise AttributeError("Input sample annotation file \'%s\' appears to be malformed" % input.sample_annotation) from e
+                # Find the matching SampleID in the SV annotation file (compared as strings, so all-numeric IDs parsed as integers by pandas still match)
+                sampleEntry = loaded_sv.loc[loaded_sv[params.sampleIDcolname].astype(str) == sampleID]
+                # BCL2
+                try:
+                    bcl2status = sampleEntry[params.bcl2colname].tolist()[0]
+                    # Check to see if BCL2 is translocated or not
+                    if bcl2status in params.bcl2true:
+                        bcl2trans = "1"
+                    elif bcl2status in params.bcl2false:
+                        bcl2trans = "0"
+                    else:
+                        bcl2trans = "NA"
+                except IndexError:  # i.e. This sample isn't in the annotation file. set it as NA
+                    bcl2trans = "NA"
+                # BCL6
+                try:
+                    bcl6status = sampleEntry[params.bcl6colname].tolist()[0]
+                    # Check if BCL6 is translocated
+                    if bcl6status in params.bcl6true:
+                        bcl6trans = "1"
+                    elif bcl6status in params.bcl6false:
+                        bcl6trans = "0"
+                    else:
+                        bcl6trans = "NA"
+                except IndexError:  # This sample isn't in the annotation file
+                    bcl6trans = "NA"
+
+                # Write the revised sample annotation entry
+                outline = [sampleID, copynum, bcl2trans, bcl6trans]
+                o.write("\t".join(outline))
+                o.write(os.linesep)
+
+
+# Since we don't have any SV info, just symlink sample annotation file
+rule _lymphgen_add_sv_blank:
+    input:
+        sample_annotation = str(rules._lymphgen_input_cnv.output.sample_annotation)
+    output:
+        sample_annotation = CFG["dirs"]["add_svs"] + "{outprefix}_sample_annotation.{cnvs_wc}.{sv_wc}.tsv"
+    wildcard_constraints:
+        sv_wc = "no_sv"
+    run:
+        op.relative_symlink(input.sample_annotation, output.sample_annotation)
+
+
+# STEP 5: RUN LYMPHGEN
+
+def _get_sample_annotation(wildcards):
+    if wildcards.sv_wc == "with_sv":  # must match the sv_wc constraint on _lymphgen_add_sv ("with_sv"); "has_sv" is never produced
+        return str(rules._lymphgen_add_sv.output.sample_annotation)
+    else:
+        return str(rules._lymphgen_add_sv_blank.output.sample_annotation)
+
+# With CNVs
+rule _lymphgen_run_cnv:
+    input:
+        sample_annotation = _get_sample_annotation,
+        mutation_flat = str(rules._lymphgen_input_cnv.output.mutation_flat),
+        gene_list = str(rules._lymphgen_input_cnv.output.gene_list),
+        cnv_flat = str(rules._lymphgen_input_cnv.output.cnv_flat),
+        cnv_arm = str(rules._lymphgen_input_cnv.output.cnv_arm)
+    output:
+        result = CFG["dirs"]["lymphgen_run"] + "{outprefix}.results.{cnvs_wc}.{sv_wc}.tsv"
+    log:
+        stderr = CFG["logs"]["lymphgen_run"] + "{outprefix}.lymphgen.{cnvs_wc}.{sv_wc}.stderr.log",
+        stdout = CFG["logs"]["lymphgen_run"] + "{outprefix}.lymphgen.{cnvs_wc}.{sv_wc}.stdout.log"
+    params:
+        lymphgen_path = CFG["options"]["lymphgen_run"]["lymphgen_path"]
+    conda:
+        CFG['conda_envs']['optparse']
+    wildcard_constraints:
+        cnvs_wc = "with_cnvs"
+    shell:
+        op.as_one_line("""
+        Rscript {params.lymphgen_path} -m {input.mutation_flat} -s {input.sample_annotation} -g {input.gene_list} -c {input.cnv_flat}
+        -a {input.cnv_arm} -o {output.result} > {log.stdout} 2> {log.stderr} """)
+
+# No CNVs
+rule _lymphgen_run_no_cnv:
+    input:
+        sample_annotation = _get_sample_annotation,
+        mutation_flat = str(rules._lymphgen_input_no_cnv.output.mutation_flat),
+        gene_list = str(rules._lymphgen_input_no_cnv.output.gene_list)
+    output:
+        result = CFG["dirs"]["lymphgen_run"] + "{outprefix}.results.{cnvs_wc}.{sv_wc}.tsv"
+    log:
+        stderr = CFG["logs"]["lymphgen_run"] + "{outprefix}.lymphgen.{cnvs_wc}.{sv_wc}.stderr.log",
+        stdout = CFG["logs"]["lymphgen_run"] + "{outprefix}.lymphgen.{cnvs_wc}.{sv_wc}.stdout.log"
+    params:
+        lymphgen_path = CFG["options"]["lymphgen_run"]["lymphgen_path"]
+    conda:
+        CFG['conda_envs']['optparse']
+    wildcard_constraints:
+         cnvs_wc = "no_cnvs"
+    shell:
+        op.as_one_line("""
+        Rscript {params.lymphgen_path} -m {input.mutation_flat} -s {input.sample_annotation} -g {input.gene_list}
+        -o {output.result} > {log.stdout} 2> {log.stderr}""")
+
+
+# Symlinks the final output files into the module results directory (under '99-outputs/')
+rule _lymphgen_output_txt:
+    input:
+        txt = str(rules._lymphgen_run_cnv.output.result)
+    output:
+        txt = CFG["dirs"]["outputs"] + "{outprefix}.lymphgen_calls.{cnvs_wc}.{sv_wc}.tsv"
+    run:
+        op.relative_symlink(input.txt, output.txt)
+
+
+# Generates the target sentinels for each run, which generate the symlinks
+
+# Set the applicable wildcards, based on the provided input files
+# Are we running LymphGen with CNV/SV data?
+if "sample_seg" in CFG["inputs"] and CFG["inputs"]["sample_seg"] != "" and CFG["inputs"]["sample_seg"] != "None":
+    cnvs_wc = ["with_cnvs", "no_cnvs"]
+else:
+    cnvs_wc = ["no_cnvs"]
+
+if "sample_sv_info" in CFG["inputs"] and CFG["inputs"]["sample_sv_info"] != "" and CFG["inputs"]["sample_sv_info"] != "None":
+    sv_wc = ["with_sv", "no_sv"]
+else:
+    sv_wc = ["no_sv"]
+
+rule _lymphgen_all:
+    input:
+        expand(
+            [
+                str(rules._lymphgen_output_txt.output.txt),
+            ],
+            outprefix = outprefix,
+            cnvs_wc = cnvs_wc,
+            sv_wc = sv_wc
+
+
+        )
+
+
+##### CLEANUP #####
+
+
+# Perform some clean-up tasks, including storing the module-specific
+# configuration on disk and deleting the `CFG` variable
+op.cleanup_module(CFG)
diff --git a/modules/lymphgen/1.0/schemas/lymphgen_schema_blank.yaml b/modules/lymphgen/1.0/schemas/lymphgen_schema_blank.yaml
new file mode 100644
index 000000000..0decc9fc1
--- /dev/null
+++ b/modules/lymphgen/1.0/schemas/lymphgen_schema_blank.yaml
@@ -0,0 +1,16 @@
+$schema: "http://json-schema.org/draft-06/schema#"
+
+description: Required fields for each sample row in the samples data frame
+
+# No sample table required. Hence no columns are required
+# Have some ASCII art instead
+#     WW
+#    /__\
+#   | oo |      _WWWWW_
+#  (|_()_|)    /  o o  \
+#    \__/    (|  __O__  |)
+#   /|\/|\     \ \___/ /
+#  ||||||||    /-------\
+#  ||||||||   |=========|
+#  ||||||||   |=========|
+#      Bert & Ernie
diff --git a/modules/lymphgen/CHANGELOG.md b/modules/lymphgen/CHANGELOG.md
new file mode 100644
index 000000000..3d0237843
--- /dev/null
+++ b/modules/lymphgen/CHANGELOG.md
@@ -0,0 +1,16 @@
+# Changelog
+
+All notable changes to the `lymphgen` module will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [1.0] - 2021-11-02
+
+This release was authored by Chris "coolbeans" Rushton.
+
+Initial release, adding LymphGen, LGenIC, and allowing for CNV data (if available)
+You can run LymphGen using just SNVs, or with CNV and SV data
+Note if you provide CNV and SV files, you should specify the appropriate column names in the config file
+All possible iterations of LymphGen will be run (i.e. if you provide both CNVs and SNVs, LymphGen will be run
+with both CNVs and SNVs, as well as just with SNVs)
diff --git a/modules/manta/2.3/manta.smk b/modules/manta/2.3/manta.smk
index e1ff97f6f..3a84e0778 100644
--- a/modules/manta/2.3/manta.smk
+++ b/modules/manta/2.3/manta.smk
@@ -6,15 +6,34 @@
 
 # Original Snakefile Author:    Bruno Grande
 # Module Author:                Bruno Grande
-# Additional Contributors:      N/A
+# Additional Contributors:      Chris Rushton
 
 
 ##### SETUP #####
 
-
-# Import package with useful functions for developing analysis modules.
+# Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.12"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section
+
 # Setup module and store module-specific configuration in `CFG`.
 CFG = op.setup_module(
     name = "manta", 
@@ -47,9 +66,9 @@ rule _manta_input_bam:
         sample_bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai",
         sample_crai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.crai"
     run:
-        op.relative_symlink(input.sample_bam, output.sample_bam)
-        op.relative_symlink(input.sample_bai, output.sample_bai)
-        op.relative_symlink(input.sample_bai, output.sample_crai)
+        op.absolute_symlink(input.sample_bam, output.sample_bam)
+        op.absolute_symlink(input.sample_bai, output.sample_bai)
+        op.absolute_symlink(input.sample_bai, output.sample_crai)
 
 
 # bgzip-compress and tabix-index the BED file to meet Manta requirement
@@ -67,6 +86,19 @@ rule _manta_index_bed:
         tabix {output.bedz}
         """)
 
+def _manta_get_capspace(wildcards):
+    CFG = config["lcr-modules"]["manta"]
+    try:
+        # Get the appropriate capture space for this sample
+        this_bed = op.get_capture_space(CFG, wildcards.tumour_id, wildcards.genome_build, wildcards.seq_type, "bed.gz")
+        this_bed = reference_files(this_bed)
+    except NameError:
+        # If we are using an older version of the reference workflow, use the same region file as the genome sample
+        this_bed = str(config["lcr-modules"]["manta"]["dirs"]["chrom_bed"] + "{genome_build}.main_chroms.bed.gz")
+    # If this is a genome sample, return a BED file listing all chromosomes
+    if wildcards.seq_type != "capture":
+        this_bed = str(config["lcr-modules"]["manta"]["dirs"]["chrom_bed"] + "{genome_build}.main_chroms.bed.gz")
+    return this_bed
 
 # Configures the manta workflow with the input BAM files and reference FASTA file.
 rule _manta_configure_paired:
@@ -75,7 +107,7 @@ rule _manta_configure_paired:
         normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam",
         fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"),
         config = op.switch_on_wildcard("seq_type", CFG["switches"]["manta_config"]),
-        bedz = str(rules._manta_index_bed.output.bedz)
+        bedz = _manta_get_capspace
     output:
         runwf = CFG["dirs"]["manta"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/runWorkflow.py"
     log:
@@ -102,7 +134,7 @@ rule _manta_configure_unpaired:
         tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam",
         fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"),
         config = op.switch_on_wildcard("seq_type", CFG["switches"]["manta_config"]),
-        bedz = str(rules._manta_index_bed.output.bedz)
+        bedz = _manta_get_capspace
     output:
         runwf = CFG["dirs"]["manta"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/runWorkflow.py"
     log:
@@ -117,7 +149,7 @@ rule _manta_configure_unpaired:
         CFG["conda_envs"]["manta"]
     shell:
         op.as_one_line("""
-        configManta.py {params.opts} --referenceFasta {input.fasta} --callRegions {input.bedz}
+        configManta.py {params.opts} --referenceFasta {input.fasta} --callRegions {params.bedz}
         --runDir "$(dirname {output.runwf})" {params.tumour_bam_arg_name} {input.tumour_bam}
         --config {input.config} > {log.stdout} 2> {log.stderr}
         """)
@@ -128,12 +160,13 @@ rule _manta_run:
     input:
         runwf = CFG["dirs"]["manta"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/runWorkflow.py"
     output:
-        variants_dir = directory(CFG["dirs"]["manta"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/results/variants/"),
+        variants_dir = directory(CFG["dirs"]["manta"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/results/variants/")
     log:
         stdout = CFG["logs"]["manta"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/manta_run.stdout.log",
         stderr = CFG["logs"]["manta"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/manta_run.stderr.log"
     params:
-        opts = CFG["options"]["manta"]
+        opts = CFG["options"]["manta"],
+        workspace_dir = directory(CFG["dirs"]["manta"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/workspace/")
     conda:
         CFG["conda_envs"]["manta"]
     threads:
@@ -142,13 +175,12 @@ rule _manta_run:
         mem_mb = CFG["mem_mb"]["manta"],
         bam = 1
     shell:
-        op.as_one_line("""
-        {input.runwf} {params.opts} --jobs {threads} > {log.stdout} 2> {log.stderr}
+        op.as_one_line(""" 
+            rm -fr {params.workspace_dir}/* ;
+            {input.runwf} {params.opts} --jobs {threads} > {log.stdout} 2> {log.stderr}
             &&
-        rm -rf "$(dirname {input.runwf})/workspace/"
+            rm -rf "$(dirname {input.runwf})/workspace/"
         """)
-
-
 # Calculates the tumour and/or normal variant allele fractions (VAF) from the allele counts
 # and fixes the sample IDs in the VCF header to match sample IDs used in Snakemake
 rule _manta_augment_vcf:
@@ -202,7 +234,7 @@ rule _manta_output_vcf:
     output:
         vcf = CFG["dirs"]["outputs"] + "vcf/{seq_type}--{genome_build}/{vcf_name}/{tumour_id}--{normal_id}--{pair_status}.{vcf_name}.vcf"
     run:
-        op.relative_symlink(input.vcf, output.vcf)
+        op.relative_symlink(input.vcf, output.vcf, in_module=True)
 
 
 # Symlinks the final BEDPE files
@@ -212,7 +244,7 @@ rule _manta_output_bedpe:
     output:
         bedpe = CFG["dirs"]["outputs"] + "bedpe/{seq_type}--{genome_build}/{vcf_name}/{tumour_id}--{normal_id}--{pair_status}.{vcf_name}.bedpe"
     run:
-        op.relative_symlink(input.bedpe, output.bedpe)
+        op.relative_symlink(input.bedpe, output.bedpe, in_module=True)
 
 
 def _manta_predict_output(wildcards):
@@ -273,6 +305,7 @@ def _manta_predict_output(wildcards):
     return outputs_with_bedpe + outputs_without_bedpe
 
 
+
 # Generates the target symlinks for each run depending on the Manta output VCF files
 rule _manta_dispatch:
     input:
diff --git a/modules/mixcr/1.1/mixcr.smk b/modules/mixcr/1.1/mixcr.smk
index 05e63a6dc..ac24689a8 100644
--- a/modules/mixcr/1.1/mixcr.smk
+++ b/modules/mixcr/1.1/mixcr.smk
@@ -15,6 +15,26 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["mixcr"]`
 CFG = op.setup_module(
@@ -43,8 +63,8 @@ rule _mixcr_input_fastq:
         fastq_1 = CFG["dirs"]["inputs"] + "fastq/{seq_type}--{genome_build}/{sample_id}.R1.fastq.gz",
         fastq_2 = CFG["dirs"]["inputs"] + "fastq/{seq_type}--{genome_build}/{sample_id}.R2.fastq.gz",
     run:
-        op.relative_symlink(input.fastq_1, output.fastq_1)
-        op.relative_symlink(input.fastq_2, output.fastq_2)
+        op.absolute_symlink(input.fastq_1, output.fastq_1)
+        op.absolute_symlink(input.fastq_2, output.fastq_2)
 
 # Installs latest MiXCR release from github if the mixcr folder is not present yet
 rule _install_mixcr:
@@ -110,8 +130,8 @@ rule _mixcr_output_txt:
         txt = CFG["dirs"]["outputs"] + "txt/{seq_type}--{genome_build}/mixcr.{sample_id}.clonotypes.ALL.txt",
         report = CFG["dirs"]["outputs"] + "txt/{seq_type}--{genome_build}/mixcr.{sample_id}.report"
     run:
-        op.relative_symlink(input.txt, output.txt)
-        op.relative_symlink(input.report, output.report)
+        op.relative_symlink(input.txt, output.txt, in_module=True)
+        op.relative_symlink(input.report, output.report, in_module=True)
 
 
 # Generates the target sentinels for each run, which generate the symlinks
diff --git a/modules/mutect2/2.0/mutect2.smk b/modules/mutect2/2.0/mutect2.smk
index 9d43eb36b..cdf0c5afd 100644
--- a/modules/mutect2/2.0/mutect2.smk
+++ b/modules/mutect2/2.0/mutect2.smk
@@ -15,6 +15,27 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 import inspect
+import pandas as pd
+
+# Verify that the installed oncopipe is at least `min_oncopipe_version`. Copy this whole section into any module that uses new features in oncopipe
+min_oncopipe_version="1.0.12"
+import pkg_resources  # NOTE(review): `sys` and `logger` are used below without an import in this section; presumably provided by the Snakemake namespace (confirm)
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# TODO: declare the "packaging" module as a dependency of LCR-modules or oncopipe so the guarded import above becomes unnecessary
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):  # compared as parsed versions, not raw strings
+    logger.warning(  # message is wrapped in ANSI escapes (red on black) and reset with \x1b[0m
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section. NOTE(review): the pre-existing copy of this check (minimum "1.0.11") still follows and reassigns min_oncopipe_version; consider bumping that copy instead of keeping both
 
 # Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
 min_oncopipe_version="1.0.11"
@@ -28,8 +49,10 @@ except ModuleNotFoundError:
 
 current_version = pkg_resources.get_distribution("oncopipe").version
 if version.parse(current_version) < version.parse(min_oncopipe_version):
-    print('\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + '\x1b[0m')
-    print('\x1b[0;31;40m' + f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m')
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
     sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
 
 # End of dependency checking section 
@@ -52,7 +75,20 @@ localrules:
 
 
 ##### RULES #####
-
+def _mutect_get_capspace(wildcards):
+    """Input function: capture-space interval list for capture samples, otherwise the main-chromosomes BED."""
+    # Read the module configuration into a local instead of relying on the module-level `CFG`
+    module_cfg = config["lcr-modules"]["mutect2"]
+    fallback_bed = "genomes/{genome_build}/genome_fasta/main_chromosomes.bed"
+    try:
+        # Resolve the capture space for this tumour sample through the reference_files workflow
+        interval_path = op.get_capture_space(module_cfg, wildcards.tumour_id, wildcards.genome_build, wildcards.seq_type, "interval_list")
+        chosen = reference_files(interval_path)
+    except NameError:
+        # Older versions of the reference workflow: fall back to the main chromosomes
+        chosen = reference_files(fallback_bed)
+    # Non-capture samples have no capture space, so they always get the main-chromosomes BED
+    return chosen if wildcards.seq_type == "capture" else reference_files(fallback_bed)
 
 # Symlinks the input files into the module results directory (under '00-inputs/')
 rule _mutect2_input_bam:
@@ -63,8 +99,8 @@ rule _mutect2_input_bam:
         bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam",
         bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai"
     run:
-        op.relative_symlink(input.bam, output.bam)
-        op.relative_symlink(input.bai, output.bai)
+        op.absolute_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bai, output.bai)
 
 rule _mutect2_dummy_positions:
     # creates a dummy vcf if users do not specify candidateSmallIndels file
@@ -75,11 +111,27 @@ rule _mutect2_dummy_positions:
 # Symlink chromosomes used for parallelization
 checkpoint _mutect2_input_chrs:
     input:
-        chrs = reference_files("genomes/{genome_build}/genome_fasta/main_chromosomes.txt")
+        candidate_positions = CFG["inputs"]["candidate_positions"] if CFG["inputs"]["candidate_positions"] else str(rules._mutect2_dummy_positions.output),
+        chrs = reference_files("genomes/{genome_build}/genome_fasta/main_chromosomes.txt"),
+        capture_arg = _mutect_get_capspace
     output:
-        chrs = CFG["dirs"]["inputs"] + "chroms/{genome_build}/main_chromosomes.txt"
+        chrs = CFG["dirs"]["inputs"] + "chroms/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/mutated_chromosomes.txt"
     run:
-        op.relative_symlink(input.chrs, output.chrs)
+        # Write the chromosomes shared by all three inputs; first, obtain the list of main chromosomes
+        main_chrs = pd.read_csv(input.chrs, comment='#', sep='\t', header=None)
+        main_chrs = main_chrs.iloc[:, 0].astype(str).unique().tolist()
+        # obtain list of chromosomes in candidate positions ('#' lines are skipped; header=None keeps the first record as data instead of column names)
+        candidate_chrs = pd.read_csv(input.candidate_positions, comment='#', sep='\t', header=None)
+        candidate_chrs = candidate_chrs.iloc[:, 0].astype(str).unique().tolist()
+        # obtain list of chromosomes in the capture space ('@' lines are skipped; header=None so the first interval is not consumed as a header row)
+        interval_chrs = pd.read_csv(input.capture_arg, comment='@', sep='\t', header=None)
+        interval_chrs = interval_chrs.iloc[:, 0].astype(str).unique().tolist()
+        # intersect the three lists to obtain chromosomes present in all
+        intersect_chrs = list(set(main_chrs) & set(candidate_chrs) & set(interval_chrs))
+        # convert list to single-column df, sorted by chromosome name (NOTE(review): an empty intersection has no column 0 to sort on; confirm that cannot occur)
+        intersect_chrs = pd.DataFrame(intersect_chrs).sort_values(0)
+        # write out the file with mutated chromosomes, one per line
+        intersect_chrs.to_csv(output.chrs, index=False, header=False)
 
 
 # Retrieves from SM tag from BAM and writes to file
@@ -101,13 +153,13 @@ rule _mutect2_get_sm:
 # The first function loads the wildcard-containing file path and additional args from the config. 
 # The second replaces wildcards with those used in the rule. 
 def _mutect2_get_interval_cli_arg(
-    vcf_in = config["lcr-modules"]["mutect2"]["inputs"]["candidate_positions"], 
+    vcf_in = config["lcr-modules"]["mutect2"]["inputs"]["candidate_positions"],
     interval_arg_in = config["lcr-modules"]["mutect2"]["options"]["mutect2_interval_rules"]
 ):
     def _mutect2_get_interval_cli_custom(wildcards, input):
         if vcf_in:
             param = f"-L {input.candidate_positions} {interval_arg_in}"
-        else: 
+        else:
             param = ""
         return param
     return _mutect2_get_interval_cli_custom
@@ -123,7 +175,8 @@ rule _mutect2_run_matched_unmatched:
         gnomad = reference_files("genomes/{genome_build}/variation/af-only-gnomad.{genome_build}.vcf.gz"),
         normal_sm = CFG["dirs"]["mutect2"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{normal_id}_sm.txt", 
         pon = reference_files("genomes/{genome_build}/gatk/mutect2_pon.{genome_build}.vcf.gz"), 
-        candidate_positions = CFG["inputs"]["candidate_positions"] if CFG["inputs"]["candidate_positions"] else str(rules._mutect2_dummy_positions.output)
+        candidate_positions = CFG["inputs"]["candidate_positions"] if CFG["inputs"]["candidate_positions"] else str(rules._mutect2_dummy_positions.output),
+        capture_arg = _mutect_get_capspace
     output:
         vcf = temp(CFG["dirs"]["mutect2"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/chromosomes/{chrom}.output.vcf.gz"),
         tbi = temp(CFG["dirs"]["mutect2"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/chromosomes/{chrom}.output.vcf.gz.tbi"),
@@ -150,7 +203,7 @@ rule _mutect2_run_matched_unmatched:
         -I {input.tumour_bam} -I {input.normal_bam}
         -R {input.fasta} -normal "$(cat {input.normal_sm})" -O {output.vcf}
         --germline-resource {input.gnomad} 
-        -L {wildcards.chrom} {params.interval_arg} 
+        -L {wildcards.chrom} {params.interval_arg} -L {input.capture_arg}
         -pon {input.pon} --f1r2-tar-gz {output.f1r2}
         > {log.stdout} 2> {log.stderr}
         """)
@@ -164,7 +217,8 @@ rule _mutect2_run_no_normal:
         dict = reference_files("genomes/{genome_build}/genome_fasta/genome.dict"),
         gnomad = reference_files("genomes/{genome_build}/variation/af-only-gnomad.{genome_build}.vcf.gz"),
         pon = reference_files("genomes/{genome_build}/gatk/mutect2_pon.{genome_build}.vcf.gz"), 
-        candidate_positions = CFG["inputs"]["candidate_positions"] if CFG["inputs"]["candidate_positions"] else str(rules._mutect2_dummy_positions.output)
+        candidate_positions = CFG["inputs"]["candidate_positions"] if CFG["inputs"]["candidate_positions"] else str(rules._mutect2_dummy_positions.output),
+        capture_arg = _mutect_get_capspace
     output:
         vcf = temp(CFG["dirs"]["mutect2"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/chromosomes/{chrom}.output.vcf.gz"),
         tbi = temp(CFG["dirs"]["mutect2"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/chromosomes/{chrom}.output.vcf.gz.tbi"),
@@ -178,7 +232,7 @@ rule _mutect2_run_no_normal:
     params:
         mem_mb = lambda wildcards, resources: int(resources.mem_mb * 0.8),
         opts = CFG["options"]["mutect2_run"], 
-        interval_arg = _mutect2_get_interval_cli_arg 
+        interval_arg = _mutect2_get_interval_cli_arg()
     conda:
         CFG["conda_envs"]["gatk"]
     threads:
@@ -190,7 +244,7 @@ rule _mutect2_run_no_normal:
         gatk Mutect2 --java-options "-Xmx{params.mem_mb}m" 
         {params.opts} -I {input.tumour_bam} -R {input.fasta} 
         -O {output.vcf} --germline-resource {input.gnomad} 
-        -L {wildcards.chrom} {params.interval_arg}
+        -L {wildcards.chrom} {params.interval_arg} -L {input.capture_arg}
         -pon {input.pon} --f1r2-tar-gz {output.f1r2}
         > {log.stdout} 2> {log.stderr}
         """)
diff --git a/modules/picard_qc/1.0/picard_qc.smk b/modules/picard_qc/1.0/picard_qc.smk
index 27c69af80..b43f5af49 100644
--- a/modules/picard_qc/1.0/picard_qc.smk
+++ b/modules/picard_qc/1.0/picard_qc.smk
@@ -15,6 +15,27 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Verify that the installed oncopipe is at least `min_oncopipe_version`. Copy this whole section into any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources  # NOTE(review): `sys` and `logger` are used below without an import in this section; presumably provided by the Snakemake namespace (confirm)
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# TODO: declare the "packaging" module as a dependency of LCR-modules or oncopipe so the guarded import above becomes unnecessary
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):  # compared as parsed versions, not raw strings
+    logger.warning(  # message is wrapped in ANSI escapes (red on black) and reset with \x1b[0m
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section
+
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["picard_qc"]`
 CFG = op.setup_module(
@@ -47,8 +68,8 @@ rule _picard_qc_input_bam:
         sample_bam = CFG["dirs"]["inputs"] + "{seq_type}--{genome_build}/{sample_id}.bam",
         sample_bai = CFG["dirs"]["inputs"] + "{seq_type}--{genome_build}/{sample_id}.bam.bai"
     run:
-        op.relative_symlink(input.sample_bam, output.sample_bam)
-        op.relative_symlink(input.sample_bai, output.sample_bai)
+        op.absolute_symlink(input.sample_bam, output.sample_bam)
+        op.absolute_symlink(input.sample_bai, output.sample_bai)
 
 
 rule _picard_qc_alignment_summary:
@@ -271,7 +292,7 @@ rule _picard_qc_merged_output:
     output:
         metrics = CFG["dirs"]["outputs"] + "merged_metrics/{seq_type}--{genome_build}/all.{metrics}.txt"
     run:
-        op.relative_symlink(input.metrics, output.metrics)
+        op.relative_symlink(input.metrics, output.metrics, in_module=True)
 
 
 rule _picard_qc_flagstats_output:
@@ -280,7 +301,7 @@ rule _picard_qc_flagstats_output:
     output:
         flagstats = CFG["dirs"]["outputs"] + "flagstats/{seq_type}--{genome_build}/{sample_id}.flagstats"
     run:
-        op.relative_symlink(input.flagstats, output.flagstats)
+        op.relative_symlink(input.flagstats, output.flagstats, in_module=True)
 
 
 def _get_picard_qc_files(wildcards):
diff --git a/modules/sage/1.0/sage.smk b/modules/sage/1.0/sage.smk
index 2204cda77..49d317842 100644
--- a/modules/sage/1.0/sage.smk
+++ b/modules/sage/1.0/sage.smk
@@ -16,7 +16,7 @@
 import oncopipe as op
 
 # Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
-min_oncopipe_version="1.0.11"
+min_oncopipe_version="1.0.12"
 import pkg_resources
 try:
     from packaging import version
@@ -27,8 +27,10 @@ except ModuleNotFoundError:
 
 current_version = pkg_resources.get_distribution("oncopipe").version
 if version.parse(current_version) < version.parse(min_oncopipe_version):
-    print('\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + '\x1b[0m')
-    print('\x1b[0;31;40m' + f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m')
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
     sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
 
 # End of dependency checking section 
@@ -106,18 +108,19 @@ rule _download_sage_references:
         wget -O {output.high_conf_bed} {params.url}/HighConfidence.{params.build}.bed.gz
         """)
 
-# Non-standard chromosomes in rare cases cause SAGE error. This function will read the main chromosomes
-# file for each genome build using file produced by reference_files workflow, and supply it as
-# a comma-deliminated list of chromosomes for SAGE run.
-def get_chromosomes(wildcards):
-    chromosomes=[]
-    for i in range(1,23):
-        chromosomes.append(str(i))
-    chromosomes.append("X")
-    if "38" in str(wildcards.genome_build):
-        chromosomes = ["chr" + x for x in chromosomes]
-    chromosomes= ",".join(chromosomes)    
-    return chromosomes
+def _sage_get_capspace(wildcards):
+    """Input function: capture-space BED for capture samples, otherwise the panel BED from _download_sage_references."""
+    module_cfg = config["lcr-modules"]["sage"]
+    try:
+        # Resolve the capture space for this tumour sample through the reference_files workflow
+        capture_path = op.get_capture_space(module_cfg, wildcards.tumour_id, wildcards.genome_build, wildcards.seq_type, "bed.gz")
+        panel = reference_files(capture_path)
+    except NameError:
+        # Older versions of the reference workflow: use the same region file as non-capture samples
+        panel = rules._download_sage_references.output.panel_bed
+    # Anything that is not a capture sample always uses the downloaded panel BED
+    use_capture_space = wildcards.seq_type == "capture"
+    return panel if use_capture_space else rules._download_sage_references.output.panel_bed
 
 # Variant calling rule
 rule _run_sage:
@@ -125,18 +128,18 @@ rule _run_sage:
         tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam",
         normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam",
         fasta = str(rules._input_references.output.genome_fa),
-        hotspots = str(rules._download_sage_references.output.hotspots),
-        panel_bed = str(rules._download_sage_references.output.panel_bed),
-        high_conf_bed = str(rules._download_sage_references.output.high_conf_bed)
+        hotspots = rules._download_sage_references.output.hotspots,
+        high_conf_bed = str(rules._download_sage_references.output.high_conf_bed),
+        panel_bed = _sage_get_capspace,
+        main_chromosomes = reference_files("genomes/{genome_build}/genome_fasta/main_chromosomes.txt")
     output:
         vcf = temp(CFG["dirs"]["sage"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}--{normal_id}--{pair_status}.vcf"),
-        vcf_gz = CFG["dirs"]["sage"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}--{normal_id}--{pair_status}.vcf.gz"
+        vcf_gz = temp(CFG["dirs"]["sage"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{tumour_id}--{normal_id}--{pair_status}.vcf.gz")
     log:
         stdout = CFG["logs"]["sage"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/run_sage.stdout.log",
         stderr = CFG["logs"]["sage"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/run_sage.stderr.log"
     params:
         opts = CFG["options"]["sage_run"],
-        chromosomes = get_chromosomes,
         assembly = lambda w: "hg38" if "38" in str({w.genome_build}) else "hg19",
         sage= "$(dirname $(readlink -e $(which SAGE)))/sage.jar",
         jvmheap = lambda wildcards, resources: int(resources.mem_mb * 0.8)
@@ -150,11 +153,13 @@ rule _run_sage:
         op.as_one_line("""
         echo "running {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname)" > {log.stdout}
         &&
+        SAGE_CHROMOSOMES=$(cat {input.main_chromosomes} | paste -sd, -)
+        &&
         java -Xms1G -Xmx{params.jvmheap}m
         -cp {params.sage} com.hartwig.hmftools.sage.SageApplication
         -threads {threads}
         {params.opts}
-        -chr {params.chromosomes}
+        -chr $SAGE_CHROMOSOMES
         -reference {wildcards.normal_id}
         -reference_bam {input.normal_bam}
         -tumor {wildcards.tumour_id} 
diff --git a/modules/salmon/1.1/salmon.smk b/modules/salmon/1.1/salmon.smk
index b9395aac4..08b5cf5ac 100644
--- a/modules/salmon/1.1/salmon.smk
+++ b/modules/salmon/1.1/salmon.smk
@@ -15,6 +15,27 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 import pandas as pd
+
+# Verify that the installed oncopipe is at least `min_oncopipe_version`. Copy this whole section into any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources  # NOTE(review): `sys` and `logger` are used below without an import in this section; presumably provided by the Snakemake namespace (confirm)
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# TODO: declare the "packaging" module as a dependency of LCR-modules or oncopipe so the guarded import above becomes unnecessary
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):  # compared as parsed versions, not raw strings
+    logger.warning(  # message is wrapped in ANSI escapes (red on black) and reset with \x1b[0m
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["salmon"]`
 CFG = op.setup_module(
@@ -42,8 +63,8 @@ rule _salmon_input_fastq:
         fastq_1 = CFG["dirs"]["inputs"] + "fastq/{seq_type}/{sample_id}.read1.fastq.gz",
         fastq_2 = CFG["dirs"]["inputs"] + "fastq/{seq_type}/{sample_id}.read2.fastq.gz"
     run:
-        op.relative_symlink(input.fastq_1, output.fastq_1)
-        op.relative_symlink(input.fastq_2, output.fastq_2)
+        op.absolute_symlink(input.fastq_1, output.fastq_1)
+        op.absolute_symlink(input.fastq_2, output.fastq_2)
 
 
 rule _salmon_quant:
@@ -85,7 +106,7 @@ rule _salmon_output:
     output:
         quant = CFG["dirs"]["outputs"] + "quant_to_" + CFG["transcriptome"]["quant_to"] + "/{seq_type}/{sample_id}.quant.sf"
     run:
-        op.relative_symlink(input.quant, output.quant)
+        op.relative_symlink(input.quant, output.quant, in_module=True)
 
 
 rule export_sample_table:
diff --git a/modules/salmon/1.1/salmon_grouped.smk b/modules/salmon/1.1/salmon_grouped.smk
index 3c3a84b92..2f7c69bbd 100644
--- a/modules/salmon/1.1/salmon_grouped.smk
+++ b/modules/salmon/1.1/salmon_grouped.smk
@@ -15,6 +15,27 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 import pandas as pd
+
+# Verify that the installed oncopipe is at least `min_oncopipe_version`. Copy this whole section into any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources  # NOTE(review): `sys` and `logger` are used below without an import in this section; presumably provided by the Snakemake namespace (confirm)
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# TODO: declare the "packaging" module as a dependency of LCR-modules or oncopipe so the guarded import above becomes unnecessary
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):  # compared as parsed versions, not raw strings
+    logger.warning(  # message is wrapped in ANSI escapes (red on black) and reset with \x1b[0m
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["salmon"]`
 CFG = op.setup_module(
@@ -44,8 +65,8 @@ rule _salmon_input_fastq:
     group: 
         CFG["group"]["quant"]
     run:
-        op.relative_symlink(input.fastq_1, output.fastq_1)
-        op.relative_symlink(input.fastq_2, output.fastq_2)
+        op.absolute_symlink(input.fastq_1, output.fastq_1)
+        op.absolute_symlink(input.fastq_2, output.fastq_2)
 
 
 rule _salmon_quant:
@@ -89,7 +110,7 @@ rule _salmon_output:
     output:
         quant = CFG["dirs"]["outputs"] + "quant_to_" + CFG["transcriptome"]["quant_to"] + "/{seq_type}/{sample_id}.quant.sf"
     run:
-        op.relative_symlink(input.quant, output.quant)
+        op.relative_symlink(input.quant, output.quant, in_module=True)
 
 
 rule export_sample_table:
diff --git a/modules/sequenza/1.4/sequenza.smk b/modules/sequenza/1.4/sequenza.smk
index 0d30155ef..1d863275f 100644
--- a/modules/sequenza/1.4/sequenza.smk
+++ b/modules/sequenza/1.4/sequenza.smk
@@ -15,7 +15,6 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
-
 # Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
 min_oncopipe_version="1.0.11"
 import pkg_resources
@@ -28,8 +27,10 @@ except ModuleNotFoundError:
 
 current_version = pkg_resources.get_distribution("oncopipe").version
 if version.parse(current_version) < version.parse(min_oncopipe_version):
-    print('\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + '\x1b[0m')
-    print('\x1b[0;31;40m' + f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m')
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
     sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
 
 # End of dependency checking section
@@ -78,7 +79,7 @@ checkpoint _sequenza_input_chroms:
     output:
         txt = CFG["dirs"]["inputs"] + "chroms/{genome_build}/main_chromosomes.txt"
     run:
-        op.relative_symlink(input.txt, output.txt)
+        op.absolute_symlink(input.txt, output.txt)
 
 
 # Pulls in list of chromosomes for the genome builds
diff --git a/modules/slms_3/1.0/slms_3.smk b/modules/slms_3/1.0/slms_3.smk
index cf37f1df0..aa7b2f62d 100644
--- a/modules/slms_3/1.0/slms_3.smk
+++ b/modules/slms_3/1.0/slms_3.smk
@@ -28,8 +28,10 @@ except ModuleNotFoundError:
 
 current_version = pkg_resources.get_distribution("oncopipe").version
 if version.parse(current_version) < version.parse(min_oncopipe_version):
-    print('\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + '\x1b[0m')
-    print('\x1b[0;31;40m' + f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m')
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
     sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
 
 # End of dependency checking section 
@@ -245,8 +247,11 @@ rule _slms_3_annotate_sage_gnomad:
         bcftools annotate --threads {threads} 
         -a {input.gnomad} -c INFO/AF {input.vcf} | 
         awk 'BEGIN {{FS=OFS="\\t"}} {{ if ($1 !~ /^#/ && $8 !~ ";AF=") $8=$8";AF=0"; print $0; }}' | 
-        sed 's/{wildcards.tumour_id}/TUMOR/g' | 
-        sed 's/{wildcards.normal_id}/NORMAL/g' | 
+        perl -ne '$norm="{wildcards.normal_id}";
+                  $tum="{wildcards.tumour_id}";
+                  s/(\s)$tum(\s)/$1TUMOR$2/;
+                  s/(\s)$norm(\s)/$1NORMAL$2/;
+                  print;' |
         bcftools view -s "NORMAL,TUMOR" -i 'INFO/AF < 0.0001' -Oz -o {output.vcf} 2> {log.stderr} 
         &&
         tabix -p vcf {output.vcf} 2>> {log.stderr}
@@ -299,10 +304,9 @@ rule _slms_3_mutect2_depth_filt:
         CFG_SLMS3["threads"]["mutect2_depth_filt"]
     shell: 
         op.as_one_line("""
-        tsamp=$(zgrep "##tumor_sample=" {input.vcf} | sed 's|##tumor_sample=||g');
-        nsamp=$(zgrep "##normal_sample=" {input.vcf} | sed 's|##normal_sample=||g');
         bcftools view {input.vcf} | 
-        sed "s|$tsamp|TUMOR|g" | sed "s|$nsamp|NORMAL|g" |  
+        perl -ne 'if(/^\#\#normal_sample=(.+)$/){{$norm=$1;}}if(/tumor_sample=(.+)$/){{$tum = $1;}}s/(\s)$tum(\s)/$1TUMOR$2/;s/(\s)$norm(\s)/$1NORMAL$2/;print;'|
+        sed 's/##INFO=<ID=AS_FilterStatus,Number=A/##INFO=<ID=AS_FilterStatus,Number=1/' |   
         bcftools view  -s "NORMAL,TUMOR" -i 'FMT/DP[@{input.table}] >= 10 && FMT/AD[@{input.table}:1] >= 4 && FMT/AF[@{input.table}:0] >= 0.1' 
         -Oz -o {output.vcf} 2> {log.stderr} && 
         tabix -p vcf {output.vcf} 2>> {log.stderr}
@@ -450,4 +454,5 @@ rule _slms_3_all:
 
 # Perform some clean-up tasks, including storing the module-specific
 # configuration on disk and deleting the `CFG` variable
-# op.cleanup_module(CFG_SLMS3)
+CFG=CFG_SLMS3
+op.cleanup_module(CFG)
diff --git a/modules/star/1.4/star.smk b/modules/star/1.4/star.smk
index 31c3a17f6..f07d43790 100644
--- a/modules/star/1.4/star.smk
+++ b/modules/star/1.4/star.smk
@@ -18,6 +18,26 @@ import os
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Verify that the installed oncopipe is at least `min_oncopipe_version`. Copy this whole section into any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources  # NOTE(review): `sys` and `logger` are used below without an import in this section; presumably provided by the Snakemake namespace (confirm)
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# TODO: declare the "packaging" module as a dependency of LCR-modules or oncopipe so the guarded import above becomes unnecessary
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):  # compared as parsed versions, not raw strings
+    logger.warning(  # message is wrapped in ANSI escapes (red on black) and reset with \x1b[0m
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["star"]`
 CFG = op.setup_module(
@@ -52,8 +72,8 @@ rule _star_input_fastq:
         fastq_1 = CFG["dirs"]["inputs"] + "fastq/{seq_type}--{genome_build}/{sample_id}.R1.fastq.gz",
         fastq_2 = CFG["dirs"]["inputs"] + "fastq/{seq_type}--{genome_build}/{sample_id}.R2.fastq.gz"
     run:
-        op.relative_symlink(input.fastq_1, output.fastq_1)
-        op.relative_symlink(input.fastq_2, output.fastq_2)
+        op.absolute_symlink(input.fastq_1, output.fastq_1)
+        op.absolute_symlink(input.fastq_2, output.fastq_2)
 
 # Function to retrieve read length from sample table
 def get_overhang(wildcards,build = False):
@@ -118,7 +138,7 @@ rule _star_symlink_star_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_star)
     run:
-        op.relative_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bam, output.bam)
 
 
 # Create symlink in subdirectory where duplicates will be marked by the `utils` module
@@ -133,7 +153,7 @@ rule _star_symlink_sorted_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_star)
     run:
-        op.relative_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bam, output.bam)
         os.remove(input.star_bam)
         shell("touch {input.star_bam}.deleted")
 
@@ -151,8 +171,8 @@ rule _star_output_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_star)
     run:
-        op.relative_symlink(input.bam, output.bam)
-        op.relative_symlink(input.bai, output.bam + ".bai")
+        op.relative_symlink(input.bam, output.bam, in_module = True)
+        op.relative_symlink(input.bai, output.bam + ".bai", in_module = True)
         os.remove(input.sorted_bam)
         shell("touch {input.sorted_bam}.deleted")
 
diff --git a/modules/star/1.4/star_grouped.smk b/modules/star/1.4/star_grouped.smk
index 43e84d936..1c0bab3cc 100644
--- a/modules/star/1.4/star_grouped.smk
+++ b/modules/star/1.4/star_grouped.smk
@@ -18,6 +18,26 @@ import os
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["star"]`
 CFG = op.setup_module(
@@ -54,8 +74,8 @@ rule _star_input_fastq:
     group: 
         CFG["group"]["star"]
     run:
-        op.relative_symlink(input.fastq_1, output.fastq_1)
-        op.relative_symlink(input.fastq_2, output.fastq_2)
+        op.absolute_symlink(input.fastq_1, output.fastq_1)
+        op.absolute_symlink(input.fastq_2, output.fastq_2)
 
 # Function to retrieve read length from sample table
 def get_overhang(wildcards,build = False):
@@ -123,7 +143,7 @@ rule _star_symlink_star_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_star)
     run:
-        op.relative_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bam, output.bam)
 
 
 # Create symlink in subdirectory where duplicates will be marked by the `utils` module
@@ -138,7 +158,7 @@ rule _star_symlink_sorted_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_star)
     run:
-        op.relative_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bam, output.bam)
         os.remove(input.star_bam)
         shell("touch {input.star_bam}.deleted")
 
@@ -156,8 +176,8 @@ rule _star_output_bam:
     wildcard_constraints: 
         sample_id = "|".join(sample_ids_star)
     run:
-        op.relative_symlink(input.bam, output.bam)
-        op.relative_symlink(input.bai, output.bam + ".bai")
+        op.relative_symlink(input.bam, output.bam, in_module=True)
+        op.relative_symlink(input.bai, output.bam + ".bai", in_module=True)
         os.remove(input.sorted_bam)
         shell("touch {input.sorted_bam}.deleted")
 
diff --git a/modules/starfish/2.0/starfish.smk b/modules/starfish/2.0/starfish.smk
index 5fe593029..d3213e5ae 100644
--- a/modules/starfish/2.0/starfish.smk
+++ b/modules/starfish/2.0/starfish.smk
@@ -31,8 +31,10 @@ except ModuleNotFoundError:
 
 current_version = pkg_resources.get_distribution("oncopipe").version
 if version.parse(current_version) < version.parse(min_oncopipe_version):
-    print('\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + '\x1b[0m')
-    print('\x1b[0;31;40m' + f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m')
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
     sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
 
 # End of dependency checking section 
@@ -75,8 +77,8 @@ rule _starfish_input_vcf:
         vcf = CFG["dirs"]["inputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{caller}.vcf.gz", 
         tbi = CFG["dirs"]["inputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.{caller}.vcf.gz.tbi"
     run:
-        op.relative_symlink(input.vcf, output.vcf), 
-        op.relative_symlink(input.vcf + ".tbi", output.tbi)
+        op.absolute_symlink(input.vcf, output.vcf), 
+        op.absolute_symlink(input.vcf + ".tbi", output.tbi)
 
 
 # Run Starfish
diff --git a/modules/strelka/1.1/strelka.smk b/modules/strelka/1.1/strelka.smk
index d7beac0a4..089fa0857 100755
--- a/modules/strelka/1.1/strelka.smk
+++ b/modules/strelka/1.1/strelka.smk
@@ -15,6 +15,26 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.12"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["strelka"]`
 CFG = op.setup_module(
@@ -33,10 +53,7 @@ localrules:
     _strelka_configure_unpaired,
     _strelka_filter_combine,
     _strelka_output_filtered_vcf,
-    _strelka_all,
-
-wildcard_constraints: 
-    var_type = "somatic.snvs|somatic.indels|variants"
+    _strelka_all
 
 ##### RULES #####
 
@@ -50,9 +67,9 @@ rule _strelka_input_bam:
         bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai",
         crai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.crai"
     run:
-        op.relative_symlink(input.bam, output.bam)
-        op.relative_symlink(input.bai, output.bai)
-        op.relative_symlink(input.bai, output.crai)
+        op.absolute_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bai, output.bai)
+        op.absolute_symlink(input.bai, output.crai)
 
 
 rule _strelka_dummy_vcf:
@@ -102,14 +119,29 @@ def _strelka_get_indel_cli_arg(vcf_in = config["lcr-modules"]["strelka"]["inputs
         return param
     return _strelka_get_indel_cli_custom
 
+def _strelka_get_capspace(wildcards):
+    CFG = config["lcr-modules"]["strelka"]
+    try:
+        # Get the appropriate capture space for this sample
+        this_bed = op.get_capture_space(CFG, wildcards.tumour_id, wildcards.genome_build, wildcards.seq_type, "bed.gz")
+        this_bed = reference_files(this_bed)
+    except NameError:
+        # If we are using an older version of the reference workflow, use the same region file as the genome sample
+        this_bed = str(config["lcr-modules"]["strelka"]["dirs"]["chrom_bed"] + "{genome_build}.main_chroms.bed.gz")
+    # If this is a genome sample, return a BED file listing all chromosomes
+    if wildcards.seq_type != "capture":
+        this_bed = str(config["lcr-modules"]["strelka"]["dirs"]["chrom_bed"] + "{genome_build}.main_chroms.bed.gz")
+
+    return this_bed
+
 
 rule _strelka_configure_paired: # Somatic
     input:
         tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam",
         normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam",
         fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"),
-        bedz = str(rules._strelka_index_bed.output.bedz),
-        indels = str(rules._strelka_input_vcf.output.vcf) if CFG["inputs"]["candidate_small_indels"] else str(rules._strelka_dummy_vcf.output)
+        indels = str(rules._strelka_input_vcf.output.vcf) if CFG["inputs"]["candidate_small_indels"] else str(rules._strelka_dummy_vcf.output),
+        bedz = _strelka_get_capspace
     output:
         runwf = CFG["dirs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/runWorkflow.py"
     log:
@@ -117,7 +149,7 @@ rule _strelka_configure_paired: # Somatic
         stderr = CFG["logs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/strelka_configure.stderr.log"
     params:
         indel_arg = _strelka_get_indel_cli_arg(),
-        opts = op.switch_on_wildcard("seq_type", CFG["options"]["configure"]),
+        opts = op.switch_on_wildcard("seq_type", CFG["options"]["configure"])
     wildcard_constraints:
         pair_status = "matched|unmatched"
     conda:
@@ -140,7 +172,7 @@ rule _strelka_configure_unpaired: # germline
     input:
         tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam",
         fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"),
-        bedz = str(rules._strelka_index_bed.output.bedz)
+        bedz = _strelka_get_capspace
     output:
         runwf = CFG["dirs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/runWorkflow.py"
     log:
@@ -195,8 +227,8 @@ rule _strelka_run_paired:
     input:
         runwf = CFG["dirs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/runWorkflow.py"
     output:
-        vcf_snvs = CFG["dirs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/results/variants/somatic.snvs.vcf.gz",
-        vcf_indels = CFG["dirs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/results/variants/somatic.indels.vcf.gz"
+        vcf_snvs = temp(CFG["dirs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/results/variants/somatic.snvs.vcf.gz"),
+        vcf_indels = temp(CFG["dirs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/results/variants/somatic.indels.vcf.gz")
     log:
         stdout = CFG["logs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/strelka_run.stdout.log",
         stderr = CFG["logs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/strelka_run.stderr.log"
@@ -234,6 +266,8 @@ rule _strelka_filter_combine:
     log:
         stdout = CFG["logs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/strelka_filter_combine.stdout.log",
         stderr = CFG["logs"]["strelka"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/strelka_filter_combine.stderr.log"
+    wildcard_constraints:
+        var_type = "somatic.snvs|somatic.indels|variants"
     shell:
         op.as_one_line("""
         bcftools concat -a {input.vcf} | 
@@ -256,6 +290,7 @@ def _strelka_get_output(wildcards):
         vcf = str(rules._strelka_filter_combine.output.vcf)
     return vcf
 
+
 # Symlinks the final output files into the module results directory (under '99-outputs/'). Links will always use "combined" in the name (dropping odd naming convention used by Strelka in unpaired mode)
 rule _strelka_output_filtered_vcf:
     input:
@@ -264,8 +299,8 @@ rule _strelka_output_filtered_vcf:
         vcf = CFG["dirs"]["outputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.strelka.combined.vcf.gz",
         vcf_tbi = CFG["dirs"]["outputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.strelka.combined.vcf.gz.tbi"
     run:
-        op.relative_symlink(input.vcf, output.vcf)
-        op.relative_symlink(str(input.vcf) + ".tbi", output.vcf_tbi)
+        op.relative_symlink(input.vcf, output.vcf, in_module=True)
+        op.relative_symlink(str(input.vcf) + ".tbi", output.vcf_tbi, in_module=True)
 
 
 
diff --git a/modules/stringtie/1.0/config/default.yaml b/modules/stringtie/1.0/config/default.yaml
new file mode 100644
index 000000000..0c91239ca
--- /dev/null
+++ b/modules/stringtie/1.0/config/default.yaml
@@ -0,0 +1,29 @@
+lcr-modules:
+    # The key below must match the `name` passed to op.setup_module() in stringtie.smk
+    stringtie:
+
+        inputs:
+            # Available wildcards: {seq_type} {genome_build} {sample_id}
+            # Module designed to work with STAR bams output from modules/star/1.4
+            sample_bam: "__UPDATE__"
+            sample_bai: "__UPDATE__"
+            XS_script: "{MODSDIR}/src/tagXSstrandedData.awk"
+
+        options:
+            stringtie_run: ""
+
+        conda_envs:
+            stringtie: "{MODSDIR}/envs/stringtie.yaml"
+
+        threads:
+            stringtie_run: 16
+
+        resources:
+            stringtie_run:
+                mem_mb: 15000
+
+        pairing_config:
+            mrna:
+                run_paired_tumours: false
+                run_unpaired_tumours_with: "no_normal"
+                run_paired_tumours_as_unpaired: true
\ No newline at end of file
diff --git a/modules/stringtie/1.0/envs/stringtie.yaml b/modules/stringtie/1.0/envs/stringtie.yaml
new file mode 120000
index 000000000..84dcdb47a
--- /dev/null
+++ b/modules/stringtie/1.0/envs/stringtie.yaml
@@ -0,0 +1 @@
+../../../../envs/stringtie/stringtie.yaml
\ No newline at end of file
diff --git a/modules/stringtie/1.0/schemas/base-1.0.yaml b/modules/stringtie/1.0/schemas/base-1.0.yaml
new file mode 120000
index 000000000..0a69d1ceb
--- /dev/null
+++ b/modules/stringtie/1.0/schemas/base-1.0.yaml
@@ -0,0 +1 @@
+../../../../schemas/base/base-1.0.yaml
\ No newline at end of file
diff --git a/modules/stringtie/1.0/src/tagXSstrandedData.awk b/modules/stringtie/1.0/src/tagXSstrandedData.awk
new file mode 100644
index 000000000..60d792a97
--- /dev/null
+++ b/modules/stringtie/1.0/src/tagXSstrandedData.awk
@@ -0,0 +1,41 @@
+## Authored by Alex Dobin, available at https://github.com/alexdobin/STAR/tree/master/extras/scripts
+# Appends an "XS:A:+" or "XS:A:-" strand attribute to SAM records; headers and unmapped reads pass through unchanged.
+# usage:
+# cat Aligned.out.sam | awk -v strType=2 -f tagXSstrandedData.awk 
+# strType defines strandedness of the libraries: strType = mate whose strand is the same as RNA strand.
+# For instance, for Illumina Tru-seq, strType=2 - the 2nd mate's strand is the same as RNA.
+# NOTE: relies on the and() bitwise function, which is a gawk extension (not available in POSIX awk).
+
+BEGIN {
+    OFS="\t";
+    strSym[0]="+";
+    strSym[1]="-";
+}
+
+{
+
+    if (substr($1,1,1)=="@" || $4==0)
+    {# header line (starts with "@"), or field 4 (POS) is 0 i.e. unmapped read - print unchanged
+        print;
+        next;
+    };
+
+    str=and($2,0x10)/0x10; # 1 when FLAG bit 0x10 is set, otherwise 0; used as the index into strSym
+
+    if (and($2,0x1)==0)
+    {# FLAG bit 0x1 is not set (single-end read) - treat the read as mate 1
+        mate=1;
+    } else
+    {
+        mate=and($2,0x40)/0x40+2*and($2,0x80)/0x80; # 1 if only bit 0x40 is set, 2 if only bit 0x80 is set, 0 or 3 otherwise
+    };
+
+    if (mate>0 && mate <3)
+    {# mate is 1 or 2, i.e. defined - add XS tag
+       if (mate!=strType) str=1-str; # flip the strand when this mate is not the one named by strType
+       print $0 "\t" "XS:A:" strSym[str];
+    } else 
+    {# mate is 0 or 3, i.e. not defined - print the record without an XS tag
+       print;
+    };    
+}
diff --git a/modules/stringtie/1.0/stringtie.smk b/modules/stringtie/1.0/stringtie.smk
new file mode 100644
index 000000000..5a4799f90
--- /dev/null
+++ b/modules/stringtie/1.0/stringtie.smk
@@ -0,0 +1,127 @@
+#!/usr/bin/env snakemake
+
+
+##### ATTRIBUTION #####
+
+
+# Original Author:  Krysta Coyle
+# Module Author:    Krysta Coyle
+# Contributors:     N/A
+
+
+##### SETUP #####
+
+# Import package with useful functions for developing analysis modules
+import oncopipe as op
+
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# TODO: declare the "packaging" module as a dependency of LCR-modules or oncopipe so the import guard above is no longer needed
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning('\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                   "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m')
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section
+
+# Setup module and store module-specific configuration in `CFG`
+# `CFG` is a shortcut to `config["lcr-modules"]["stringtie"]`
+CFG = op.setup_module(
+    name = "stringtie",
+    version = "1.0",
+    subdirectories = ["inputs", "stringtie", "outputs"],
+)
+
+# Define rules to be run locally when using a compute cluster
+localrules:
+    _stringtie_input_bam,
+    _stringtie_output_gtf,
+    _stringtie_all,
+
+
+##### RULES #####
+
+
+# Symlinks the input BAM and its index into the module results directory (under '00-inputs/'); shares a group with _stringtie_run
+rule _stringtie_input_bam:
+    input:
+        bam = CFG["inputs"]["sample_bam"],
+        bai = CFG["inputs"]["sample_bai"]
+    output:
+        bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam",
+        bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai"
+    group:
+        "input_and_stringtie_run"
+    run:
+        op.absolute_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bai, output.bai)
+
+
+# Run stringtie: pipe the BAM as SAM (samtools view -h) through the XS-tagging awk script into stringtie (trailing "-"), using the reference GTF via -G
+rule _stringtie_run:
+    input:
+        bam = str(rules._stringtie_input_bam.output.bam),
+        ref_gtf = reference_files("genomes/{genome_build}/annotations/gencode_annotation-33.gtf"),
+        XS_script = CFG["inputs"]["XS_script"]
+    output:
+        gtf = CFG["dirs"]["stringtie"] + "{seq_type}--{genome_build}/{sample_id}/output.gtf"    
+    group: 
+        "input_and_stringtie_run"
+    log:
+        stdout = CFG["logs"]["stringtie"] + "{seq_type}--{genome_build}/{sample_id}/stringtie_run.stdout.log",
+        stderr = CFG["logs"]["stringtie"] + "{seq_type}--{genome_build}/{sample_id}/stringtie_run.stderr.log"
+    params:
+        opts = CFG["options"]["stringtie_run"]
+    conda:
+        CFG["conda_envs"]["stringtie"]
+    threads:
+        CFG["threads"]["stringtie_run"]
+    resources:
+        **CFG["resources"]["stringtie_run"]    # Unpacks every resource configured for this rule (e.g. mem_mb), so new ones only need adding to the config.
+    shell:
+        op.as_one_line("""
+        samtools view -h {input.bam} | \
+        awk -v strType=2 -f {input.XS_script} | \
+        stringtie -o {output.gtf} -G {input.ref_gtf} {params.opts} \
+        -p {threads} -\
+        > {log.stdout} 2> {log.stderr}
+        """)
+
+
+# Links the stringtie GTF into '99-outputs/' with a link relative to the module directory. NOTE(review): the link is named ".output.filt.gtf" although no filtering rule is visible here - confirm the name.
+rule _stringtie_output_gtf:
+    input:
+        gtf = str(rules._stringtie_run.output.gtf)
+    output:
+        gtf = CFG["dirs"]["outputs"] + "gtf/{seq_type}--{genome_build}/{sample_id}.output.filt.gtf"
+    run:
+        op.relative_symlink(input.gtf, output.gtf, in_module=True)
+
+
+# Target rule: requests the output GTF symlink for every sample row in CFG["samples"]
+rule _stringtie_all:
+    input:
+        expand(
+            [
+                str(rules._stringtie_output_gtf.output.gtf),
+            ],
+            zip,  # Combine the three columns row by row with zip() instead of taking their product
+            seq_type=CFG["samples"]["seq_type"],
+            genome_build=CFG["samples"]["genome_build"],
+            sample_id=CFG["samples"]["sample_id"])
+
+
+##### CLEANUP #####
+
+
+# Perform some clean-up tasks, including storing the module-specific
+# configuration on disk and deleting the `CFG` variable
+op.cleanup_module(CFG)
diff --git a/modules/stringtie/CHANGELOG.md b/modules/stringtie/CHANGELOG.md
new file mode 100644
index 000000000..7f186f08e
--- /dev/null
+++ b/modules/stringtie/CHANGELOG.md
@@ -0,0 +1,15 @@
+# Changelog
+
+All notable changes to the `stringtie` module will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [1.0] - 2021-11-30
+
+This release was authored by Krysta Coyle.
+
+- awk script necessary to introduce XS tags into STAR-aligned BAMs. Without XS tags, stringtie generates single-exon transcripts.
+- A reference GTF is mandatory in this module; reference-guided assembly is highly recommended for well-annotated genomes.
+- Initial estimates of memory are conservative.
+- Module designed to work with the STAR BAMs produced by modules/star/1.4.
diff --git a/modules/utils/2.1/utils.smk b/modules/utils/2.1/utils.smk
index 585005fa6..c758f24c1 100644
--- a/modules/utils/2.1/utils.smk
+++ b/modules/utils/2.1/utils.smk
@@ -17,6 +17,26 @@ from os.path import join
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 _UTILS = config["lcr-modules"]["utils"]
 LOG = "/logs/" + op._session.launched_fmt
 
diff --git a/modules/varscan/1.1/varscan.smk b/modules/varscan/1.1/varscan.smk
index bf5baed39..856ee78e9 100644
--- a/modules/varscan/1.1/varscan.smk
+++ b/modules/varscan/1.1/varscan.smk
@@ -15,6 +15,26 @@
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["varscan"]`
 CFG = op.setup_module(
@@ -47,8 +67,8 @@ rule _varscan_input_bam:
     output:
         bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam"
     run:
-        op.relative_symlink(input.bam, output.bam)
-        op.relative_symlink(input.bai, output.bam + ".bai")
+        op.absolute_symlink(input.bam, output.bam)
+        op.absolute_symlink(input.bai, output.bam + ".bai")
 
 
 # Pulls in list of chromosomes for the genome builds
@@ -58,7 +78,7 @@ checkpoint _varscan_input_chroms:
     output:
         txt = CFG["dirs"]["inputs"] + "chroms/{genome_build}/main_chromosomes.txt"
     run:
-        op.relative_symlink(input.txt, output.txt)
+        op.absolute_symlink(input.txt, output.txt)
 
 # generate mpileups for tumour and normal bams separately. 
 # If we parallelize this by chromosome we will need 2 * 2 threads per chromosome but this should be a lot more efficient
@@ -247,7 +267,7 @@ rule _varscan_symlink_maf:
     output:
         vcf = CFG["dirs"]["maf"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/merged.vcf.gz"
     run:
-        op.relative_symlink(input.vcf, output.vcf)
+        op.absolute_symlink(input.vcf, output.vcf)
 
 
 # Symlinks the final output files into the module results directory (under '99-outputs/')
@@ -259,8 +279,8 @@ rule _varscan_output_vcf:
         vcf = CFG["dirs"]["outputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}-merged.vcf.gz",
         tbi = CFG["dirs"]["outputs"] + "vcf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}-merged.vcf.gz.tbi"
     run:
-        op.relative_symlink(input.vcf, output.vcf)
-        op.relative_symlink(input.tbi, output.tbi)
+        op.relative_symlink(input.vcf, output.vcf, in_module=True)
+        op.relative_symlink(input.tbi, output.tbi, in_module=True)
 
 
 rule _varscan_output_maf:
@@ -270,7 +290,7 @@ rule _varscan_output_maf:
     output:
         maf = CFG["dirs"]["outputs"] + "maf/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}-merged.maf"
     run:
-        op.relative_symlink(input.maf, output.maf)
+        op.relative_symlink(input.maf, output.maf, in_module=True)
 
 """
 def _varscan_get_output(wildcards):
diff --git a/modules/vcf2maf/1.2/vcf2maf.smk b/modules/vcf2maf/1.2/vcf2maf.smk
index d53c840b3..6d5e48b0f 100644
--- a/modules/vcf2maf/1.2/vcf2maf.smk
+++ b/modules/vcf2maf/1.2/vcf2maf.smk
@@ -16,6 +16,26 @@ from os.path import join
 
 import oncopipe as op
 
+# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+
+# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe
+
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    logger.warning(
+                '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}'
+                "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m'
+                )
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section 
+
 # Setup module and store module-specific configuration in `CONFIG`
 CFG = op.setup_module(
     name = "vcf2maf",
@@ -31,7 +51,7 @@ localrules:
     _vcf2maf_crossmap,
     _vcf2maf_all
 
-VERSION_MAP = {
+VCF2MAF_GENOME_VERSION_MAP = {
     "grch37": "GRCh37",
     "hg38": "GRCh38",
     "hs37d5": "GRCh37"
@@ -46,7 +66,7 @@ rule _vcf2maf_input_vcf:
     output:
         vcf_gz = CFG["dirs"]["inputs"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.vcf.gz"
     run:
-        op.relative_symlink(input.vcf_gz, output.vcf_gz)
+        op.absolute_symlink(input.vcf_gz, output.vcf_gz)
 
 rule _vcf2maf_decompress_vcf:
     input:
@@ -69,7 +89,7 @@ rule _vcf2maf_run:
         stderr = CFG["logs"]["vcf2maf"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}_vcf2maf.stderr.log",
     params:
         opts = CFG["options"]["vcf2maf"],
-        build = lambda w: VERSION_MAP[w.genome_build],
+        build = lambda w: VCF2MAF_GENOME_VERSION_MAP[w.genome_build],
         custom_enst = op.switch_on_wildcard("genome_build", CFG["switches"]["custom_enst"])
     conda:
         CFG["conda_envs"]["vcf2maf"]
@@ -91,7 +111,8 @@ rule _vcf2maf_run:
         --vep-data {input.vep_cache}
         --vep-path $vepPATH {params.opts}
         --custom-enst {params.custom_enst}
-        > {log.stdout} 2> {log.stderr}
+        > {log.stdout} 2> {log.stderr} &&
+        touch {output.vep}
         """)
 
 
@@ -142,8 +163,8 @@ rule _vcf2maf_output_maf:
     params:
         chain = lambda w: "hg38ToHg19" if "38" in str({w.genome_build}) else "hg19ToHg38"
     run:
-        op.relative_symlink(input.maf, output.maf)
-        op.relative_symlink((input.maf_converted+str("_")+str(params.chain)+str(".maf")), (output.maf[:-4]+str(".converted_")+str(params.chain)+str(".maf")))
+        op.relative_symlink(input.maf, output.maf, in_module=True)
+        op.relative_symlink((input.maf_converted+str("_")+str(params.chain)+str(".maf")), (output.maf[:-4]+str(".converted_")+str(params.chain)+str(".maf")), in_module=True)
 
 # Generates the target sentinels for each run, which generate the symlinks
 rule _vcf2maf_all:
@@ -161,4 +182,4 @@ rule _vcf2maf_all:
 
 # Perform some clean-up tasks, including storing the module-specific
 # configuration on disk and deleting the `CFG` variable
-op.cleanup_module(CFG)
\ No newline at end of file
+op.cleanup_module(CFG)
diff --git a/modules/vcf2maf/1.3/config/default.yaml b/modules/vcf2maf/1.3/config/default.yaml
new file mode 100644
index 000000000..493c9564b
--- /dev/null
+++ b/modules/vcf2maf/1.3/config/default.yaml
@@ -0,0 +1,55 @@
+lcr-modules:
+    
+    vcf2maf:
+        dirs: 
+            _parent: "__UPDATE__" #example for combining with Strelka module: "strelka-1.1_vcf2maf-1.0"
+        inputs:
+            # Available wildcards: {out_dir}, {seq_type}, {genome_build}, {tumour_id}, {normal_id}, {pair_status}, {vcf_name}, {base_name}
+            vep_cache: "__UPDATE__" #example "ref/ensembl_vep_cache/"
+            sample_vcf_gz: "__UPDATE__" # full path (with wildcards) to the compressed VCFs from your variant caller; everything unique to the variant caller's naming scheme must be captured by {base_name}
+            convert_coord: "{SCRIPTSDIR}/crossmap/1.0/convert_maf_coords.sh"
+            src_dir: "{MODSDIR}/src"
+        vcf_base_name: "__UPDATE__" #put the consistent portion of the file name here. For Strelka, this is "combined.passed"
+        options:
+            vcf2maf: "--vcf-tumor-id TUMOR --vcf-normal-id NORMAL --cache-version 86"
+            # --filter-vcf     A VCF for FILTER tag common_variant. Set to 0 to disable [~/.vep/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz]
+            #--species        Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens]
+            #--cache-version  Version of offline cache to use with VEP (e.g. 75, 84, 91) [Default: Installed version]
+            species: "homo_sapiens"
+            gnomAD_cutoff: 0.001 # cut-off to be used for AF frequency of germline variants in gnomAD
+        # Optional path to a text file listing custom ENST IDs that override the canonical transcript selection.
+        # The path is passed to the --custom-enst flag of vcf2maf.
+        # If no non-canonical transcript IDs should be used, leave the switch values empty.
+        switches:
+            custom_enst:
+              hg38: ""
+              grch37: ""
+              hs37d5: ""
+
+        conda_envs:
+            vcf2maf: "{MODSDIR}/envs/vcf2maf-1.6.18.yaml"
+            crossmap: "{SCRIPTSDIR}/crossmap/1.0/convert_maf_coords.yaml"
+            bcftools: "{MODSDIR}/envs/bcftools-1.10.2.yaml"
+            
+        threads:
+            vcf2maf: 12
+            annotate: 4
+        
+        resources:
+            vcf2maf: 
+                mem_mb: 12000
+                vcf: 1
+            annotate: 
+                mem_mb: 12000
+            crossmap:
+                mem_mb: 12000
+
+        pairing_config:
+            genome:
+                run_paired_tumours: True
+                run_unpaired_tumours_with: "unmatched_normal"
+                run_paired_tumours_as_unpaired: False
+            capture:
+                run_paired_tumours: True
+                run_unpaired_tumours_with: "unmatched_normal"
+                run_paired_tumours_as_unpaired: False
diff --git a/modules/vcf2maf/1.3/envs/bcftools-1.10.2.yaml b/modules/vcf2maf/1.3/envs/bcftools-1.10.2.yaml
new file mode 120000
index 000000000..72959e7bb
--- /dev/null
+++ b/modules/vcf2maf/1.3/envs/bcftools-1.10.2.yaml
@@ -0,0 +1 @@
+../../../../envs/bcftools/bcftools-1.10.2.yaml
\ No newline at end of file
diff --git a/modules/vcf2maf/1.3/envs/samtools-1.9.yaml b/modules/vcf2maf/1.3/envs/samtools-1.9.yaml
new file mode 120000
index 000000000..ab29288bb
--- /dev/null
+++ b/modules/vcf2maf/1.3/envs/samtools-1.9.yaml
@@ -0,0 +1 @@
+../../../../envs/samtools/samtools-1.9.yaml
\ No newline at end of file
diff --git a/modules/vcf2maf/1.3/envs/vcf2maf-1.6.18.yaml b/modules/vcf2maf/1.3/envs/vcf2maf-1.6.18.yaml
new file mode 120000
index 000000000..829077c73
--- /dev/null
+++ b/modules/vcf2maf/1.3/envs/vcf2maf-1.6.18.yaml
@@ -0,0 +1 @@
+../../../../envs/vcf2maf/vcf2maf-1.6.18.yaml
\ No newline at end of file
diff --git a/modules/vcf2maf/1.3/schemas/base-1.0.yaml b/modules/vcf2maf/1.3/schemas/base-1.0.yaml
new file mode 120000
index 000000000..0a69d1ceb
--- /dev/null
+++ b/modules/vcf2maf/1.3/schemas/base-1.0.yaml
@@ -0,0 +1 @@
+../../../../schemas/base/base-1.0.yaml
\ No newline at end of file
diff --git a/modules/vcf2maf/1.3/src/vcf2maf.pl b/modules/vcf2maf/1.3/src/vcf2maf.pl
new file mode 100755
index 000000000..23a35cf44
--- /dev/null
+++ b/modules/vcf2maf/1.3/src/vcf2maf.pl
@@ -0,0 +1,1408 @@
+#!/usr/bin/env perl
+
+# vcf2maf - Convert a VCF into a MAF by mapping each variant to only one of all possible gene isoforms
+
+use strict;
+use warnings;
+use IO::File;
+use Getopt::Long qw( GetOptions );
+use Pod::Usage qw( pod2usage );
+use File::Copy qw( move );
+use File::Path qw( mkpath );
+use Config;
+use Text::Wrap;
+use Data::Dumper;
+
+# Set any default paths and constants
+my ( $tumor_id, $normal_id ) = ( "TUMOR", "NORMAL" );
+my ( $vep_path, $vep_data, $vep_forks, $buffer_size, $any_allele, $inhibit_vep, $online, $vep_custom, $vep_config, $vep_overwrite  ) = ( "$ENV{HOME}/miniconda3/bin", "$ENV{HOME}/.vep", 4, 5000, 0, 0, 0, "", "", 0 );
+my ( $ref_fasta ) = ( "$ENV{HOME}/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz" );
+my ( $species, $ncbi_build, $cache_version, $maf_center, $retain_info, $retain_fmt, $retain_ann, $min_hom_vaf, $max_subpop_af ) = ( "homo_sapiens", "GRCh37", "", ".", "", "", "", 0.7, 0.0004 );
+my $perl_bin = $Config{perlpath};
+
+# Set default formatting for any output command lines:
+$Text::Wrap::huge = 'overflow';
+$Text::Wrap::separator = " \\$/";
+
+# Hash to convert 3-letter amino-acid codes to their 1-letter codes
+my %aa3to1 = qw( Ala A Arg R Asn N Asp D Asx B Cys C Glu E Gln Q Glx Z Gly G His H Ile I Leu L
+    Lys K Met M Phe F Pro P Ser S Thr T Trp W Tyr Y Val V Xxx X Ter * );
+
+# Prioritize Sequence Ontology terms in order of severity, as estimated by Ensembl:
+# https://ensembl.org/info/genome/variation/prediction/predicted_data.html
+sub GetEffectPriority {
+    my ( $effect ) = @_;
+    $effect = '' unless( defined $effect );
+    my %effectPriority = (
+        'transcript_ablation' => 1, # A feature ablation whereby the deleted region includes a transcript feature
+        'exon_loss_variant' => 1, # A sequence variant whereby an exon is lost from the transcript
+        'splice_donor_variant' => 2, # A splice variant that changes the 2 base region at the 5' end of an intron
+        'splice_acceptor_variant' => 2, # A splice variant that changes the 2 base region at the 3' end of an intron
+        'stop_gained' => 3, # A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript
+        'frameshift_variant' => 3, # A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three
+        'stop_lost' => 3, # A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript
+        'start_lost' => 4, # A codon variant that changes at least one base of the canonical start codon
+        'initiator_codon_variant' => 4, # A codon variant that changes at least one base of the first codon of a transcript
+        'disruptive_inframe_insertion' => 5, # An inframe increase in cds length that inserts one or more codons into the coding sequence within an existing codon
+        'disruptive_inframe_deletion' => 5, # An inframe decrease in cds length that deletes bases from the coding sequence starting within an existing codon
+        'conservative_inframe_insertion' => 5, # An inframe increase in cds length that inserts one or more codons into the coding sequence between existing codons
+        'conservative_inframe_deletion' => 5, # An inframe decrease in cds length that deletes one or more entire codons from the coding sequence but does not change any remaining codons
+        'inframe_insertion' => 5, # An inframe non synonymous variant that inserts bases into the coding sequence
+        'inframe_deletion' => 5, # An inframe non synonymous variant that deletes bases from the coding sequence
+        'protein_altering_variant' => 5, # A sequence variant which is predicted to change the protein encoded in the coding sequence
+        'missense_variant' => 6, # A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved
+        'conservative_missense_variant' => 6, # A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious
+        'rare_amino_acid_variant' => 6, # A sequence variant whereby at least one base of a codon encoding a rare amino acid is changed, resulting in a different encoded amino acid
+        'transcript_amplification' => 7, # A feature amplification of a region containing a transcript
+        'splice_region_variant' => 8, # A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron
+        'start_retained_variant' => 9, # A sequence variant where at least one base in the start codon is changed, but the start remains
+        'stop_retained_variant' => 9, # A sequence variant where at least one base in the terminator codon is changed, but the terminator remains
+        'synonymous_variant' => 9, # A sequence variant where there is no resulting change to the encoded amino acid
+        'incomplete_terminal_codon_variant' => 10, # A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed
+        'coding_sequence_variant' => 11, # A sequence variant that changes the coding sequence
+        'mature_miRNA_variant' => 11, # A transcript variant located with the sequence of the mature miRNA
+        'exon_variant' => 11, # A sequence variant that changes exon sequence
+        '5_prime_UTR_variant' => 12, # A UTR variant of the 5' UTR
+        '5_prime_UTR_premature_start_codon_gain_variant' => 12, # snpEff-specific effect, creating a start codon in 5' UTR
+        '3_prime_UTR_variant' => 12, # A UTR variant of the 3' UTR
+        'non_coding_exon_variant' => 13, # A sequence variant that changes non-coding exon sequence
+        'non_coding_transcript_exon_variant' => 13, # snpEff-specific synonym for non_coding_exon_variant
+        'non_coding_transcript_variant' => 14, # A transcript variant of a non coding RNA gene
+        'nc_transcript_variant' => 14, # A transcript variant of a non coding RNA gene (older alias for non_coding_transcript_variant)
+        'intron_variant' => 14, # A transcript variant occurring within an intron
+        'intragenic_variant' => 14, # A variant that occurs within a gene but falls outside of all transcript features. This occurs when alternate transcripts of a gene do not share overlapping sequence
+        'INTRAGENIC' => 14, # snpEff-specific synonym of intragenic_variant
+        'NMD_transcript_variant' => 15, # A variant in a transcript that is the target of NMD
+        'upstream_gene_variant' => 16, # A sequence variant located 5' of a gene
+        'downstream_gene_variant' => 16, # A sequence variant located 3' of a gene
+        'TFBS_ablation' => 17, # A feature ablation whereby the deleted region includes a transcription factor binding site
+        'TFBS_amplification' => 17, # A feature amplification of a region containing a transcription factor binding site
+        'TF_binding_site_variant' => 17, # A sequence variant located within a transcription factor binding site
+        'regulatory_region_ablation' => 17, # A feature ablation whereby the deleted region includes a regulatory region
+        'regulatory_region_amplification' => 17, # A feature amplification of a region containing a regulatory region
+        'regulatory_region_variant' => 17, # A sequence variant located within a regulatory region
+        'regulatory_region' =>17, # snpEff-specific effect that should really be regulatory_region_variant
+        'feature_elongation' => 18, # A sequence variant that causes the extension of a genomic feature, with regard to the reference sequence
+        'feature_truncation' => 18, # A sequence variant that causes the reduction of a genomic feature, with regard to the reference sequence
+        'intergenic_variant' => 19, # A sequence variant located in the intergenic region, between genes
+        'intergenic_region' => 19, # snpEff-specific effect that should really be intergenic_variant
+        '' => 20
+    );
+    unless( defined $effectPriority{$effect} ) {
+        warn "WARNING: Unrecognized effect \"$effect\". Assigning lowest priority!\n";
+        return 20;
+    }
+    return $effectPriority{$effect};
+}
+
+# Prioritize the transcript biotypes that variants are annotated to, based on disease significance:
+# All possible biotypes are defined here: https://www.gencodegenes.org/pages/biotypes.html
+sub GetBiotypePriority {
+    my ( $biotype ) = @_;
+    $biotype = '' unless( defined $biotype );
+    my %biotype_priority = (
+        'protein_coding' => 1, # Contains an open reading frame (ORF)
+        'LRG_gene' => 2, # Gene in a "Locus Reference Genomic" region known to have disease-related sequence variations
+        'IG_C_gene' => 2, # Immunoglobulin (Ig) variable chain genes imported or annotated according to the IMGT
+        'IG_D_gene' => 2, # Immunoglobulin (Ig) variable chain genes imported or annotated according to the IMGT
+        'IG_J_gene' => 2, # Immunoglobulin (Ig) variable chain genes imported or annotated according to the IMGT
+        'IG_LV_gene' => 2, # Immunoglobulin (Ig) variable chain genes imported or annotated according to the IMGT
+        'IG_V_gene' => 2, # Immunoglobulin (Ig) variable chain genes imported or annotated according to the IMGT
+        'TR_C_gene' => 2, # T-cell receptor (TcR) genes imported or annotated according to the IMGT
+        'TR_D_gene' => 2, # T-cell receptor (TcR) genes imported or annotated according to the IMGT
+        'TR_J_gene' => 2, # T-cell receptor (TcR) genes imported or annotated according to the IMGT
+        'TR_V_gene' => 2, # T-cell receptor (TcR) genes imported or annotated according to the IMGT
+        'miRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
+        'snRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
+        'snoRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
+        'ribozyme' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
+        'tRNA' => 3, #Added by Y. Boursin
+        'sRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
+        'scaRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
+        'rRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
+        'scRNA' => 3, # Non-coding RNA predicted using sequences from Rfam and miRBase
+        'lincRNA' => 3, # Long, intervening noncoding (linc) RNAs, that can be found in evolutionarily conserved, intergenic regions
+        'lncRNA' => 3, # Replaces 3prime_overlapping_ncRNA, antisense, bidirectional_promoter_lncRNA, lincRNA, macro_lncRNA, non_coding, processed_transcript, sense_intronic and sense_overlapping
+        'bidirectional_promoter_lncrna' => 3, # A non-coding locus that originates from within the promoter region of a protein-coding gene, with transcription proceeding in the opposite direction on the other strand
+        'bidirectional_promoter_lncRNA' => 3, # A non-coding locus that originates from within the promoter region of a protein-coding gene, with transcription proceeding in the opposite direction on the other strand
+        'known_ncrna' => 4,
+        'vaultRNA' => 4, # Short non coding RNA genes that form part of the vault ribonucleoprotein complex
+        'macro_lncRNA' => 4, # unspliced lncRNAs that are several kb in size
+        'Mt_tRNA' => 4, # Non-coding RNA predicted using sequences from RFAM and miRBase
+        'Mt_rRNA' => 4, # Non-coding RNA predicted using sequences from RFAM and miRBase
+        'antisense' => 5, # Has transcripts that overlap the genomic span (i.e. exon or introns) of a protein-coding locus on the opposite strand
+        'antisense_RNA' => 5, # Alias for antisense (Y. Boursin)
+        'sense_intronic' => 5, # Long non-coding transcript in introns of a coding gene that does not overlap any exons
+        'sense_overlapping' => 5, # Long non-coding transcript that contains a coding gene in its intron on the same strand
+        '3prime_overlapping_ncrna' => 5, # Transcripts where ditag and/or published experimental data strongly supports the existence of short non-coding transcripts transcribed from the 3'UTR
+        '3prime_overlapping_ncRNA' => 5, # Transcripts where ditag and/or published experimental data strongly supports the existence of short non-coding transcripts transcribed from the 3'UTR
+        'misc_RNA' => 5, # Non-coding RNA predicted using sequences from RFAM and miRBase
+        'non_coding' => 5, # Transcript which is known from the literature to not be protein coding
+        'regulatory_region' => 6, # A region of sequence that is involved in the control of a biological process
+        'disrupted_domain' => 6, # Otherwise viable coding region omitted from this alternatively spliced transcript because the splice variation affects a region coding for a protein domain
+        'processed_transcript' => 6, # Doesn't contain an ORF
+        'TEC' => 6, # To be Experimentally Confirmed. This is used for non-spliced EST clusters that have polyA features. This category has been specifically created for the ENCODE project to highlight regions that could indicate the presence of protein coding genes that require experimental validation, either by 5' RACE or RT-PCR to extend the transcripts, or by confirming expression of the putatively-encoded peptide with specific antibodies
+        'TF_binding_site' => 7, # A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex
+        'CTCF_binding_site' =>7, # A transcription factor binding site with consensus sequence CCGCGNGGNGGCAG, bound by CCCTF-binding factor
+        'promoter_flanking_region' => 7, # A region immediately adjacent to a promoter which may or may not contain transcription factor binding sites
+        'enhancer' => 7, # A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter
+        'promoter' => 7, # A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery
+        'open_chromatin_region' => 7, # A DNA sequence that in the normal state of the chromosome corresponds to an unfolded, un-complexed stretch of double-stranded DNA
+        'retained_intron' => 7, # Alternatively spliced transcript believed to contain intronic sequence relative to other, coding, variants
+        'nonsense_mediated_decay' => 7, # If the coding sequence (following the appropriate reference) of a transcript finishes >50bp from a downstream splice site then it is tagged as NMD. If the variant does not cover the full reference coding sequence then it is annotated as NMD if NMD is unavoidable i.e. no matter what the exon structure of the missing portion is the transcript will be subject to NMD
+        'non_stop_decay' => 7, # Transcripts that have polyA features (including signal) without a prior stop codon in the CDS, i.e. a non-genomic polyA tail attached directly to the CDS without 3' UTR. These transcripts are subject to degradation
+        'ambiguous_orf' => 7, # Transcript believed to be protein coding, but with more than one possible open reading frame
+        'pseudogene' => 8, # Have homology to proteins but generally suffer from a disrupted coding sequence and an active homologous gene can be found at another locus. Sometimes these entries have an intact coding sequence or an open but truncated ORF, in which case there is other evidence used (for example genomic polyA stretches at the 3' end) to classify them as a pseudogene. Can be further classified as one of the following
+        'processed_pseudogene' => 8, # Pseudogene that lack introns and is thought to arise from reverse transcription of mRNA followed by reinsertion of DNA into the genome
+        'polymorphic_pseudogene' => 8, # Pseudogene owing to a SNP/DIP but in other individuals/haplotypes/strains the gene is translated
+        'retrotransposed' => 8, # Pseudogene owing to a reverse transcribed and re-inserted sequence
+        'translated_processed_pseudogene' => 8, # Pseudogenes that have mass spec data suggesting that they are also translated
+        'translated_unprocessed_pseudogene' => 8, # Pseudogenes that have mass spec data suggesting that they are also translated
+        'transcribed_processed_pseudogene' => 8, # Pseudogene where protein homology or genomic structure indicates a pseudogene, but the presence of locus-specific transcripts indicates expression
+        'transcribed_unprocessed_pseudogene' => 8, # Pseudogene where protein homology or genomic structure indicates a pseudogene, but the presence of locus-specific transcripts indicates expression
+        'transcribed_unitary_pseudogene' => 8, #Pseudogene where protein homology or genomic structure indicates a pseudogene, but the presence of locus-specific transcripts indicates expression
+        'unitary_pseudogene' => 8, # A species specific unprocessed pseudogene without a parent gene, as it has an active orthologue in another species
+        'unprocessed_pseudogene' => 8, # Pseudogene that can contain introns since produced by gene duplication
+        'Mt_tRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
+        'tRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
+        'snoRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
+        'snRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
+        'scRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
+        'rRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
+        'misc_RNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
+        'miRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
+        'IG_C_pseudogene' => 8, # Inactivated immunoglobulin gene
+        'IG_D_pseudogene' => 8, # Inactivated immunoglobulin gene
+        'IG_J_pseudogene' => 8, # Inactivated immunoglobulin gene
+        'IG_V_pseudogene' => 8, # Inactivated immunoglobulin gene
+        'TR_J_pseudogene' => 8, # Inactivated T-cell receptor (TcR) gene
+        'TR_V_pseudogene' => 8, # Inactivated T-cell receptor (TcR) gene
+        'artifact' => 9, # Used to tag mistakes in the public databases (Ensembl/SwissProt/Trembl)
+        '' => 10
+    );
+    unless( defined $biotype_priority{$biotype} ) {
+        warn "WARNING: Unrecognized biotype \"$biotype\". Assigning lowest priority!\n";
+        return 10;
+    }
+    return $biotype_priority{$biotype};
+}
+
+# Check for missing or malformed arguments
+unless( @ARGV and $ARGV[0] =~ m/^-/ ) {
+    pod2usage( -verbose => 0, -message => "$0: Missing or invalid arguments!\n", -exitval => 2 );
+}
+
+# Parse options and print usage if there is a syntax error, or if usage was explicitly requested
+my ( $man, $help, $verbose ) = ( 0, 0, 0 );
+my ( $input_vcf, $output_maf, $tmp_dir, $custom_enst_file );
+my ( $vcf_tumor_id, $vcf_normal_id, $remap_chain );
+my ( $samtools, $tabix, $liftover ) ;
+GetOptions(
+    'help!' => \$help,
+    'man!' => \$man,
+    'verbose!' => \$verbose,
+    'input-vcf=s' => \$input_vcf,
+    'output-maf=s' => \$output_maf,
+    'tmp-dir=s' => \$tmp_dir,
+    'tumor-id=s' => \$tumor_id,
+    'normal-id=s' => \$normal_id,
+    'vcf-tumor-id=s' => \$vcf_tumor_id,
+    'vcf-normal-id=s' => \$vcf_normal_id,
+    'custom-enst=s' => \$custom_enst_file,
+    'vep-path=s' => \$vep_path,
+    'vep-data=s' => \$vep_data,
+    'vep-forks=s' => \$vep_forks,
+    'vep-custom=s' => \$vep_custom,
+    'vep-config=s' => \$vep_config,
+    'vep-overwrite!' => \$vep_overwrite,
+    'buffer-size=i' => \$buffer_size,
+    'any-allele!' => \$any_allele,
+    'inhibit-vep!' => \$inhibit_vep,
+    'online!' => \$online,
+    'ref-fasta=s' => \$ref_fasta,
+    'species=s' => \$species,
+    'ncbi-build=s' => \$ncbi_build,
+    'cache-version=s' => \$cache_version,
+    'maf-center=s' => \$maf_center,
+    'retain-info=s' => \$retain_info,
+    'retain-fmt=s' => \$retain_fmt,
+    'retain-ann=s' => \$retain_ann,
+    'min-hom-vaf=s' => \$min_hom_vaf,
+    'remap-chain=s' => \$remap_chain,
+    'max-subpop-af=f' => \$max_subpop_af,
+    'samtools-exec=s' => \$samtools,
+    'tabix-exec=s' => \$tabix,
+    'liftover-exec=s' => \$liftover,
+
+) or pod2usage( -verbose => 1, -input => \*DATA, -exitval => 2 );
+
+if( $man ) {
+    pod2usage( -verbose => 2, exitval => 0)
+}
+pod2usage( -verbose => 1, -input => \*DATA, -exitval => 0 ) if( $help );
+
+# Search the PATH for samtools and tabix, unless specified on the command line.
+# Error out if either is missing from PATH or the specified exec doesn't exist.
+
+( $samtools ) = map{chomp; $_}`which samtools` unless ( $samtools ) ;
+( $samtools ) or die "ERROR: Please install samtools on your PATH, or specify --samtools-exec\n";
+( -e $samtools ) or die "ERROR: Specified samtools: <$samtools> does not exist\n";
+
+( $tabix ) = map{chomp; $_}`which tabix` unless ( $tabix );
+( $tabix ) or die "ERROR: Please install tabix on your PATH, or specify --tabix-exec\n";
+( -e $tabix ) or die "ERROR: Specified tabix: <$tabix> does not exist\n";
+
+if( $remap_chain ) {
+    # When we're attempting a remap:
+    # Search the PATH for a liftOver executable, unless specified on the command line.
+    # Error out if liftOver is missing from PATH or the specified exec doesn't exist.
+
+    ( $liftover ) = map{chomp; $_}`which liftOver` unless ( $liftover );
+    ( $liftover ) or die "ERROR: Please install liftOver on your PATH or specify --liftover-exec\n";
+    ( -e $liftover ) or die "ERROR: Specified liftOver: <$liftover> does not exist\n";
+}
+
+# Check if required arguments are missing or problematic
+( defined $input_vcf and defined $output_maf ) or die "ERROR: Both input-vcf and output-maf must be defined!\n";
+( -s $input_vcf ) or die "ERROR: Provided --input-vcf is missing or empty: $input_vcf\n";
+( -s $ref_fasta ) or die "ERROR: Provided --ref-fasta is missing or empty: $ref_fasta\n";
+( $input_vcf !~ m/\.(gz|bz2|bcf)$/ ) or die "ERROR: Unfortunately, --input-vcf cannot be in a compressed format\n";
+
+# Unless specified, assume that the VCF uses the same sample IDs that the MAF will contain
+$vcf_tumor_id = $tumor_id unless( $vcf_tumor_id );
+$vcf_normal_id = $normal_id unless( $vcf_normal_id );
+
+# Load up the custom isoform overrides if provided:
+my %custom_enst;
+if( $custom_enst_file ) {
+    ( -s $custom_enst_file ) or die "ERROR: Provided --custom-enst file is missing or empty: $custom_enst_file\n";
+    warn "STATUS: Reading --custom-enst $custom_enst_file...\n" if( $verbose );
+    %custom_enst = map{chomp; ( $_, 1 )}`grep -v ^# $custom_enst_file | cut -f1`;
+}
+
+# Create a folder for the intermediate VCFs if user-defined, or default to the input VCF's folder
+if( defined $tmp_dir ) {
+    mkpath( $tmp_dir ) unless( -d $tmp_dir );
+}
+else {
+    $tmp_dir = substr( $input_vcf, 0, rindex( $input_vcf, "/" )) if( $input_vcf =~ m/\// );
+    $tmp_dir = "." unless( $tmp_dir ); # In case the input VCF is in the current working directory
+}
+
+# Also figure out the base name of the input VCF, because many intermediate files are named after it
+my $input_name = substr( $input_vcf, rindex( $input_vcf, "/" ) + 1 );
+$input_name =~ s/(\.vcf)*$//;
+
+# Preprocess the VCF into $tmp_dir/$input_name.split.vcf: if it contains SVs, split the breakpoints into separate lines before passing to VEP
+my ( $split_svs, $var_count ) = ( 0, 0 );
+my $orig_vcf_fh = IO::File->new( $input_vcf ) or die "ERROR: Couldn't open --input-vcf: $input_vcf!\n";
+my $split_vcf_fh = IO::File->new( "$tmp_dir/$input_name.split.vcf", "w" ) or die "ERROR: Couldn't open VCF: $tmp_dir/$input_name.split.vcf!\n";
+
+warn "STATUS: Preprocessing $input_vcf: split SV breakpoints before passing to VEP...\n" if( $verbose );
+
+while( my $line = $orig_vcf_fh->getline ) {
+    # If the file uses Mac OS 9 newlines, quit with an error. NOTE(review): "$" also matches just before a trailing LF, so CRLF lines appear to hit this die too, despite the message - confirm
+    ( $line !~ m/\r$/ ) or die "ERROR: Your VCF uses CR line breaks, which we can't support. Please use LF or CRLF.\n";
+
+    if( $line =~ m/^#/ ) {
+        $split_vcf_fh->print( $line ); # Write header lines unchanged
+        next;
+    }
+
+    chomp( $line );
+    ++$var_count;
+    my @cols = split( "\t", $line );
+    my %info = map {( m/=/ ? ( split( /=/, $_, 2 )) : ( $_, "1" ))} split( /\;/, $cols[7] ); # INFO as a hash; flag-style keys without "=" get the value "1"
+    if( $info{SVTYPE} ){
+        # Remove SVTYPE tag if REF/ALT alleles are defined, or VEP won't report transcript effects
+        if( $cols[3]=~m/^[ACGTN]+$/i and $cols[4]=~m/^[ACGTN,]+$/i ) {
+            $cols[7]=~s/(SVTYPE=\w+;|;SVTYPE=\w+|SVTYPE=\w+)//;
+            $split_vcf_fh->print( join( "\t", @cols ), "\n" );
+        }
+        # For legit SVs except insertions, split them into two separate breakpoint events. Other SVTYPEs without plain REF/ALT alleles match neither branch and are not written to the split VCF
+        elsif( $info{SVTYPE}=~m/^(BND|TRA|DEL|DUP|INV)$/ ) {
+            $split_svs = 1;
+            # Don't tell VEP it's an SV, by removing the SVTYPE tag
+            $cols[7]=~s/(SVTYPE=\w+;|;SVTYPE=\w+|SVTYPE=\w+)//;
+            # Rename two SV specific INFO keys to something friendlier
+            $cols[7]=~s/CT=([35]to[35])/Frame=$1/;
+            $cols[7]=~s/SVMETHOD=([\w.]+)/Method=$1/;
+            $cols[4] = "<" . $info{SVTYPE} . ">";
+            # Fetch the REF allele at the second breakpoint using samtools faidx. NOTE(review): assumes INFO carries CHR2 and END for these SVTYPEs - confirm
+            my $ref2 = `$samtools faidx $ref_fasta $info{CHR2}:$info{END}-$info{END} | grep -v ^\\>`;
+            chomp( $ref2 );
+            $split_vcf_fh->print( join( "\t", $info{CHR2}, $info{END}, $cols[2], ( $ref2 ? $ref2 : $cols[3] ), @cols[4..$#cols] ), "\n" ); # Second breakpoint; keeps the first breakpoint's REF if the lookup came back empty
+            $split_vcf_fh->print( join( "\t", @cols ), "\n" );
+        }
+        $input_vcf = "$tmp_dir/$input_name.split.vcf"; # Any SVTYPE-tagged record makes the split VCF the input for later steps
+    }
+    else {
+        $split_vcf_fh->print( join( "\t", @cols ), "\n" );
+    }
+}
+$split_vcf_fh->close;
+$orig_vcf_fh->close;
+
+# Delete the split.vcf created above if we didn't find any variants with the SVTYPE tag
+unlink( "$tmp_dir/$input_name.split.vcf" ) if( $input_vcf ne "$tmp_dir/$input_name.split.vcf" );
+
+# Make sure the --online option is only used with small GRCh38 VCFs
+if( $online ) {
+    ( $var_count < 100 and $ncbi_build eq "GRCh38" ) or die "ERROR: Option --online can only be used with GRCh38 VCFs listing <100 events\n";
+}
+
+# If a liftOver chain was provided, remap and switch the input VCF before annotation. Variants that liftOver cannot map are skipped with a warning
+my ( %remap );
+if( $remap_chain ) {
+    warn "STATUS: Running liftOver...\n" if( $verbose );
+
+    # Make a BED file from the VCF, run liftOver on it, and create a hash mapping old to new loci (original "chr:pos" => remapped "chr:pos")
+    `grep -v ^# $input_vcf | cut -f1,2 | awk '{OFS="\\t"; print \$1,\$2-1,\$2,\$1":"\$2}' > $tmp_dir/$input_name.bed`;
+    %remap = map{chomp; my @c=split("\t"); ($c[3], "$c[0]:$c[2]")}`$liftover $tmp_dir/$input_name.bed $remap_chain /dev/stdout /dev/null 2> /dev/null`;
+    unlink( "$tmp_dir/$input_name.bed" );
+
+    # Create a new VCF in the temp folder, with remapped loci on which we'll run annotation
+    my $orig_vcf_fh = IO::File->new( $input_vcf ) or die "ERROR: Couldn't open --input-vcf: $input_vcf!\n";
+    my $remap_vcf_fh = IO::File->new( "$tmp_dir/$input_name.remap.vcf", "w" ) or die "ERROR: Couldn't open VCF: $tmp_dir/$input_name.remap.vcf!\n";
+    while( my $line = $orig_vcf_fh->getline ) {
+        if( $line =~ m/^#/ ) {
+            $remap_vcf_fh->print( $line ); # Write header lines unchanged
+        }
+        else {
+            chomp( $line );
+            my @cols = split( "\t", $line );
+            my $locus = $cols[0] . ":" . $cols[1];
+            if( defined $remap{$locus} ) {
+                # Retain original variant under INFO, so we can append it later to the output MAF. The emptiness test is parenthesized because "or" binds looser than "?:"; unparenthesized, an empty INFO produced a stray "1" prefix
+                $cols[7] = (( !$cols[7] or $cols[7] eq "." ) ? "" : "$cols[7];" ) . "REMAPPED_POS=" . join( ":", @cols[0,1,3,4] );
+                @cols[0,1] = split( ":", $remap{$locus} );
+                $remap_vcf_fh->print( join( "\t", @cols ), "\n" );
+            }
+            else {
+                warn "WARNING: Skipping variant at $locus; Unable to liftOver using $remap_chain\n";
+            }
+        }
+    }
+    $remap_vcf_fh->close;
+    $orig_vcf_fh->close;
+    $input_vcf = "$tmp_dir/$input_name.remap.vcf";
+}
+
+# Ahead of annotation, note each variant's reference allele and a 1bp-flanked region, for some checks
+warn "STATUS: Pulling flanking reference bps for checks...\n" if( $verbose );
+my $vcf_fh = IO::File->new( $input_vcf ) or die "ERROR: Couldn't open --input-vcf: $input_vcf!\n";
+my ( %ref_bps, @ref_regions, %uniq_regions, %flanking_bps );
+while( my $vcf_line = $vcf_fh->getline ) {
+    # Header lines carry no loci, so only variant lines contribute regions for samtools
+    next if( substr( $vcf_line, 0, 1 ) eq "#" );
+    chomp( $vcf_line );
+    my ( $contig, $start, undef, $ref_allele ) = split( "\t", $vcf_line );
+    # The region covers the reference allele plus one flanking bp on either side
+    my ( $from, $to ) = ( $start - 1, $start + length( $ref_allele ));
+    my $span = "$contig:$from-$to";
+    ( $ref_bps{$span}, $uniq_regions{$span} ) = ( $ref_allele, 1 );
+    push( @ref_regions, $span );
+}
+$vcf_fh->close;
+
+# samtools is faster when given many loci per call, but is limited to around 125k args, at least
+# on CentOS 6. So fetch the regions in chunks of at most $buffer_size loci per samtools call
+warn "STATUS: Splitting loci into smaller chunks to run separately...\n" if( $verbose );
+my ( $lines, @regions_split ) = ( "", ());
+my @regions = keys %uniq_regions;
+my $chr_prefix_in_use = ( @regions and $regions[0] =~ m/^chr/ ? 1 : 0 );
+while( @regions ) { push( @regions_split, [ splice( @regions, 0, $buffer_size ) ] ); }
+foreach my $chunk ( @regions_split ) { my $region = join( " ", sort @{$chunk} ); $lines .= `$samtools faidx $ref_fasta $region`; }
+foreach my $fasta_entry ( grep( length, split( ">", $lines ))) {
+    # The first line of each FASTA entry names the region; the remainder is the sequence, which
+    # can span several lines for long reference alleles
+    my ( $name, $seq ) = split( "\n", $fasta_entry, 2 );
+    $seq =~ s/\r|\n//g;
+    # Entries that came back without any sequence are left out of %flanking_bps
+    next unless( $seq );
+    $flanking_bps{$name} = uc( $seq );
+}
+
+# An entirely empty %flanking_bps most likely means that --ref-fasta is the wrong genome build
+# It's also possible that an outdated samtools was unable to parse the gzipped FASTA files
+# ::NOTE:: With no variants in the input, don't break here, so that an empty MAF still gets created
+die "ERROR: You're either using an outdated samtools, or --ref-fasta is not the same genome build as your --input-vcf." unless( !@regions_split or %flanking_bps );
+
+# Compare each variant's reference allele against the bps fetched from the FASTA, and report problems
+warn "STATUS: Reporting any problems on variant loci and reference alleles...\n" if( $verbose );
+foreach my $region ( @ref_regions ) {
+    my ( $ref, $bps ) = ( $ref_bps{$region}, $flanking_bps{$region} );
+    my ( $locus ) = map{ my ( $chr, $pos ) = split( ":" ); ++$pos; "$chr:$pos" } split( "-", $region );
+    if( !defined $bps ) {
+        warn "WARNING: Couldn't retrieve bps around $locus from reference FASTA: $ref_fasta\n";
+    }
+    elsif( $bps !~ m/^[ACGTN]+$/ ) {
+        warn "WARNING: Retrieved invalid bps $bps around $locus from reference FASTA: $ref_fasta\n";
+    }
+    elsif( $ref ne substr( $bps, 1, length( $ref ))) {
+        warn "WARNING: Reference allele $ref at $locus doesn't match " .
+            substr( $bps, 1, length( $ref )) .
+            " (flanking bps: $bps) from reference FASTA: $ref_fasta\n";
+    }
+}
+
+# Annotate variants in given VCF to all possible transcripts, unless user requested to skip VEP (then the input VCF is parsed as-is)
+my $output_vcf = $input_vcf;
+unless( $inhibit_vep ) {
+    $output_vcf = ( $remap_chain ? "$tmp_dir/$input_name.remap.vep.vcf" : "$tmp_dir/$input_name.vep.vcf" );
+    warn "STATUS: Running VEP and writing to: $output_vcf\n";
+    # Make sure we can find the VEP script, preferring "vep" over the older variant_effect_predictor.pl
+    my $vep_script = ( -s "$vep_path/vep" ? "$vep_path/vep" : "$vep_path/variant_effect_predictor.pl" );
+    ( -s $vep_script ) or die "ERROR: Cannot find VEP script under: $vep_path\n";
+
+    # Construct VEP command using some default options and run it
+    my $vep_cmd = "$perl_bin $vep_script --species $species --assembly $ncbi_build";
+    $vep_cmd .= " --no_progress" unless( $verbose );
+    $vep_cmd .= " --no_stats --buffer_size $buffer_size --sift b --ccds";
+    $vep_cmd .= " --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical";
+    $vep_cmd .= " --protein --biotype --uniprot --tsl --variant_class --shift_hgvs 1"; # NOTE(review): --uniprot repeats the option added on the previous line
+    $vep_cmd .= " --check_existing --total_length --allele_number --no_escape --xref_refseq";
+    $vep_cmd .= " --failed 1 --vcf --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length";
+    $vep_cmd .= " --dir $vep_data --fasta $ref_fasta --format vcf --input_file $input_vcf --output_file $output_vcf";
+    $vep_cmd .= " --force_overwrite" if( $vep_overwrite );
+    # Change options based on whether we are running in offline mode or not
+    $vep_cmd .= ( $online ? " --database --host useastdb.ensembl.org" : " --offline --pubmed" );
+    # VEP barks if --fork is set to 1. So don't use this argument unless it's >1
+    $vep_cmd .= " --fork $vep_forks" if( $vep_forks > 1 );
+    # Add --custom if requested at command line
+    $vep_cmd .= " --custom $vep_custom" if ($vep_custom);
+    # Add --config if requested at command line
+    $vep_cmd .= " --config $vep_config" if ($vep_config);
+    # Require allele match for co-located variants unless user-rejected or we're using a newer VEP
+    $vep_cmd .= " --check_allele" unless( $any_allele or $vep_script =~ m/vep$/ );
+    # Add --cache_version only if the user specifically asked for a version
+    $vep_cmd .= " --cache_version $cache_version" if( $cache_version );
+    # Add options that only work on human variants
+    if( $species eq "homo_sapiens" ) {
+        # Option names differ between the newer and older VEP scripts, and the population AFs are only requested in offline mode
+        $vep_cmd .= " --polyphen b" . ( $vep_script =~ m/vep$/ ? " --af" : " --gmaf" );
+        $vep_cmd .= ( $vep_script =~ m/vep$/ ? " --af_1kg --af_esp --af_gnomad" : " --maf_1kg --maf_esp" ) unless( $online );
+    }
+    # Do not use the --regulatory option in situations where we know it will break
+    $vep_cmd .= " --regulatory" unless( $species eq "canis_familiaris" or $online );
+
+    warn "STATUS: Running this VEP command:  \n". wrap( "  ", "    ", $vep_cmd. "\n" ) if( $verbose );
+
+    # Run the command, and make sure it ran without error codes
+    system( $vep_cmd ) == 0 or die "\nERROR: Failed to run the VEP annotator! Command: $vep_cmd\n";
+    ( -s $output_vcf ) or warn "WARNING: VEP-annotated VCF file is missing or empty: $output_vcf\n";
+
+    warn "STATUS: Finished with vep...\n" if( $verbose );
+}
+
+# Define default MAF Header (https://wiki.nci.nih.gov/x/eJaPAQ) with our vcf2maf additions
+my @maf_header = qw(
+    Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand
+    Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2
+    dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode
+    Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2
+    Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status
+    Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score
+    BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID HGVSc HGVSp HGVSp_Short Transcript_ID
+    Exon_Number t_depth t_ref_count t_alt_count n_depth n_ref_count n_alt_count all_effects
+);
+
+# Add extra annotation columns to the MAF in a consistent order, whatever order VEP reports them in
+my @ann_cols = qw( Allele Gene Feature Feature_type Consequence cDNA_position CDS_position
+    Protein_position Amino_acids Codons Existing_variation ALLELE_NUM DISTANCE STRAND_VEP SYMBOL
+    SYMBOL_SOURCE HGNC_ID BIOTYPE CANONICAL CCDS ENSP SWISSPROT TREMBL UNIPARC RefSeq SIFT PolyPhen
+    EXON INTRON DOMAINS AF AFR_AF AMR_AF ASN_AF EAS_AF EUR_AF SAS_AF AA_AF EA_AF CLIN_SIG SOMATIC
+    PUBMED MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE IMPACT PICK VARIANT_CLASS TSL
+    HGVS_OFFSET PHENO MINIMISED GENE_PHENO FILTER flanking_bps vcf_id vcf_qual gnomAD_AF gnomAD_AFR_AF
+    gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF );
+
+# Push any requested custom VEP annotations (comma-delimited) from the CSQ/ANN section into @ann_cols
+if ($retain_ann) {
+    push @ann_cols, split(',',$retain_ann);
+}
+my @ann_cols_format; # Filled in later from the VCF's CSQ/ANN header with the actual order of VEP data, that may differ between runs
+push( @maf_header, @ann_cols );
+
+# Add original VCF POS column header
+push( @maf_header, "vcf_pos" );
+
+# Retained INFO fields, plus the fields tracked for remapped or split variants, get additional columns
+my @addl_info_cols = ();
+if( $retain_info or $remap_chain or $split_svs ) {
+    # Compare names case-insensitively, so that no existing column gets a second copy
+    my %taken = map{( lc( $_ ), 1 )} @maf_header;
+    @addl_info_cols = grep{ !$taken{lc( $_ )} } split( ",", $retain_info );
+    # A remap-chain adds a column that retains the original chr:pos:ref:alt
+    push( @addl_info_cols, "REMAPPED_POS" ) if( $remap_chain );
+    # SVs that were split earlier add some columns with useful info about them
+    push( @addl_info_cols, qw( Fusion Method Frame CONSENSUS )) if( $split_svs );
+    push( @maf_header, @addl_info_cols );
+}
+
+# Any FORMAT fields the user wants retained get a tumor and a normal column each
+my @addl_fmt_cols = ();
+if( $retain_fmt ) {
+    my %taken = map{( lc( $_ ), 1 )} @maf_header; # Lowercased names of the columns defined so far
+    foreach my $fmt_tag ( split( ",", $retain_fmt )) {
+        # Prefix the tag with t_ and n_ for tumor/normal, skipping any name that is already used
+        foreach my $col ( "t_$fmt_tag", "n_$fmt_tag" ) {
+            push( @addl_fmt_cols, $col ) unless( $taken{lc( $col )} );
+        }
+    }
+    push( @maf_header, @addl_fmt_cols );
+}
+
+# Find the file mapping ENSG IDs to Entrez IDs relative to this script, and load it if it has content
+my ( $script_dir ) = ( $0 =~ m/^(.*)\/vcf2maf/ );
+$script_dir ||= ".";
+
+my $entrez_id_file = "$script_dir/data/ensg_to_entrez_id_map_ensembl_feb2014.tsv";
+my %entrez_id_map = ();
+if( -s $entrez_id_file ) {
+    %entrez_id_map = map{chomp; split("\t")} `grep -hv ^# $entrez_id_file`;
+}
+
+# Parse through each variant in the annotated VCF, pull out CSQ/ANN from the INFO column, and choose
+# one transcript per variant whose annotation will be used in the MAF
+warn "STATUS: Parsing variants in annotated VCF...\n" if( $verbose );
+my $maf_fh = IO::File->new( $output_maf, ">" ) or die "ERROR: Couldn't open --output-maf: $output_maf!\n";
+$maf_fh->print( "#version 2.4\n" . join( "\t", @maf_header ), "\n" ); # Print the MAF version line and column header
+( -s $output_vcf ) or exit; # Warnings on this were printed earlier, but quit here, only after a blank MAF is created
+my $annotated_vcf_fh = IO::File->new( $output_vcf ) or die "ERROR: Couldn't open annotated VCF: $output_vcf!\n";
+my ( $vcf_tumor_idx, $vcf_normal_idx, %sv_pair ); # Genotype column indexes set from the #CHROM line; gene names collected per SV
+while( my $line = $annotated_vcf_fh->getline ) {
+
+    # Parse out the pipe-delimited VEP CSQ/ANN format from its ##INFO header, which seems to differ between runs
+    if( $line =~ m/^##INFO=<ID=(CSQ|ANN).*: '?([^"']+)["']/ ) {
+        # Use this as the expected column order of VEP annotation, unless we already got it from CSQ
+        @ann_cols_format = map{s/\s//g; $_} split( /\|/, $2 ) unless( @ann_cols_format and $1 eq "ANN" );
+    }
+
+    # Skip all ## header lines, including the CSQ/ANN one handled above
+    next if( $line =~ m/^##/ );
+
+    chomp( $line );
+    my ( $chrom, $pos, $var_id, $ref, $alt, $var_qual, $filter, $info_line, $format_line, @rest ) = split( "\t", $line );
+
+    # Set ID, QUAL, and FILTER to "." unless defined and non-empty
+    $var_id = "." unless( defined $var_id and $var_id ne "" );
+    $var_qual = "." unless( defined $var_qual and $var_qual ne "" );
+    $filter = "." unless( defined $filter and $filter ne "" );
+
+    # On the #CHROM header line, if genotype columns are available, find those of the tumor and matched normal by sample ID
+    if( $line =~ m/^#CHROM/ ) {
+        if( $format_line and scalar( @rest ) > 0 ) {
+            for( my $i = 0; $i <= $#rest; ++$i ) {
+                $vcf_tumor_idx = $i if( $rest[$i] eq $vcf_tumor_id );
+                $vcf_normal_idx = $i if( $rest[$i] eq $vcf_normal_id );
+            }
+            ( defined $vcf_tumor_idx ) or warn "WARNING: No genotype column for $vcf_tumor_id in VCF!\n";
+            ( defined $vcf_normal_idx ) or warn "WARNING: No genotype column for $vcf_normal_id in VCF!\n";
+        }
+        next;
+    }
+
+    # Parse the INFO column into a hash: flags are stored as "1", and repeated keys are merged into a comma-delimited list of distinct values, compared literally since values may contain regex metacharacters
+    my %info = map {( $_, "1" )} grep { !m/=/ } split( /\;/, $info_line );
+    foreach ( grep { m/=/ } split( /\;/, $info_line )) { my ( $key, $val ) = split( /=/, $_, 2 ); my $seen = ( defined $info{$key} and $info{$key} ne "" ); next if( $seen and grep { $_ eq $val } split( ",", $info{$key} )); $info{$key} .= ( $seen ? ",$val" : $val ); }
+
+    # By default, the variant allele is the first (usually the only) allele listed under ALT. If
+    # there are >1 alleles in ALT, choose the first non-REF allele listed under tumor GT, that is
+    # also not seen under normal GT. If tumor GT is undefined or ambiguous, choose the tumor allele
+    # with the most supporting read depth, if available.
+    my @alleles = ( $ref, split( /,/, $alt ));
+    my $var_allele_idx = 1;
+
+    # Parse out info from the normal genotype field, pairing FORMAT keys with the sample's values
+    my ( %nrm_info, @nrm_depths );
+    if( defined $vcf_normal_idx ) {
+        my @format_keys = split( /\:/, $format_line );
+        my $idx = 0;
+        %nrm_info = map {( $format_keys[$idx++], $_ )} split( /\:/, $rest[$vcf_normal_idx] );
+    }
+
+    # Parse out info from the tumor genotype field, pairing FORMAT keys with the sample's values
+    my ( %tum_info, @tum_depths );
+    if( defined $vcf_tumor_idx ) {
+        my @format_keys = split( /\:/, $format_line );
+        my $idx = 0;
+        %tum_info = map {( $format_keys[$idx++], $_ )} split( /\:/, $rest[$vcf_tumor_idx] );
+
+        # If possible, parse the tumor genotype to identify the variant allele
+        if( defined $tum_info{GT} and $tum_info{GT} ne "." and $tum_info{GT} ne "./." ) {
+            my @tum_gt = split( /[\/|]/, $tum_info{GT} );
+            # Default to the first non-REF allele seen in tumor GT
+            ( $var_allele_idx ) = grep {$_ ne "0"} @tum_gt;
+            # If possible, choose the first non-REF tumor allele that is also not in normal GT
+            if( defined $nrm_info{GT} and $nrm_info{GT} ne "." and $nrm_info{GT} ne "./." ) {
+                my %nrm_gt = map {( $_, 1 )} split( /[\/|]/, $nrm_info{GT} );
+                ( $var_allele_idx ) = grep {$_ ne "0" and !$nrm_gt{$_}} @tum_gt;
+            }
+            # If GT was unhelpful (no such allele, non-numeric, or out of range), default to the first ALT allele and set GT to undefined
+            if( !defined $var_allele_idx or $var_allele_idx !~ m/^\d+$/ or $var_allele_idx >= scalar( @alleles )) {
+                $var_allele_idx = 1;
+                $tum_info{GT} = "./.";
+            }
+        }
+
+        # Standardize tumor AD and DP based on data in the genotype fields
+        FixAlleleDepths( \@alleles, $var_allele_idx, \%tum_info );
+        @tum_depths = split( ",", $tum_info{AD} );
+
+        # If genotype is undefined, use the allele depths collected to choose the major variant allele
+        unless( defined $tum_info{GT} and $tum_info{GT} ne '.' and $tum_info{GT} ne "./." ) {
+            # The first depth listed belongs to the reference allele. Of the rest, find the largest
+            for( my $i = 1; $i <= $#tum_depths; ++$i ) {
+                $var_allele_idx = $i if( $tum_depths[$i] and $tum_depths[$i] > $tum_depths[$var_allele_idx] );
+            }
+            $tum_info{GT} = "./.";
+            if( defined $tum_info{DP} and $tum_info{DP} ne '.' and $tum_info{DP} != 0 and defined $tum_depths[$var_allele_idx] ) {
+                my $vaf = $tum_depths[$var_allele_idx] / $tum_info{DP};
+                $tum_info{GT} = ( $vaf < $min_hom_vaf ? "0/1" : "1/1" ); # Heterozygous if the VAF is below $min_hom_vaf, else homozygous
+            }
+        }
+    }
+
+    # Set the variant allele to whatever we selected above
+    my $var = $alleles[$var_allele_idx];
+
+    # Standardize normal AD and DP based on data in the genotype fields
+    if( defined $vcf_normal_idx ) {
+        FixAlleleDepths( \@alleles, $var_allele_idx, \%nrm_info );
+        @nrm_depths = split( ",", $nrm_info{AD} );
+        $nrm_info{GT} = "./." unless( defined $nrm_info{GT} and $nrm_info{GT} ne '.' );
+    }
+
+    # Work out the start/stop loci and the variant type/allele that the MAF should report
+    my ( $start, $stop, $var_type, $inframe ) = ( "", "", "", "" );
+    my ( $ref_length, $var_length ) = ( length( $ref ), length( $var ));
+    # Keep a copy of the VCF-style position and REF/ALT alleles, which are needed again later
+    my ( $vcf_pos, $vcf_ref, $vcf_var ) = ( $pos, $ref, $var );
+    # Trim leading bps shared by REF and ALT off all alleles, writing "-" for an allele left empty
+    while( $ref and $var and substr( $ref, 0, 1 ) eq substr( $var, 0, 1 ) and $ref ne $var ) {
+        ( $ref, $var, @alleles ) = map{$_ = substr( $_, 1 ); ( $_ ? $_ : "-" )} ( $ref, $var, @alleles );
+        --$ref_length; --$var_length; ++$pos;
+    }
+    # Alleles of equal length are substitutions: SNP, DNP, TNP, or anything larger (ONP)
+    if( $ref_length == $var_length ) {
+        my %np_type = qw( 1 SNP 2 DNP 3 TNP );
+        $var_type = ( $var_length > 3 ? "ONP" : $np_type{$var_length} );
+        ( $start, $stop ) = ( $pos, $pos + $var_length - 1 );
+    }
+    # Everything else is an indel, including the complex ones which also contain substitutions
+    elsif( $ref_length != $var_length ) {
+        if( $ref_length > $var_length ) { # Deletions span the remaining reference bps
+            $var_type = "DEL";
+            ( $start, $stop ) = ( $pos, $pos + $ref_length - 1 );
+        }
+        else { # Insertions go between two bps, except the complex ones that span their reference bps
+            $var_type = "INS";
+            ( $start, $stop ) = ( $ref eq "-" ? ( $pos - 1, $pos ) : ( $pos, $pos + $ref_length - 1 ));
+        }
+        $inframe = ( abs( $ref_length - $var_length ) % 3 ? 0 : 1 );
+    }
+
+    my @all_effects; # A list of effects (hashrefs) of this variant's selected allele on all possible transcripts
+    my $maf_effect; # A single effect per variant to report in the standard MAF columns
+    my %maf_line = map{( $_, '' )} @maf_header; # Initialize MAF fields with blank strings (rebuilt from $maf_effect further below)
+
+    # VEP provides a comma-delimited list of consequences, with pipe-delim details per consequence
+    # It replaces ',' in details with '&'. We'll assume that all '&'s we see, were formerly commas
+    # "Consequence" might list multiple effects on the same transcript e.g. missense,splice_region
+    if( $info{CSQ} or $info{ANN} ) {
+
+        my $ann_lines = ( $info{CSQ} ? $info{CSQ} : $info{ANN} ); # CSQ takes precedence over ANN when both are present
+        foreach my $ann_line ( split( /,/, $ann_lines )) {
+            my $idx = 0;
+            my %effect = map{s/\&/,/g; ( $ann_cols_format[$idx++], ( defined $_ ? $_ : '' ))} split( /\|/, $ann_line );
+
+            # Remove transcript ID from HGVS codon/protein changes, to make it easier on the eye
+            $effect{HGVSc} =~ s/^.*:// if( $effect{HGVSc} );
+            $effect{HGVSp} =~ s/^.*:// if( $effect{HGVSp} );
+
+            # Remove the prefixed HGVSc code in HGVSp, if found
+            $effect{HGVSp} =~ s/^.*\((p\.\S+)\)/$1/ if( $effect{HGVSp} and $effect{HGVSp} =~ m/^c\./ );
+
+            # If we find any snpEff fields, rename them to the corresponding VEP field names
+            $effect{Consequence} = $effect{Annotation} if( $effect{Annotation} );
+            $effect{IMPACT} = $effect{Annotation_Impact} if( $effect{Annotation_Impact} );
+            $effect{SYMBOL} = $effect{Gene_Name} if( $effect{Gene_Name} );
+            $effect{Gene} = $effect{Gene_ID} if( $effect{Gene_ID} );
+            $effect{Feature_type} = $effect{Feature_Type} if( $effect{Feature_Type} );
+            $effect{Feature} = $effect{Feature_ID} if( $effect{Feature_ID} );
+            $effect{BIOTYPE} = $effect{Transcript_BioType} if( $effect{Transcript_BioType} );
+            $effect{HGVSc} = $effect{'HGVS.c'} if( $effect{'HGVS.c'} );
+            $effect{HGVSp} = $effect{'HGVS.p'} if( $effect{'HGVS.p'} );
+            $effect{cDNA_position} = $effect{'cDNA.pos/cDNA.length'} if( $effect{'cDNA.pos/cDNA.length'} );
+            $effect{CDS_position} = $effect{'CDS.pos/CDS.length'} if( $effect{'CDS.pos/CDS.length'} );
+            $effect{Protein_position} = $effect{'AA.pos/AA.length'} if( $effect{'AA.pos/AA.length'} );
+            $effect{DISTANCE} = $effect{Distance} if( $effect{Distance} );
+
+            # Sort consequences by decreasing order of severity, and pick the most severe one
+            $effect{Consequence} = join( ",", sort { GetEffectPriority($a) <=> GetEffectPriority($b) } split( ",", $effect{Consequence} ));
+            ( $effect{One_Consequence} ) = split( ",", $effect{Consequence} );
+
+            # When VEP fails to provide any value in Consequence, tag it as an intergenic variant
+            $effect{One_Consequence} = "intergenic_variant" unless( $effect{Consequence} );
+
+            # Create a shorter HGVS protein format using 1-letter codes
+            if( $effect{HGVSp} ) {
+                my $hgvs_p_short = $effect{HGVSp};
+                while( $hgvs_p_short and my ( $find, $replace ) = each %aa3to1 ) {
+                    eval "\$hgvs_p_short =~ s{$find}{$replace}g"; # NOTE(review): string eval; %aa3to1 is defined elsewhere, so confirm its keys/values are safe to interpolate
+                }
+                $effect{HGVSp_Short} = $hgvs_p_short;
+            }
+
+            # Fix HGVSp_Short, CDS_position, and Protein_position for splice acceptor/donor variants
+            if( $effect{One_Consequence} =~ m/^(splice_acceptor_variant|splice_donor_variant)$/ ) {
+                my ( $c_pos ) = $effect{HGVSc} =~ m/^c.(\d+)/; # NOTE(review): the "." after "c" is unescaped, so it matches any character
+                if( defined $c_pos ) {
+                    $c_pos = 1 if( $c_pos < 1 ); # Handle negative cDNA positions used in 5' UTRs
+                    my $p_pos = sprintf( "%.0f", ( $c_pos + $c_pos % 3 ) / 3 ); # Codon number holding this position i.e. c_pos/3 rounded up
+                    $effect{HGVSp_Short} = "p.X" . $p_pos . "_splice";
+                    $effect{CDS_position} =~ s/^-(\/\d+)$/$c_pos$1/ if( $effect{CDS_position} );
+                    $effect{Protein_position} =~ s/^-(\/\d+)$/$p_pos$1/ if( $effect{Protein_position} )
+                }
+            }
+
+            # Fix HGVSp_Short for Silent mutations, so it mentions the amino-acid and position
+            if( defined $effect{HGVSp_Short} and $effect{HGVSp_Short} eq "p.=" ) {
+                my ( $p_pos ) = $effect{Protein_position} =~ m/^(\d+)(-\d+)?\/\d+$/;
+                my $aa = $effect{Amino_acids};
+                $effect{HGVSp_Short} = "p.$aa" . $p_pos . "=";
+            }
+
+            # Copy VEP data into MAF fields that don't share the same identifier
+            $effect{Transcript_ID} = $effect{Feature};
+            $effect{Exon_Number} = $effect{EXON};
+            $effect{Hugo_Symbol} = ( $effect{SYMBOL} ? $effect{SYMBOL} : '' );
+
+            # If AF columns from the older VEP are found, rename to the newer ones for consistency
+            my %af_col = qw( GMAF AF AFR_MAF AFR_AF AMR_MAF AMR_AF ASN_MAF ASN_AF EAS_MAF EAS_AF
+                EUR_MAF EUR_AF SAS_MAF SAS_AF AA_MAF AA_AF EA_MAF EA_AF );
+            map { $effect{$af_col{$_}} = $effect{$_} if( defined $effect{$_} )} keys %af_col;
+
+            # If VEP couldn't find this variant in dbSNP/COSMIC/etc., we'll say it's "novel"
+            if( $effect{Existing_variation} ) {
+                # ::NOTE:: If seen in a DB other than dbSNP, this field will remain blank
+                $effect{dbSNP_RS} = join( ",", grep{m/^rs\d+$/} split( /,/, $effect{Existing_variation} ));
+            }
+            else {
+                $effect{dbSNP_RS} = "novel";
+            }
+
+            # Transcript_Length isn't separately reported, but can be parsed out from cDNA_position
+            ( $effect{Transcript_Length} ) = $effect{cDNA_position} =~ m/\/(\d+)$/ if( $effect{cDNA_position} );
+            $effect{Transcript_Length} = 0 unless( defined $effect{Transcript_Length} );
+
+            # Skip effects on other ALT alleles. If ALLELE_NUM is undefined (e.g. for INFO:SVTYPE), don't skip any
+            push( @all_effects, \%effect ) unless( $effect{ALLELE_NUM} and $effect{ALLELE_NUM} != $var_allele_idx );
+        }
+
+        # Sort effects first by transcript biotype, then by severity, and then by longest transcript
+        @all_effects = sort {
+            GetBiotypePriority( $a->{BIOTYPE} ) <=> GetBiotypePriority( $b->{BIOTYPE} ) ||
+            GetEffectPriority( $a->{One_Consequence} ) <=> GetEffectPriority( $b->{One_Consequence} ) ||
+            $b->{Transcript_Length} <=> $a->{Transcript_Length}
+        } @all_effects;
+
+        # Find the highest priority effect with a gene symbol i.e. the worst affected gene (undef if no effect has a symbol)
+        my ( $effect_with_gene_name ) = grep { $_->{SYMBOL} } @all_effects;
+        my $maf_gene = ( $effect_with_gene_name ? $effect_with_gene_name->{SYMBOL} : undef );
+
+        # If that gene has a user-preferred isoform, report the effect on that isoform
+        ( $maf_effect ) = grep { $_->{SYMBOL} and $_->{SYMBOL} eq $maf_gene and $_->{Transcript_ID} and $custom_enst{$_->{Transcript_ID}} } @all_effects;
+
+        # If that gene has no user-preferred isoform, then use the VEP-preferred (canonical) isoform
+        ( $maf_effect ) = grep { $_->{SYMBOL} and $_->{SYMBOL} eq $maf_gene and $_->{CANONICAL} and $_->{CANONICAL} eq "YES" } @all_effects unless( $maf_effect );
+
+        # If that gene has no VEP-preferred isoform either, then choose the worst affected user-preferred isoform with a gene symbol
+        ( $maf_effect ) = grep { $_->{SYMBOL} and $_->{Transcript_ID} and $custom_enst{$_->{Transcript_ID}} } @all_effects unless( $maf_effect );
+
+        # If none of the isoforms are user-preferred, then choose the worst affected VEP-preferred isoform with a gene symbol
+        ( $maf_effect ) = grep { $_->{SYMBOL} and $_->{CANONICAL} and $_->{CANONICAL} eq "YES" } @all_effects unless( $maf_effect );
+
+        # If we still have nothing selected, then just report the worst effect
+        $maf_effect = $all_effects[0] unless( $maf_effect );
+    }
+
+    # Construct the MAF columns from the $maf_effect hash (all left blank when no effect was selected above)
+    %maf_line = map{( $_, ( $maf_effect->{$_} ? $maf_effect->{$_} : '' ))} @maf_header;
+    $maf_line{Hugo_Symbol} = $maf_effect->{Transcript_ID} unless( $maf_effect->{Hugo_Symbol} ); # Without a gene symbol, fall back to the transcript ID
+    $maf_line{Hugo_Symbol} = 'Unknown' unless( $maf_effect->{Transcript_ID} ); # Without a transcript ID, the symbol is always 'Unknown'
+    $maf_line{Entrez_Gene_Id} = ( defined $maf_effect->{Gene} && defined $entrez_id_map{$maf_effect->{Gene}} ? $entrez_id_map{$maf_effect->{Gene}} : "0" );
+    $maf_line{Center} = $maf_center;
+    $maf_line{NCBI_Build} = $ncbi_build;
+    $maf_line{Chromosome} = $chrom;
+    $maf_line{Start_Position} = $start;
+    $maf_line{End_Position} = $stop;
+    $maf_line{Strand} = '+'; # Per MAF definition, only the positive strand is an accepted value
+    $maf_line{STRAND_VEP} = $maf_effect->{STRAND}; # Renamed to avoid mixup with "Strand" above
+    $maf_line{Variant_Classification} = GetVariantClassification( $maf_effect->{One_Consequence}, $var_type, $inframe );
+    $maf_line{Variant_Type} = $var_type;
+    $maf_line{Reference_Allele} = $ref;
+    # ::NOTE:: If tumor genotype is unavailable, then we'll assume it's ref/var heterozygous
+    $maf_line{Tumor_Seq_Allele1} = $ref;
+    $maf_line{Tumor_Seq_Allele2} = $var;
+    if( defined $tum_info{GT} and $tum_info{GT} ne "." and $tum_info{GT} ne "./." ) {
+        # ::NOTE:: MAF only supports biallelic sites. Tumor_Seq_Allele2 must always be the $var
+        # picked earlier. For Tumor_Seq_Allele1, pick the first non-var allele in GT (usually $ref)
+        my ( $idx1, $idx2 ) = split( /[\/|]/, $tum_info{GT} );
+        # If GT was monoploid, then $idx2 will be undefined, and we should set it equal to $idx1
+        $idx2 = $idx1 unless( defined $idx2 );
+        $maf_line{Tumor_Seq_Allele1} = ( $alleles[$idx1] ne $var ? $alleles[$idx1] : $alleles[$idx2] );
+    }
+    # ::NOTE:: If normal genotype is unavailable, then we'll assume it's ref/ref homozygous
+    $maf_line{Match_Norm_Seq_Allele1} = $ref;
+    $maf_line{Match_Norm_Seq_Allele2} = $ref;
+    if( defined $nrm_info{GT} and $nrm_info{GT} ne "." and $nrm_info{GT} ne "./." ) {
+        # ::NOTE:: MAF only supports biallelic sites. So choose the first two alleles listed in GT
+        my ( $idx1, $idx2 ) = split( /[\/|]/, $nrm_info{GT} );
+        # If GT was monoploid, then $idx2 will be undefined, and we should set it equal to $idx1
+        $idx2 = $idx1 unless( defined $idx2 );
+        $maf_line{Match_Norm_Seq_Allele1} = $alleles[$idx1];
+        $maf_line{Match_Norm_Seq_Allele2} = $alleles[$idx2];
+    }
+    $maf_line{Tumor_Sample_Barcode} = $tumor_id;
+    $maf_line{Matched_Norm_Sample_Barcode} = $normal_id;
+    $maf_line{t_depth} = $tum_info{DP} if( defined $tum_info{DP} and $tum_info{DP} ne "." );
+    ( $maf_line{t_ref_count}, $maf_line{t_alt_count} ) = @tum_depths[0,$var_allele_idx] if( @tum_depths ); # Depths of the REF and the selected variant allele
+    $maf_line{n_depth} = $nrm_info{DP} if( defined $nrm_info{DP} and $nrm_info{DP} ne "." );
+    ( $maf_line{n_ref_count}, $maf_line{n_alt_count} ) = @nrm_depths[0,$var_allele_idx] if( @nrm_depths );
+
+    # Summarize the prioritized effects in @all_effects as a semicolon-delimited list, where each
+    # entry is a comma-delimited gene, consequence, protein change, transcript, and RefSeq IDs
+    $maf_line{all_effects} = "";
+    foreach my $eff ( @all_effects ) {
+        # Effects lacking a consequence or a transcript ID are left out
+        next unless( $eff->{One_Consequence} and $eff->{Transcript_ID} );
+        my @fields = ( $eff->{Hugo_Symbol}, $eff->{One_Consequence}, ( $eff->{HGVSp} ? $eff->{HGVSp} : '' ));
+        push( @fields, $eff->{Transcript_ID}, ( $eff->{RefSeq} ? $eff->{RefSeq} : '' ));
+        $maf_line{all_effects} .= join( ",", @fields ) . ";";
+    }
+
+    # Carry FILTER over from the input VCF, tagging calls whose AF is high in any gnomAD subpopulation
+    my $subpop_count = 0;
+    foreach my $pop ( qw( AFR AMR ASJ EAS FIN NFE SAS )) {
+        my $af_col = "gnomAD_$pop\_AF";
+        next unless( $maf_line{$af_col} );
+        # Only the value before the first "/" is compared against the threshold
+        my ( $pop_af ) = split( "/", $maf_line{$af_col} );
+        ++$subpop_count if( $pop_af > $max_subpop_af );
+    }
+    # Drop any common_variant tag already in the input, so it is redefined by the criteria above
+    $filter = join( ";", grep{ $_ ne "common_variant" } split( /,|;/, $filter ));
+    my $no_other_tags = ( $filter eq "PASS" or $filter eq "." or !$filter );
+    $filter = ( $no_other_tags ? "common_variant" : "$filter;common_variant" ) if( $subpop_count > 0 );
+    $maf_line{FILTER} = $filter;
+
+    # Look up the flanking bps fetched earlier with samtools, keyed by the same 1bp-flanked region
+    my ( $flank_from, $flank_to ) = ( $vcf_pos - 1, $vcf_pos + length( $vcf_ref ));
+    $maf_line{flanking_bps} = $flanking_bps{"$chrom:$flank_from-$flank_to"};
+
+    # Copy ID and QUAL from the input VCF into their own MAF columns
+    @maf_line{qw( vcf_id vcf_qual )} = ( $var_id, $var_qual );
+
+    # Also keep the VCF-style POS, since the position may have been shifted when shared
+    # prefix bps were trimmed off the alleles
+    $maf_line{vcf_pos} = $vcf_pos;
+
+    # If there are additional INFO data to add, then add those
+    foreach my $info_col ( @addl_info_cols ) {
+
+        #don't clobber an existing value if present
+        if(defined $info{$info_col} && $info{$info_col} ne ""){
+            $maf_line{$info_col} = $info{$info_col} ;
+        }
+    }
+    # If there are additional FORMAT data to add, then add those
+    foreach my $fmt_col ( @addl_fmt_cols ) {
+        my $fmt_key = $fmt_col;
+        if ( $fmt_key =~ /^t_/ ) { $fmt_key =~ s/^t_//; $maf_line{$fmt_col} = ( defined $tum_info{$fmt_key} ? $tum_info{$fmt_key} : "" ); }
+        if ( $fmt_key =~ /^n_/ ) { $fmt_key =~ s/^n_//; $maf_line{$fmt_col} = ( defined $nrm_info{$fmt_key} ? $nrm_info{$fmt_key} : "" ); }
+    }
+
+    # If this is an SV, pair up gene names from separate lines to backfill the Fusion column later
+    if( $split_svs and $var=~m/^<BND|DEL|DUP|INV>$/ ) {
+        my $sv_key = "$var_id-$tumor_id";
+        if( $sv_pair{$sv_key} ) {
+            $sv_pair{$sv_key} = $sv_pair{$sv_key} . "-" . $maf_line{Hugo_Symbol} . " fusion";
+        }
+        else {
+            $sv_pair{$sv_key} = $maf_line{Hugo_Symbol};
+        }
+    }
+
+    # At this point, we've generated all we can about this variant, so write it to the MAF
+    $maf_fh->print( join( "\t", map{( defined $maf_line{$_} ? $maf_line{$_} : "" )} @maf_header ) . "\n" );
+}
+$maf_fh->close;
+$annotated_vcf_fh->close;
+
+# If the MAF lists SVs, backfill the Fusion column with the gene-pair names collected in %sv_pair.
+# The MAF written above is re-read line by line and rewritten through a temporary file in $tmp_dir,
+# which then replaces the original output MAF.
+warn "STATUS: For any SVs, backfilling Fusion column with gene-pair names...\n" if( $verbose );
+if( $split_svs ) {
+    # Name the temporary file after the output MAF's basename, minus any trailing ".maf"
+    my $output_name = substr( $output_maf, rindex( $output_maf, "/" ) + 1 );
+    $output_name =~ s/(\.maf)*$//;
+    my $tmp_output_maf = "$tmp_dir/$output_name.tmp.maf";
+
+    my $in_maf_fh = IO::File->new( $output_maf ) or die "ERROR: Couldn't open: $output_maf!\n";
+    my $out_maf_fh = IO::File->new( $tmp_output_maf, ">" ) or die "ERROR: Couldn't open: $tmp_output_maf!\n";
+    # Zero-based positions of the columns needed to rebuild each row's key into %sv_pair
+    my ( $tid_idx, $fusion_idx, $var_id_idx ) = ( 0, 0, 0 );
+    while( my $line = $in_maf_fh->getline ) {
+        chomp( $line );
+        if( $line =~ m/^#/ ) {
+            $out_maf_fh->print( "$line\n" ); # Copy comments unchanged
+        }
+        elsif( $line =~ m/^Hugo_Symbol/ ) {
+            # Copy the header unchanged, after figuring out necessary column indexes
+            foreach( split( /\t/, $line )) { last if( $_ eq "Tumor_Sample_Barcode" ); ++$tid_idx; }
+            foreach( split( /\t/, $line )) { last if( $_ eq "Fusion" ); ++$fusion_idx; }
+            foreach( split( /\t/, $line )) { last if( $_ eq "vcf_id" ); ++$var_id_idx; }
+            $out_maf_fh->print( "$line\n" ); # Copy header unchanged
+        }
+        else {
+            # Write the gene-pair name into the Fusion column if it was backfilled earlier
+            # A limit of -1 keeps trailing empty columns, so the row keeps its column count
+            my @cols = split( /\t/, $line, -1 );
+            my $sv_key = $cols[$var_id_idx] . "-" . $cols[$tid_idx];
+            $cols[$fusion_idx] = $sv_pair{$sv_key} if( $sv_pair{$sv_key} );
+            $out_maf_fh->print( join( "\t", @cols ) . "\n" );
+        }
+    }
+    $out_maf_fh->close;
+    $in_maf_fh->close;
+
+    # Replace the original MAF with the backfilled copy. Stop loudly if that fails, because
+    # otherwise the run would report success while the Fusion column was never backfilled
+    move( $tmp_output_maf, $output_maf ) or die "ERROR: Couldn't move $tmp_output_maf to $output_maf: $!\n";
+}
+
+warn "STATUS: Finished! Check results in $output_maf\n" if( $verbose );
+
+# Translate a Sequence Ontology consequence term into a MAF Variant_Classification.
+# Takes the consequence term, the variant type (only 'INS' and 'DEL' are examined), and a flag
+# that is true when the change keeps the reading frame. Checks run top to bottom and the first
+# one that matches decides the result; anything unrecognized becomes "Targeted_Region".
+sub GetVariantClassification {
+    my ( $so_term, $indel_type, $keeps_frame ) = @_;
+
+    # Nothing to classify, e.g. when VEP was skipped
+    unless( $so_term ) {
+        return "Targeted_Region";
+    }
+
+    # protein_altering_variant alone does not say whether the frame shifts, so the flag decides
+    my $is_pav = ( $so_term eq 'protein_altering_variant' );
+    my $shifts_frame = ( $so_term eq 'frameshift_variant' or ( $is_pav and !$keeps_frame ));
+    my $pav_inframe = ( $is_pav and $keeps_frame );
+
+    if( $so_term =~ /^(splice_acceptor_variant|splice_donor_variant|transcript_ablation|exon_loss_variant)$/ ) {
+        return "Splice_Site";
+    }
+    elsif( $so_term eq 'stop_gained' ) {
+        return "Nonsense_Mutation";
+    }
+    elsif( $shifts_frame and $indel_type eq 'DEL' ) {
+        return "Frame_Shift_Del";
+    }
+    elsif( $shifts_frame and $indel_type eq 'INS' ) {
+        return "Frame_Shift_Ins";
+    }
+    elsif( $so_term eq 'stop_lost' ) {
+        return "Nonstop_Mutation";
+    }
+    elsif( $so_term =~ /^(initiator_codon_variant|start_lost)$/ ) {
+        return "Translation_Start_Site";
+    }
+    elsif( $so_term =~ /inframe_insertion$/ or ( $pav_inframe and $indel_type eq 'INS' )) {
+        return "In_Frame_Ins";
+    }
+    elsif( $so_term =~ /inframe_deletion$/ or ( $pav_inframe and $indel_type eq 'DEL' )) {
+        return "In_Frame_Del";
+    }
+    elsif( $so_term =~ /^(missense_variant|coding_sequence_variant|conservative_missense_variant|rare_amino_acid_variant)$/ ) {
+        return "Missense_Mutation";
+    }
+    elsif( $so_term =~ /^(transcript_amplification|intron_variant|INTRAGENIC|intragenic_variant)$/ ) {
+        return "Intron";
+    }
+    elsif( $so_term eq 'splice_region_variant' ) {
+        return "Splice_Region";
+    }
+    elsif( $so_term =~ /^(incomplete_terminal_codon_variant|synonymous_variant|stop_retained_variant|NMD_transcript_variant)$/ ) {
+        return "Silent";
+    }
+    elsif( $so_term =~ /^(mature_miRNA_variant|exon_variant|non_coding_exon_variant|non_coding_transcript_exon_variant|non_coding_transcript_variant|nc_transcript_variant)$/ ) {
+        return "RNA";
+    }
+    elsif( $so_term =~ /^(5_prime_UTR_variant|5_prime_UTR_premature_start_codon_gain_variant)$/ ) {
+        return "5'UTR";
+    }
+    elsif( $so_term eq '3_prime_UTR_variant' ) {
+        return "3'UTR";
+    }
+    elsif( $so_term =~ /^(TF_binding_site_variant|regulatory_region_variant|regulatory_region|intergenic_variant|intergenic_region)$/ ) {
+        return "IGR";
+    }
+    elsif( $so_term eq 'upstream_gene_variant' ) {
+        return "5'Flank";
+    }
+    elsif( $so_term eq 'downstream_gene_variant' ) {
+        return "3'Flank";
+    }
+
+    # Everything else is reported simply as a targeted region, which includes:
+    # TFBS_ablation, TFBS_amplification, regulatory_region_ablation, regulatory_region_amplification,
+    # feature_elongation, feature_truncation
+    return "Targeted_Region";
+}
+
+# Fix the AD and DP fields, given data from a FORMATted genotype string
+# Arguments:
+#   $alleles_ref    - array ref of alleles; index 0 is treated as the REF allele
+#   $var_allele_idx - index into that array of the variant allele being reported
+#   $fmt_info_ref   - hash ref of FORMAT tag => value for one sample
+# Both references are overwritten with the results: AD becomes a comma-delimited list with "."
+# for unknown depths, DP may be recomputed, and an "N" ALT allele may be replaced (Strelka SNVs).
+# Always returns 1.
+sub FixAlleleDepths {
+    my ( $alleles_ref, $var_allele_idx, $fmt_info_ref ) = @_;
+    # Work on local copies; they are written back through the references at the end
+    my %fmt_info = %{$fmt_info_ref};
+    my @alleles = @{$alleles_ref};
+    my @depths = ();
+
+    # If AD is defined, then parse out all REF/ALT allele depths, or whatever is in it
+    # Entries that are not plain non-negative integers (e.g. ".") become empty strings
+    if( defined $fmt_info{AD} and $fmt_info{AD} ne "." ) {
+        @depths = map{( m/^\d+$/ ? $_ : "" )}split( /,/, $fmt_info{AD} );
+    }
+
+    # From here on, exactly one of the caller-specific branches below runs (or none of them)
+    # Handle VarScan VCF lines where AD contains only 1 depth, and REF allele depth is in RD
+    if( scalar( @depths ) == 1 and defined $fmt_info{RD} ) {
+        @depths = map{""} @alleles;
+        $depths[0] = $fmt_info{RD};
+        $depths[$var_allele_idx] = $fmt_info{AD};
+    }
+    # Handle SomaticSniper VCF lines, where allele depths must be extracted from BCOUNT
+    elsif( !defined $fmt_info{AD} and defined $fmt_info{BCOUNT} ) {
+        # BCOUNT is read positionally as counts for A,C,G,T; alleles other than those get ""
+        my %b_idx = ( A=>0, C=>1, G=>2, T=>3 );
+        my @bcount = split( /,/, $fmt_info{BCOUNT} );
+        @depths = map{(( defined $b_idx{$_} and defined $bcount[$b_idx{$_}] ) ? $bcount[$b_idx{$_}] : "" )} @alleles;
+    }
+    # Handle VCF SNV lines by Strelka, where allele depths are in AU:CU:GU:TU
+    elsif( !defined $fmt_info{AD} and scalar( grep{defined $fmt_info{$_}} qw/AU CU GU TU/ ) == 4 ) {
+        # Strelka allele depths come in tiers 1,2. We'll use tier1 cuz it's stricter, and DP already is
+        # Each of AU/CU/GU/TU is overwritten with only its first comma-separated value
+        map{( $fmt_info{$_.'U'} ) = split( ",", $fmt_info{$_.'U'} )} qw( A C G T );
+
+        # If the only ALT allele is N, then set it to the allele with the highest non-ref readcount
+        if( scalar( @alleles ) == 2 and $alleles[1] eq "N" ) {
+            my %acgt_depths = map{( defined $fmt_info{$_.'U'} ? ( $_, $fmt_info{$_.'U'} ) : ( $_, "" ))} qw( A C G T );
+            my @deepest = sort {$acgt_depths{$b} <=> $acgt_depths{$a}} keys %acgt_depths;
+            # Take the deepest base, unless it is the REF allele, in which case take the runner-up
+            ( $alleles[1] ) = ( $deepest[0] ne $alleles[0] ? $deepest[0] : $deepest[1] );
+        }
+        @depths = map{( defined $fmt_info{$_.'U'} ? $fmt_info{$_.'U'} : "" )} @alleles;
+    }
+    # Handle VCF Indel lines by Strelka, where variant allele depth is in TIR and reference allele depth in TAR
+    elsif( !defined $fmt_info{AD} and defined $fmt_info{TIR} and defined $fmt_info{TAR}) {
+        # Only the first comma-separated value of TAR and TIR is used
+        @depths = map{""} @alleles;
+        $depths[0] = ( split /,/, $fmt_info{TAR} )[0];
+        $depths[$var_allele_idx] = ( split /,/, $fmt_info{TIR} )[0];
+    }
+    # Handle VCF lines by CaVEMan, where allele depths are in FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ
+    elsif( !defined $fmt_info{AD} and scalar( grep{defined $fmt_info{$_}} qw/FAZ FCZ FGZ FTZ RAZ RCZ RGZ RTZ/ ) == 8 ) {
+        # Create tags for forward+reverse strand reads, and use those to determine REF/ALT depths
+        # The summed AZ/CZ/GZ/TZ tags stay in %fmt_info and so are also written back to the caller
+        map{ $fmt_info{$_} = $fmt_info{'F'.$_} + $fmt_info{'R'.$_} } qw( AZ CZ GZ TZ );
+        @depths = map{( defined $fmt_info{$_.'Z'} ? $fmt_info{$_.'Z'} : "" )} @alleles;
+    }
+    # Handle VCF lines from the Ion Torrent Suite where ALT depths are in AO and REF depths are in RO
+    elsif( !defined $fmt_info{AD} and defined $fmt_info{AO} and defined $fmt_info{RO} ) {
+        @depths = ( $fmt_info{RO}, map{( m/^\d+$/ ? $_ : "" )}split( /,/, $fmt_info{AO} ));
+    }
+    # Handle VCF lines from Delly where REF/ALT SV junction read counts are in RR/RV respectively
+    elsif( !defined $fmt_info{AD} and defined $fmt_info{RR} and defined $fmt_info{RV} ) {
+        # Reference allele depth and depths for any other ALT alleles must be left undefined
+        @depths = map{""} @alleles;
+        $depths[0] = $fmt_info{RR};
+        $depths[$var_allele_idx] = $fmt_info{RV};
+    }
+    # Handle VCF lines where REF/ALT allele counts must be extracted from DP4
+    elsif( !defined $fmt_info{AD} and defined $fmt_info{DP4} and scalar( split( /,/, $fmt_info{DP4} )) == 4 ) {
+        # Reference allele depth and depths for any other ALT alleles must be left undefined
+        @depths = map{""} @alleles;
+        # DP4 is usually a comma-delimited list for ref-forward, ref-reverse, alt-forward and alt-reverse read counts
+        my @count = split( /,/, $fmt_info{DP4} );
+        $depths[0] = $count[0] + $count[1];
+        $depths[$var_allele_idx] = $count[2] + $count[3];
+    }
+    # Handle VCF lines from cgpPindel, where ALT depth and total depth are in PP:NP:PR:NR
+    elsif( !defined $fmt_info{AD} and scalar( grep{defined $fmt_info{$_}} qw/PP NP PR NR/ ) == 4 ) {
+        # Reference allele depth and depths for any other ALT alleles must be left undefined
+        @depths = map{""} @alleles;
+        $depths[$var_allele_idx] = $fmt_info{PP} + $fmt_info{NP};
+        # Any existing DP is overwritten with PR + NR here
+        $fmt_info{DP} = $fmt_info{PR} + $fmt_info{NR};
+    }
+    # Handle VCF lines with ALT allele fraction in FA, which needs to be multiplied by DP to get AD
+    elsif( !defined $fmt_info{AD} and defined $fmt_info{FA} and defined $fmt_info{DP} and $fmt_info{DP} ne '.' ) {
+        # Reference allele depth and depths for any other ALT alleles must be left undefined
+        @depths = map{""} @alleles;
+        # The product is rounded to a whole number of reads
+        $depths[$var_allele_idx] = sprintf( "%.0f", $fmt_info{FA} * $fmt_info{DP} );
+    }
+    # Handle VCF lines from mpileup/bcftools where DV contains the ALT allele depth
+    elsif( !defined $fmt_info{AD} and defined $fmt_info{DV} and defined $fmt_info{DP} ) {
+        # Reference allele depth and depths for any other ALT alleles must be left undefined
+        @depths = map{""} @alleles;
+        $depths[$var_allele_idx] = $fmt_info{DV};
+    }
+    # Handle VCF lines where AD contains only 1 value, that we can assume is the variant allele
+    elsif( defined $fmt_info{AD} and @depths and scalar( @depths ) == 1 ) {
+        # Reference allele depth and depths for any other ALT alleles must be left undefined
+        @depths = map{""} @alleles;
+        $depths[$var_allele_idx] = $fmt_info{AD};
+    }
+    # For all other lines where #depths is not equal to #alleles, blank out the depths
+    # NOTE(review): `ne` compares the two counts as strings; for integer counts the result matches `!=`
+    elsif( @depths and scalar( @depths ) ne scalar( @alleles )) {
+        @depths = map{""} @alleles;
+    }
+
+    # Sanity check that REF/ALT allele depths are lower than the total depth
+    # If the REF depth, the variant depth, or their sum exceeds DP, DP is replaced by the sum of all known depths
+    if( defined $fmt_info{DP} and $fmt_info{DP} ne '.' and (( $depths[0] and $depths[0] > $fmt_info{DP} ) or
+        ( $depths[$var_allele_idx] and $depths[$var_allele_idx] > $fmt_info{DP} ) or
+        ( $depths[0] and $depths[$var_allele_idx] and $depths[0] + $depths[$var_allele_idx] > $fmt_info{DP} ))) {
+        $fmt_info{DP} = 0;
+        map{$fmt_info{DP} += $_ if($_ and $_ ne '.')} @depths;
+    }
+
+    # If we have REF/ALT allele depths, but no DP, then set DP equal to the sum of all ADs
+    # NOTE(review): blank ("") depths count as defined here, so a missing DP becomes 0 when every depth is blank
+    if(( defined $depths[0] and defined $depths[$var_allele_idx] ) and ( !defined $fmt_info{DP} or $fmt_info{DP} eq '.' )) {
+        $fmt_info{DP} = 0;
+        map{$fmt_info{DP} += $_ if($_ and $_ ne '.')} @depths;
+    }
+
+    # Put all our changes back into the hash/array references that were passed over
+    # Blank depths are written as "." in the rebuilt AD string
+    $fmt_info{AD} = join( ",", map{( $_ ne "" ? $_ : "." )} @depths );
+    %{$fmt_info_ref} = %fmt_info;
+    @{$alleles_ref} = @alleles;
+
+    return 1;
+}
+
+__DATA__
+
+=head1 NAME
+
+B<vcf2maf.pl> - Convert a VCF into a MAF by mapping each variant to only one of
+all possible gene isoforms
+
+=head1 SYNOPSIS
+
+ perl vcf2maf.pl --help
+
+ perl vcf2maf.pl --input-vcf INPUT.vcf --output-maf OUTPUT.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID
+
+
+=head1 DESCRIPTION
+
+To convert a VCF into a MAF, each variant must be mapped to only one of all
+possible gene transcripts/isoforms that it might affect. This selection of
+a single effect per variant, is often subjective. This project is an
+attempt to make the selection criteria smarter, reproducible, and more
+configurable.
+
+This script uses Ensembl's VEP, a variant annotator that maps effects of a variant on
+all possible genes and transcripts. For more info, see the README or
+L<https://ensembl.org/info/docs/tools/vep/index.html>.
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<--help>
+
+Print a basic help message
+
+=item B<--verbose>
+
+Print more things to STDERR to log progress
+
+=item B<--input-vcf>=I<INPUT_VCF>
+
+Path to input file in VCF format
+
+=item B<--output-maf>=I<OUTPUT_MAF>
+
+Path to output MAF file
+
+=item B<--tmp-dir>=I<TMP_DIR>
+
+Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF]
+
+=item B<--tumor-id>=I<TUMOR_ID>
+
+Tumor_Sample_Barcode to report in the MAF [TUMOR]
+
+=item B<--normal-id>=I<NORMAL_ID>
+
+Matched_Norm_Sample_Barcode to report in the MAF [NORMAL]
+
+=item B<--vcf-tumor-id>=I<TUMOR_ID>
+
+Tumor sample ID used in VCF's genotype columns [--tumor-id]
+
+=item B<--vcf-normal-id>=I<NORMAL_ID>
+
+Matched normal ID used in VCF's genotype columns [--normal-id]
+
+=item B<--online>
+
+Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events)
+
+=item B<--ref-fasta>=I<FASTA>
+
+Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz]
+
+=item B<--species>=I<SPECIES>
+
+Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens]
+
+=item B<--ncbi-build>=I<ASSEMBLY>
+
+NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37]
+
+=item B<--cache-version>=I<N>
+
+Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version]
+
+=item B<--remap-chain>=I<REMAP_CHAIN>
+
+Chain file to remap variants to a different assembly before running VEP
+
+=item B<--man>
+
+Print the detailed manual with advanced options
+
+
+=back
+
+=head1 ADVANCED OPTIONS
+
+=head2 OUTPUT FILTERING
+
+=over 8
+
+=item B<--any-allele>
+
+When reporting co-located variants, allow mismatched variant alleles too
+
+=item B<--min-hom-vaf>=I<N>
+
+If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7]
+
+=item B<--man>
+
+Print the detailed manual with advanced options
+
+=back
+
+=head2 CUSTOMIZED OUTPUT
+
+=over 8
+
+=item B<--maf-center>=I<CENTER_NAME>
+
+Variant calling center to report in MAF [.]
+
+=item B<--custom-enst>=I<LIST>
+
+Comma-delimited list of custom ENST IDs that override canonical selection []
+
+=item B<--retain-info>=I<LIST>
+
+Comma-delimited names of INFO fields to retain as extra columns in MAF []
+
+=item B<--retain-fmt>=I<LIST>
+
+Comma-delimited names of FORMAT fields to retain as extra columns in MAF []
+
+=item B<--retain-ann>=I<LIST>
+
+Comma-delimited names of VEP annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF []
+
+=item B<--vep-custom>=I<VEP_CUSTOM_STRING>
+
+String to pass into VEP's --custom option [] (see L<CUSTOMIZED VEP ANNOTATION> below)
+
+=item B<--vep-config>=I<VEP_CONFIG_FILE>
+
+VEP config file to pass into vep's --config option [] (see L<CUSTOMIZED VEP ANNOTATION> below)
+
+=back
+
+=head3 CUSTOMIZED VEP ANNOTATION
+
+=over 2
+
+VEP's customization options are described at:
+
+L<https://useast.ensembl.org/info/docs/tools/vep/script/vep_custom.html>
+
+The custom VEP output is saved in the B<INFO> section of the VCF line, as part of the B<CSQ=> section.
+
+To retain the customized output in the MAF file, in addition to specifying the custom annotation
+and fields with B<--vep-custom>, we need to specify the fields to retain with B<--retain-ann>.
+
+VEP's B<--custom>=I<STRING> is a comma-separated string:
+
+Filename,I<Short_name>,File_type,Annotation_type,Force_report_coordinates,I<VCF_fields>
+
+where I<Short_name> is a prefix for the annotations and I<VCF_fields> is a
+comma-separated list of the annotations to include.
+
+For each annotation we want to retain, we add I<Short_name>B<_>I<VCF_FIELD>
+to the B<--retain-ann> and delimit them with commas.
+
+For example, below we have Short_name of I<MY_Ann> and VCF_fields of I<AD,TOPMED>
+
+=over 8
+
+--vep-custom my_ann.vcf,I<MY_Ann>,vcf,exact,,I<AD,TOPMED>
+
+--retain-ann I<MY_Ann>B<_>I<AD>,I<MY_Ann>B<_>I<TOPMED>
+
+=back
+
+=back
+
+=head2 SUBPROCESSES
+
+=head3 VEP CUSTOMIZATION
+
+=over 8
+
+=item B<--inhibit-vep>
+
+Skip running VEP, but extract VEP annotation in VCF if found
+
+=item B<--vep-path>=I<PATH_TO_VEP_EXEC>
+
+Folder containing the vep script [~/miniconda3/bin]
+
+=item B<--vep-data>=I<PATH_TO_VEP_CACHE>
+
+VEP's base cache/plugin directory [~/.vep]
+
+=item B<--vep-forks>=I<N>
+
+Number of forked processes to use when running VEP [4]
+
+=item B<--vep-overwrite>
+
+Allow VEP to overwrite annotated output (if it exists)
+
+=item B<--buffer-size>=I<N>
+
+Number of variants VEP loads at a time; Reduce this for low memory systems [5000]
+
+=back
+
+
+=head3 SUBPROCESS EXECUTABLES
+
+=over 8
+
+=item B<--samtools-exec>=I<PATH_TO_SAMTOOLS_EXEC>
+
+Path to the samtools executable [Looks on PATH by default]
+
+=item B<--tabix-exec>=I<PATH_TO_TABIX_EXEC>
+
+Path to the tabix executable [Looks on PATH by default]
+
+=item B<--liftover-exec>=I<PATH_TO_LIFTOVER_EXEC>
+
+Path to the liftover executable [Looks on PATH by default]
+
+=back
+
+=head2 RELEVANT LINKS:
+
+=over 8
+
+=item B<vcf2maf> homepage:
+
+L<https://github.com/ckandoth/vcf2maf>
+
+=item VCF format:
+
+L<https://samtools.github.io/hts-specs/>
+
+=item MAF format:
+
+L<https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format>
+
+=item Variant Effect Predictor (VEP):
+
+L<https://ensembl.org/info/docs/tools/vep/index.html>
+
+=item VEP annotated VCF format:
+
+L<https://ensembl.org/info/docs/tools/vep/vep_formats.html#vcfout>
+
+=item VEP customized output:
+
+L<https://useast.ensembl.org/info/docs/tools/vep/script/vep_custom.html>
+
+=back
+
+=head1 AUTHORS
+
+ Cyriac Kandoth (ckandoth@gmail.com)
+ Shweta Chavan (chavan.shweta@gmail.com)
+ Zuojian Tang (zuojian.tang@gmail.com)
+
+=head1 LICENSE
+
+ Apache-2.0 | Apache License, Version 2.0 | https://www.apache.org/licenses/LICENSE-2.0
+
+=cut
diff --git a/modules/vcf2maf/1.3/vcf2maf.smk b/modules/vcf2maf/1.3/vcf2maf.smk
new file mode 100644
index 000000000..1cf2a63bf
--- /dev/null
+++ b/modules/vcf2maf/1.3/vcf2maf.smk
@@ -0,0 +1,206 @@
+#!/usr/bin/env snakemake
+
+
+##### ATTRIBUTION #####
+
+
+# Original Author:  Bruno Grande
+# Module Author:    Helena Winata
+# Contributors:     Ryan Morin
+
+
+##### SETUP #####
+
+import sys, os
+from os.path import join
+
+import oncopipe as op
+
+# Set up the module and store the module-specific configuration in `CFG`
+CFG = op.setup_module(
+    name = "vcf2maf",
+    version = "1.3",
+    subdirectories = ["inputs","decompressed","vcf2maf","crossmap","outputs"]
+)
+
+# Define rules to be run locally when using a compute cluster
+localrules:
+    _vcf2maf_input_vcf,
+    _vcf2maf_gnomad_filter_maf,
+    _vcf2maf_output_maf,
+    _vcf2maf_crossmap,
+    _vcf2maf_all
+
+# Maps `genome_build` wildcard values to the assembly name passed to vcf2maf's --ncbi-build.
+# A build missing from this map raises a KeyError when the `build` param of _vcf2maf_run is evaluated.
+VCF2MAF_GENOME_VERSION_MAP = {
+    "grch37": "GRCh37",
+    "hg38": "GRCh38",
+    "hs37d5": "GRCh37"
+}
+
+# Directory holding the bundled vcf2maf.pl; _vcf2maf_run prepends it to PATH
+VCF2MAF_SCRIPT_PATH = CFG['inputs']['src_dir']
+
+##### RULES #####
+
+# Symlinks the input files into the module results directory (under '00-inputs/')
+# Both the bgzipped VCF and its tabix index are linked; the index path is derived by
+# appending ".tbi" to the input VCF path rather than being declared as an input.
+rule _vcf2maf_input_vcf:
+    input:
+        vcf_gz = CFG["inputs"]["sample_vcf_gz"]
+    output:
+        vcf_gz = CFG["dirs"]["inputs"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.vcf.gz",
+        index = CFG["dirs"]["inputs"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.vcf.gz.tbi"
+    run:
+        op.relative_symlink(input.vcf_gz, output.vcf_gz)
+        op.relative_symlink(input.vcf_gz + ".tbi", output.index)
+
+# Annotates the symlinked VCF with bcftools, copying INFO/AF from the normalized gnomAD VCF
+# into a new INFO/gnomADg_AF field. The annotated VCF is uncompressed and marked temp().
+rule _vcf2maf_annotate_gnomad:
+    input:
+        vcf = str(rules._vcf2maf_input_vcf.output.vcf_gz),
+        normalized_gnomad = reference_files("genomes/{genome_build}/variation/af-only-gnomad.normalized.{genome_build}.vcf.gz")
+    output:
+        vcf = temp(CFG["dirs"]["decompressed"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.annotated.vcf")
+    conda:
+        CFG["conda_envs"]["bcftools"]
+    resources: 
+        **CFG["resources"]["annotate"]
+    threads: 
+        CFG["threads"]["annotate"]
+    shell:
+        op.as_one_line("""
+        bcftools annotate --threads {threads} -a {input.normalized_gnomad} {input.vcf} -c "INFO/gnomADg_AF:=INFO/AF" -o {output.vcf}
+        """)
+
+# Runs the bundled vcf2maf.pl on the gnomAD-annotated VCF to produce a MAF.
+# The shell command:
+#   1. prepends VCF2MAF_SCRIPT_PATH to PATH and removes any pre-existing outputs;
+#   2. derives the VEP directory from the location of the `vep` executable on PATH;
+#   3. runs vcf2maf.pl only when `which vcf2maf.pl` matches the bundled script path,
+#      and otherwise writes a warning to the stderr log;
+#   4. touches {output.vep} once the if/else block has exited successfully.
+# NOTE(review): in the else branch no MAF is written, yet the command itself still exits 0;
+# the failure then presumably surfaces only as a missing {output.maf} - confirm this is intended.
+rule _vcf2maf_run:
+    input:
+        vcf = str(rules._vcf2maf_annotate_gnomad.output.vcf),
+        fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"),
+        vep_cache = CFG["inputs"]["vep_cache"]
+    output:
+        maf = temp(CFG["dirs"]["vcf2maf"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.maf"),
+        vep = temp(CFG["dirs"]["decompressed"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.annotated.vep.vcf")
+    log:
+        stdout = CFG["logs"]["vcf2maf"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}_vcf2maf.stdout.log",
+        stderr = CFG["logs"]["vcf2maf"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}_vcf2maf.stderr.log",
+    params:
+        opts = CFG["options"]["vcf2maf"],
+        # Assembly name for --ncbi-build, looked up from the genome_build wildcard
+        build = lambda w: VCF2MAF_GENOME_VERSION_MAP[w.genome_build],
+        # Value for --custom-enst, selected per genome build
+        custom_enst = op.switch_on_wildcard("genome_build", CFG["switches"]["custom_enst"])
+    conda:
+        CFG["conda_envs"]["vcf2maf"]
+    threads:
+        CFG["threads"]["vcf2maf"]
+    resources:
+        **CFG["resources"]["vcf2maf"]
+    shell:
+        op.as_one_line("""
+        VCF2MAF_SCRIPT_PATH={VCF2MAF_SCRIPT_PATH};
+        PATH=$VCF2MAF_SCRIPT_PATH:$PATH;
+        VCF2MAF_SCRIPT="$VCF2MAF_SCRIPT_PATH/vcf2maf.pl";
+        if [[ -e {output.maf} ]]; then rm -f {output.maf}; fi;
+        if [[ -e {output.vep} ]]; then rm -f {output.vep}; fi;
+        vepPATH=$(dirname $(which vep))/../share/variant-effect-predictor*;
+        if [[ $(which vcf2maf.pl) =~ $VCF2MAF_SCRIPT ]]; then
+            echo "using bundled patched script $VCF2MAF_SCRIPT";
+            echo "Using $VCF2MAF_SCRIPT to run {rule} for {wildcards.tumour_id} on $(hostname) at $(date)" > {log.stderr};
+            vcf2maf.pl
+            --input-vcf {input.vcf}
+            --output-maf {output.maf}
+            --tumor-id {wildcards.tumour_id}
+            --normal-id {wildcards.normal_id}
+            --ref-fasta {input.fasta}
+            --ncbi-build {params.build}
+            --vep-data {input.vep_cache}
+            --vep-path $vepPATH
+            {params.opts}
+            --custom-enst {params.custom_enst}
+            --retain-info gnomADg_AF
+            >> {log.stdout} 2>> {log.stderr};
+        else echo "WARNING: PATH is not set properly, using $(which vcf2maf.pl) will result in error during execution. Please ensure $VCF2MAF_SCRIPT exists." > {log.stderr};fi  &&
+        touch {output.vep}
+        """)
+
+# Splits the MAF into kept and dropped rows using the cutoff in CFG["options"]["gnomAD_cutoff"].
+# The Perl one-liner skips lines matching /^(!?#)/, splits each remaining line on tabs, and
+# splits the field at zero-based index 114 on commas. If any of those values exceeds the cutoff,
+# the row goes to STDERR (captured in temp_file, then gzipped); otherwise it goes to {output.maf}.
+# NOTE(review): index 114 is presumably the gnomADg_AF column retained by _vcf2maf_run; it is
+# hard-coded, so verify it against the MAF header if the vcf2maf column layout changes.
+rule _vcf2maf_gnomad_filter_maf:
+    input:
+        maf = str(rules._vcf2maf_run.output.maf)
+    output:
+        maf = CFG["dirs"]["vcf2maf"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.gnomad_filtered.maf",
+        dropped_maf = CFG["dirs"]["vcf2maf"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.gnomad_filtered.dropped.maf.gz"
+    params:
+        opts = CFG["options"]["gnomAD_cutoff"],
+        # Uncompressed dropped-rows file; gzipping it yields the path declared as dropped_maf
+        temp_file = CFG["dirs"]["vcf2maf"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.gnomad_filtered.dropped.maf"
+    shell:
+        op.as_one_line("""
+        cat {input.maf} | perl -lane 'next if /^(!?#)/; my @cols = split /\t/; @AF_all =split/,/, $cols[114]; $skip=0; for(@AF_all){{$skip++ if $_ > {params.opts}}} if ($skip) {{print STDERR;}} else {{print;}};' > {output.maf} 2>{params.temp_file}
+            &&
+        gzip {params.temp_file}
+            &&
+        touch {output.dropped_maf}
+        """)
+
+def get_chain(wildcards):
+    """Return the liftover chain file used to convert this run to the other genome build.
+
+    Builds whose name contains "38" get the hg38ToHg19 chain; every other build
+    gets the hg19ToHg38 chain.
+    """
+    # Test the wildcard value directly. Curly braces here would be a Python set literal,
+    # not a Snakemake placeholder.
+    if "38" in str(wildcards.genome_build):
+        return reference_files("genomes/{genome_build}/chains/grch38/hg38ToHg19.over.chain")
+    else:
+        return reference_files("genomes/{genome_build}/chains/grch37/hg19ToHg38.over.chain")
+
+# Converts the gnomAD-filtered MAF to the other genome build by calling the convert_coord script
+# with, in order: the input MAF, the chain file, the output MAF path, and the literal "crossmap".
+# The converted MAF is written to "<base_name>.converted_<chain>.maf"; the declared output
+# `dispatched` is only a sentinel that is touched after the script succeeds.
+# NOTE(review): threads reuses CFG["threads"]["vcf2maf"] rather than a crossmap-specific entry - confirm.
+rule _vcf2maf_crossmap:
+    input:
+        maf = rules._vcf2maf_gnomad_filter_maf.output.maf,
+        convert_coord = CFG["inputs"]["convert_coord"],
+        chains = get_chain
+    output:
+        dispatched =  CFG["dirs"]["crossmap"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.converted"
+    log:
+        stdout = CFG["logs"]["crossmap"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.crossmap.stdout.log",
+        stderr = CFG["logs"]["crossmap"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.crossmap.stderr.log"
+    conda:
+        CFG["conda_envs"]["crossmap"]
+    threads:
+        CFG["threads"]["vcf2maf"]
+    resources:
+        **CFG["resources"]["crossmap"]
+    params:
+        out_name = CFG["dirs"]["crossmap"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}/{base_name}.converted_",
+        # Chain label used in the converted file name; test the wildcard value directly
+        # (curly braces here would be a Python set literal, not a Snakemake placeholder)
+        chain = lambda w: "hg38ToHg19" if "38" in str(w.genome_build) else "hg19ToHg38",
+        file = ".maf"
+    shell:
+        op.as_one_line("""
+        {input.convert_coord}
+        {input.maf}
+        {input.chains}
+        {params.out_name}{params.chain}{params.file}
+        crossmap
+        > {log.stdout} 2> {log.stderr}
+        && touch {output.dispatched}
+        """)
+
+
+# Symlinks the final MAFs into the outputs directory: the gnomAD-filtered MAF in the original
+# genome build, and the crossmap-converted MAF next to it as "<name>.converted_<chain>.maf".
+# Only the first symlink is a declared output; the converted MAF's path is rebuilt from the
+# crossmap sentinel path plus the chain label.
+rule _vcf2maf_output_maf:
+    input:
+        maf = str(rules._vcf2maf_gnomad_filter_maf.output.maf),
+        maf_converted = str(rules._vcf2maf_crossmap.output.dispatched)
+    output:
+        maf = CFG["dirs"]["outputs"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}_{base_name}.maf"
+    params:
+        # Chain label used in the converted file name; test the wildcard value directly
+        # (curly braces here would be a Python set literal, not a Snakemake placeholder)
+        chain = lambda w: "hg38ToHg19" if "38" in str(w.genome_build) else "hg19ToHg38"
+    run:
+        op.relative_symlink(input.maf, output.maf)
+        # Source: "<sentinel>_<chain>.maf"; destination: output.maf with ".maf" swapped for ".converted_<chain>.maf"
+        converted_src = input.maf_converted + "_" + str(params.chain) + ".maf"
+        converted_dest = output.maf[:-4] + ".converted_" + str(params.chain) + ".maf"
+        op.relative_symlink(converted_src, converted_dest)
+
+# Generates the target sentinels for each run, which generate the symlinks
+# The CFG["runs"] columns are combined element-wise (zip), one target per run. The single
+# configured CFG["vcf_base_name"] is repeated once per run so that it lines up with them.
+rule _vcf2maf_all:
+    input:
+        expand(str(rules._vcf2maf_output_maf.output.maf), zip,
+            seq_type = CFG["runs"]["tumour_seq_type"],
+            genome_build = CFG["runs"]["tumour_genome_build"],
+            tumour_id = CFG["runs"]["tumour_sample_id"],
+            normal_id = CFG["runs"]["normal_sample_id"],
+            pair_status = CFG["runs"]["pair_status"],
+            base_name = [CFG["vcf_base_name"]] * len(CFG["runs"]["tumour_sample_id"]))
+
+##### CLEANUP #####
+
+
+# Perform some clean-up tasks, including storing the module-specific
+# configuration on disk and deleting the `CFG` variable
+op.cleanup_module(CFG)
\ No newline at end of file
diff --git a/modules/vcf2maf/CHANGELOG.md b/modules/vcf2maf/CHANGELOG.md
index bfadfe0f8..28c61255c 100644
--- a/modules/vcf2maf/CHANGELOG.md
+++ b/modules/vcf2maf/CHANGELOG.md
@@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 This release was authored by Kostia Dreval
 
-- This version of modules hamdles vcf2maf feature of specifying non-canoniical ENST IDs to override canonical selection. It can be specified in config as a path to txt file containing list of IDs. If no IDs to be provided, the `switches` should be left blank. Separate lists can be provided for different genome builds. In addition, decompressed vcf files in this version are marked as `temp()` to be deleted after conversion, since they were left in the module folders and taking unnecessary disk space. Finally, resources restriction was enabled in module configuration, because multiple jobs using the same vep file created I/O bottleneck and slowed down some systems.
+- This version of the module handles the vcf2maf feature of specifying non-canonical ENST IDs to override canonical selection. It can be specified in the config as a path to a txt file containing a list of IDs. If no IDs are to be provided, the `switches` should be left blank. Separate lists can be provided for different genome builds. If a file with custom transcripts is specified but does not have any transcripts listed, it should contain at least a newline character, as vcf2maf checks that this file is larger than 0 bytes. In addition, decompressed vcf files in this version are marked as `temp()` to be deleted after conversion, since they were left in the module folders and were taking unnecessary disk space. Finally, resource restriction was enabled in the module configuration, because multiple jobs using the same vep file created an I/O bottleneck and slowed down some systems.
 
 
 ## [1.1] - 2020-11-04
diff --git a/oncopipe/oncopipe/__init__.py b/oncopipe/oncopipe/__init__.py
index cc08d043c..f4afd2534 100755
--- a/oncopipe/oncopipe/__init__.py
+++ b/oncopipe/oncopipe/__init__.py
@@ -9,6 +9,7 @@
 import collections.abc
 from datetime import datetime
 from collections import defaultdict, namedtuple
+from .__version__ import __version__
 
 import yaml
 import pandas as pd
@@ -1221,11 +1222,15 @@ def generate_runs(
     # Generate Sample instances for unmatched normal samples from sample IDs
     Sample = namedtuple("Sample", samples.columns.tolist())
     sample_genome_builds = samples["genome_build"].unique()
+    sample_seq_type = samples[["seq_type","genome_build"]].drop_duplicates()
+    pairing_config = { seq_type: pairing_config[seq_type] for seq_type in sample_seq_type["seq_type"].tolist() }
+
     for seq_type, args_dict in pairing_config.items():
         if (
             "run_unpaired_tumours_with" in args_dict
             and args_dict["run_unpaired_tumours_with"] == "unmatched_normal"
             and unmatched_normal_ids is not None
+
         ):
             unmatched_normals = dict()
             for key, normal_id in unmatched_normal_ids.items():
@@ -1239,11 +1244,26 @@ def generate_runs(
                     (samples.sample_id == normal_id) & (samples.seq_type == seq_type)
                 ]
                 num_matches = len(normal_row)
-                assert num_matches == 1, (
+
+                if (
+                    num_matches == 0
+                ):
+                    print(
                     f"There are {num_matches} {seq_type} samples matching "
-                    f"the normal ID {normal_id} (instead of just one)."
-                )
-                unmatched_normals[key] = Sample(*normal_row.squeeze())
+                    f"the normal ID {normal_id}. Make sure the default unmatched normal for {key} specified in "
+                    f"config[‘unmatched_normal_ids’] is not excluded from the samples table"
+                    )
+                    quit()
+                elif num_matches == 1:
+                    unmatched_normals[key] = Sample(*normal_row.squeeze())
+                elif num_matches > 1:
+                    print(
+                    f"There are {num_matches} {seq_type} samples matching "
+                    f"the normal ID {normal_id}. This means there are {num_matches} normal samples for {key} in "
+                    f"the samples table and it is not desired. Please ensure all sample_id, seq_type, "
+                    f"and genome_build combinations are unique."
+                    )
+                    quit()
             args_dict["unmatched_normals"] = unmatched_normals
         elif (
             "run_unpaired_tumours_with" in args_dict
@@ -1781,3 +1801,56 @@ def cleanup_module(module_config):
     # Add back the TSV fields
     for field in tsv_fields.keys():
         module_config[field] = tsv_fields[field]
+
+
+# Kostia functions
+def get_capture_space(module_config, sample_id, genome_build, seq_type, return_ext):
+    """Return the path to the padded capture space file to use for a sample.
+
+    Parameters
+    ----------
+    module_config : dict
+        The module-specific configuration; its "samples" entry is the sample table.
+    sample_id : str
+        The ID of the sample (tumour or normal) whose capture space is wanted.
+    genome_build : str
+        The genome build of that sample; disambiguates samples aligned to several builds.
+    seq_type : str
+        The specific seq type for which to return capture space.
+    return_ext : str
+        Extension of the file to return, without a leading dot (e.g. bed, bed.gz, interval_list).
+
+    Returns
+    -------
+    str
+        The path to a file relative to the reference files parental directory.
+
+    Raises
+    ------
+    AssertionError
+        If the sample table does not hold exactly one row for this sample.
+    """
+
+    # Convenient variable to access sample table
+    module_samples = module_config["samples"]
+
+    this_sample = module_samples.loc[(module_samples['sample_id'] == sample_id) &
+            (module_samples['genome_build'] == genome_build) &
+            (module_samples['seq_type'] == seq_type)]
+
+    if len(this_sample) != 1:
+        raise AssertionError("Found %s matches when examining the sample table for pair \'%s\' \'%s\' \'%s\'" % (len(this_sample), sample_id, genome_build, seq_type))
+
+    # The capture_space column is optional; without it every sample falls back to the default
+    if "capture_space" in this_sample.columns:
+        panel = this_sample.iloc[0]['capture_space']
+    else:
+        panel = "none"
+
+    # A missing value (an empty cell may come through as NaN) or a "none"-like label means
+    # no panel was specified, so use the default panel for this reference genome
+    if pd.isna(panel) or str(panel).upper() in ("NONE", "NA", "N/A", ""):
+        panel = "exome-utr-grch38" if "38" in genome_build else "exome-utr-grch37"
+
+    # Now that we have found the corresponding capture region for this sample, obtain the requested file
+    return "genomes/" + genome_build + "/capture_space/" + str(panel) + ".padded." + return_ext
diff --git a/oncopipe/oncopipe/__version__.py b/oncopipe/oncopipe/__version__.py
index a5673be8c..8c861b0fd 100644
--- a/oncopipe/oncopipe/__version__.py
+++ b/oncopipe/oncopipe/__version__.py
@@ -6,7 +6,7 @@
 
 __title__ = "oncopipe"
 __description__ = "Functions for running Snakemake modules"
-__version__ = "1.0.11"
+__version__ = "1.0.12"
 __author__ = "Bruno Grande"
 __author_email__ = "bgrande@sfu.ca"
 __license__ = "MIT"
diff --git a/oncopipe/setup.py b/oncopipe/setup.py
index 4e89a69ef..b584b728a 100755
--- a/oncopipe/setup.py
+++ b/oncopipe/setup.py
@@ -22,7 +22,7 @@
     license=about["__license__"],
     packages=["oncopipe"],
     package_dir={"oncopipe": pkg_path},
-    install_requires=["pyyaml", "pandas", "snakemake>=5.4,<5.19"],
+    install_requires=["pyyaml", "pandas", "snakemake>=5.31", "packaging"],
     zip_safe=False,
     python_requires=">=3.6.0",
     classifiers=[
diff --git a/template/{{cookiecutter.module_name}}/1.0/config/default.yaml b/template/{{cookiecutter.module_name}}/1.0/config/default.yaml
index 2841ec10f..3e57a78f5 100644
--- a/template/{{cookiecutter.module_name}}/1.0/config/default.yaml
+++ b/template/{{cookiecutter.module_name}}/1.0/config/default.yaml
@@ -19,9 +19,10 @@ lcr-modules:
         threads:
             step_1: 4
 
-        mem_mb:
-            step_1: 2000
-
+        resources:
+            step_1: 
+                mem_mb: 2000
+            
         pairing_config:
         {%- for seq_type, mode in cookiecutter.items() if seq_type.startswith("seq_type.") %}
             {%- if mode != "omit" %}
diff --git a/template/{{cookiecutter.module_name}}/1.0/{{cookiecutter.module_name}}.smk b/template/{{cookiecutter.module_name}}/1.0/{{cookiecutter.module_name}}.smk
index 44fa5eb59..b9eef5f45 100644
--- a/template/{{cookiecutter.module_name}}/1.0/{{cookiecutter.module_name}}.smk
+++ b/template/{{cookiecutter.module_name}}/1.0/{{cookiecutter.module_name}}.smk
@@ -11,10 +11,27 @@
 
 ##### SETUP #####
 
-
 # Import package with useful functions for developing analysis modules
 import oncopipe as op
 
+# Dependency check: abort early when the installed oncopipe is older than this module needs. Add all the following lines to any module that uses new features in oncopipe
+min_oncopipe_version="1.0.11"
+import pkg_resources
+try:
+    from packaging import version
+except ModuleNotFoundError:
+    sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version")
+# NOTE(review): `sys` is not imported in this section; presumably it is already in scope when Snakemake runs this file - confirm.
+# The guard above is only needed until the "packaging" module is a declared dependency of LCR-modules or oncopipe.
+# Look up the installed oncopipe distribution and compare it against the minimum version set above.
+current_version = pkg_resources.get_distribution("oncopipe").version
+if version.parse(current_version) < version.parse(min_oncopipe_version):
+    print('\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + '\x1b[0m')  # \x1b[...m are ANSI escape codes that colour the message
+    print('\x1b[0;31;40m' + f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m')
+    sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)")
+
+# End of dependency checking section
+
 # Setup module and store module-specific configuration in `CFG`
 # `CFG` is a shortcut to `config["lcr-modules"]["{{cookiecutter.module_name}}"]`
 CFG = op.setup_module(
@@ -38,13 +55,16 @@ localrules:
 
 # Symlinks the input files into the module results directory (under '00-inputs/')
 # TODO: If applicable, add an input rule for each input file used by the module
+# TODO: If applicable, create second symlink to .crai file in the input function, to accomplish cram support
 rule _{{cookiecutter.module_name}}_input_{{cookiecutter.input_file_type}}:
     input:
         {{cookiecutter.input_file_type}} = CFG["inputs"]["sample_{{cookiecutter.input_file_type}}"]
     output:
         {{cookiecutter.input_file_type}} = CFG["dirs"]["inputs"] + "{{cookiecutter.input_file_type}}/{seq_type}--{genome_build}/{sample_id}.{{cookiecutter.input_file_type}}"
+    group: 
+        "input_and_step_1"
     run:
-        op.relative_symlink(input.{{cookiecutter.input_file_type}}, output.{{cookiecutter.input_file_type}})
+        op.absolute_symlink(input.{{cookiecutter.input_file_type}}, output.{{cookiecutter.input_file_type}})
 
 {% if cookiecutter.module_run_per == "tumour" %}
 # Example variant calling rule (multi-threaded; must be run on compute server/cluster)
@@ -66,7 +86,9 @@ rule _{{cookiecutter.module_name}}_step_1:
     threads:
         CFG["threads"]["step_1"]
     resources:
-        mem_mb = CFG["mem_mb"]["step_1"]
+        **CFG["resources"]["step_1"]
+    group: 
+        "input_and_step_1"
     shell:
         op.as_one_line("""
         <TODO> {params.opts} --tumour {input.tumour_{{cookiecutter.input_file_type}}} --normal {input.normal_{{cookiecutter.input_file_type}}}
@@ -98,7 +120,7 @@ rule _{{cookiecutter.module_name}}_output_{{cookiecutter.output_file_type}}:
     output:
         {{cookiecutter.output_file_type}} = CFG["dirs"]["outputs"] + "{{cookiecutter.output_file_type}}/{seq_type}--{genome_build}/{tumour_id}--{normal_id}--{pair_status}.output.filt.{{cookiecutter.output_file_type}}"
     run:
-        op.relative_symlink(input.{{cookiecutter.output_file_type}}, output.{{cookiecutter.output_file_type}})
+        op.relative_symlink(input.{{cookiecutter.output_file_type}}, output.{{cookiecutter.output_file_type}}, in_module= True)
 
 
 # Generates the target sentinels for each run, which generate the symlinks
@@ -135,7 +157,7 @@ rule _{{cookiecutter.module_name}}_step_1:
     threads:
         CFG["threads"]["step_1"]
     resources:
-        mem_mb = CFG["mem_mb"]["step_1"]
+        **CFG["resources"]["step_1"]    # All resources necessary can be included and referenced from the config files.
     shell:
         op.as_one_line("""
         <TODO> {params.opts} --input {input.{{cookiecutter.input_file_type}}} --ref-fasta {input.fasta}
@@ -166,7 +188,7 @@ rule _{{cookiecutter.module_name}}_output_{{cookiecutter.output_file_type}}:
     output:
         {{cookiecutter.output_file_type}} = CFG["dirs"]["outputs"] + "{{cookiecutter.output_file_type}}/{seq_type}--{genome_build}/{sample_id}.output.filt.{{cookiecutter.output_file_type}}"
     run:
-        op.relative_symlink(input.{{cookiecutter.output_file_type}}, output.{{cookiecutter.output_file_type}})
+        op.relative_symlink(input.{{cookiecutter.output_file_type}}, output.{{cookiecutter.output_file_type}}, in_module= True)
 
 
 # Generates the target sentinels for each run, which generate the symlinks
diff --git a/workflows/reference_files/2.4/config/default.yaml b/workflows/reference_files/2.4/config/default.yaml
index 589bd908d..1ad75c508 100644
--- a/workflows/reference_files/2.4/config/default.yaml
+++ b/workflows/reference_files/2.4/config/default.yaml
@@ -27,6 +27,62 @@ genome_builds:
         version: "grch37"
         provider: "ensembl"
         genome_fasta_url: "https://www.bcgsc.ca/downloads/lcr-modules/genome_fastas/hs37d5.fa"
+    grch37_masked:
+        # hard-masked repeats
+        version: "grch37"
+        provider: "ensembl"
+        genome_fasta_url: "http://ftp.ensembl.org/pub/grch37/current/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna_rm.primary_assembly.fa.gz"
+    grch38_masked:
+        # hard-masked repeats # release 102
+        version: "grch38"
+        provider: "ensembl"
+        genome_fasta_url: "http://ftp.ensembl.org/pub/release-102/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_rm.primary_assembly.fa.gz"
+    hg19_masked:
+        # hard-masked repeats
+        version: "grch37"
+        provider: "ucsc"
+        genome_fasta_url: "https://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.masked.gz"
+    hg38_masked:
+        # hard-masked repeats
+        version: "grch38"
+        provider: "ucsc"
+        genome_fasta_url: "https://hgdownload.cse.ucsc.edu/goldenpath/hg38/bigZips/hg38.fa.masked.gz"
+    hg19-reddy:
+        # Version of hg19 with chrM at the start, and chrM with a length of 16569bp. Used for the Reddy dataset
+        version: "grch37"
+        provider: "ucsc"
+        genome_fasta_url: "https://www.bcgsc.ca/downloads/lcr-modules/genome_fastas/hg19-reddy.fa"
+    hg38-nci:
+        # NCI's version of GRCh38, with a costco-sized flat pack of decoys
+        version: "grch38"
+        provider: "ucsc"
+        genome_fasta_url: "https://www.bcgsc.ca/downloads/lcr-modules/genome_fastas/grch38-nci.fa"
+    hg38-panea:
+        # Version of hg38 used by Panea et al.
+        version: "grch38"
+        provider: "ucsc"
+        genome_fasta_url: "https://www.bcgsc.ca/downloads/lcr-modules/genome_fastas/hg38-panea.fa"
+
+capture_space:
+    exome-utr-grch38:
+        genome: "grch38"
+        provider: "ucsc"
+        default: "true"
+        capture_bed_url: "https://www.bcgsc.ca/downloads/lcr-modules/genome_fastas/capture_bed/grch38_all_genes.canonical.sort.bed4"
+    exome-utr-grch37:
+        genome: "grch37"
+        provider: "ensembl"
+        default: "true"
+        capture_bed_url: "https://www.bcgsc.ca/downloads/lcr-modules/genome_fastas/capture_bed/grch37_all_genes.canonical.sort.bed4"
+    # to add custom capture space, follow this example and fill in dictionary keys
+    #name_of_capture_panel:
+        #genome: "grch37" or "hg38" as example
+        #provider: "ensembl" or "ucsc"
+        #capture_bed_url: a link, enclosed in "", from which the BED file for the capture panel can be downloaded (if it is available online)
+        #capture_bed_file: optional; the path, enclosed in "", to a local BED file with the capture panel
+
+capture_params:
+    padding_size: "200"
 
 wildcard_values:
     gencode_release: ["33"]
@@ -37,6 +93,9 @@ wildcard_values:
     rm_version: ["hg19", "hg38"]
 
 tools:
+    bedtools:
+        conda_env: "envs/bedtools-2.29.2.yaml"
+        version: "2.29.2"
     coreutils: 
         conda_env: "envs/coreutils-8.31.yaml"
         version: "8.31"
@@ -46,6 +105,9 @@ tools:
     samtools: 
         conda_env: "envs/samtools-1.9.yaml"
         version: "1.9"
+    tabix:
+        conda_env: "envs/tabix-0.2.6.yaml"
+        version: "0.2.6"
     bwa: 
         conda_env: "envs/bwa-0.7.17.yaml"
         version: "0.7.17"
@@ -76,6 +138,12 @@ tools:
     gsutil: 
         conda_env: "envs/gsutil-4.53.yaml"
         version: "4.53"
+    sigprofiler:
+        conda_env: "envs/sigprofiler-1.1.yaml"
+        version: "1.1"
+    bcftools:
+        conda_env: "envs/bcftools-1.10.2.yaml"
+        version: "1.10.2"
 
 cvbio_config:
     gtf: 
diff --git a/workflows/reference_files/2.4/envs/bcftools-1.10.2.yaml b/workflows/reference_files/2.4/envs/bcftools-1.10.2.yaml
new file mode 120000
index 000000000..72959e7bb
--- /dev/null
+++ b/workflows/reference_files/2.4/envs/bcftools-1.10.2.yaml
@@ -0,0 +1 @@
+../../../../envs/bcftools/bcftools-1.10.2.yaml
\ No newline at end of file
diff --git a/workflows/reference_files/2.4/envs/bedtools-2.29.2.yaml b/workflows/reference_files/2.4/envs/bedtools-2.29.2.yaml
new file mode 120000
index 000000000..c185b12ac
--- /dev/null
+++ b/workflows/reference_files/2.4/envs/bedtools-2.29.2.yaml
@@ -0,0 +1 @@
+../../../../envs/bedtools/bedtools-2.29.2.yaml
\ No newline at end of file
diff --git a/workflows/reference_files/2.4/envs/sigprofiler-1.1.yaml b/workflows/reference_files/2.4/envs/sigprofiler-1.1.yaml
new file mode 120000
index 000000000..3613e08c5
--- /dev/null
+++ b/workflows/reference_files/2.4/envs/sigprofiler-1.1.yaml
@@ -0,0 +1 @@
+../../../../envs/sigprofiler/sigprofiler-1.1.yaml
\ No newline at end of file
diff --git a/workflows/reference_files/2.4/envs/tabix-0.2.6.yaml b/workflows/reference_files/2.4/envs/tabix-0.2.6.yaml
new file mode 120000
index 000000000..bcf351f21
--- /dev/null
+++ b/workflows/reference_files/2.4/envs/tabix-0.2.6.yaml
@@ -0,0 +1 @@
+../../../../envs/tabix/tabix-0.2.6.yaml
\ No newline at end of file
diff --git a/workflows/reference_files/2.4/prepare_reference_files.smk b/workflows/reference_files/2.4/prepare_reference_files.smk
index 9c0955194..7f8005abe 100644
--- a/workflows/reference_files/2.4/prepare_reference_files.smk
+++ b/workflows/reference_files/2.4/prepare_reference_files.smk
@@ -38,6 +38,8 @@ rule all:
             [
                 rules.get_genome_fasta_download.output.fasta,
                 rules.index_genome_fasta.output.fai,
+                rules.get_masked_genome_fasta_download.output.fasta,
+                rules.index_masked_genome_fasta.output.fai,
                 rules.get_main_chromosomes_download.output.txt,
                 rules.get_main_chromosomes_download.output.bed,
                 rules.get_main_chromosomes_download.output.chrx,
diff --git a/workflows/reference_files/2.4/reference_files.smk b/workflows/reference_files/2.4/reference_files.smk
index fe4a1d441..047d121fb 100644
--- a/workflows/reference_files/2.4/reference_files.smk
+++ b/workflows/reference_files/2.4/reference_files.smk
@@ -12,6 +12,8 @@ rule get_genome_fasta_download:
         fasta = rules.download_genome_fasta.output.fasta
     output: 
         fasta = "genomes/{genome_build}/genome_fasta/genome.fa"
+    wildcard_constraints:
+        genome_build = ".+(?<!masked)"
     conda: CONDA_ENVS["coreutils"]
     shell:
         "ln -srf {input.fasta} {output.fasta}"
@@ -24,6 +26,8 @@ rule index_genome_fasta:
         fai = "genomes/{genome_build}/genome_fasta/genome.fa.fai"
     log: 
         "genomes/{genome_build}/genome_fasta/genome.fa.fai.log"
+    wildcard_constraints:
+        genome_build = ".+(?<!masked)"
     conda: CONDA_ENVS["samtools"]
     shell:
         "samtools faidx {input.fasta} > {log} 2>&1"
@@ -97,11 +101,36 @@ rule get_sdf_refs:
     output: 
         sdf = directory("genomes/{genome_build}/sdf")
     wildcard_constraints: 
-        genome_build = "hg38|hg19|grch37|hs37d5"
+        genome_build = "|".join(SDF_GENOME_BUILDS)
     shell: 
         "ln -srfT {input.sdf} {output.sdf}"
 
 
+rule get_masked_genome_fasta_download:
+    input: 
+        fasta = rules.download_masked_genome_fasta.output.fasta
+    output: 
+        fasta = "genomes/{genome_build}/genome_fasta/genome.fa"
+    wildcard_constraints:
+        genome_build = ".+_masked"  # only genome builds whose name ends in "_masked"; get_genome_fasta_download excludes those
+    conda: CONDA_ENVS["coreutils"]
+    shell:
+        "ln -srf {input.fasta} {output.fasta}"
+
+# Build the samtools faidx index for a "_masked" genome FASTA (counterpart of index_genome_fasta, which skips masked builds)
+rule index_masked_genome_fasta:
+    input: 
+        fasta = rules.get_masked_genome_fasta_download.output.fasta
+    output: 
+        fai = "genomes/{genome_build}/genome_fasta/genome.fa.fai"
+    log: 
+        "genomes/{genome_build}/genome_fasta/genome.fa.fai.log"
+    wildcard_constraints:
+        genome_build = ".+_masked"  # only genome builds whose name ends in "_masked"
+    conda: CONDA_ENVS["samtools"]
+    shell:
+        "samtools faidx {input.fasta} > {log} 2>&1"
+
 
 ##### METADATA #####
 
@@ -203,15 +232,18 @@ rule calc_gc_content:
 
 rule get_dbsnp_download: 
     input:
-        vcf = get_download_file(rules.download_dbsnp_vcf.output.vcf)
+        vcf = get_download_file(rules.download_dbsnp_vcf.output.vcf),
+        fai = str(rules.index_genome_fasta.output.fai),
+        bed = str(rules.get_main_chromosomes_withY_download.output.bed)
     output:
-        vcf = "genomes/{genome_build}/variation/dbsnp.common_all-{dbsnp_build}.vcf.gz"
-    conda: CONDA_ENVS["samtools"]
+        vcf = "genomes/{genome_build}/variation/dbsnp.common_all-{dbsnp_build}.vcf.gz",
+        tmpfile = temp("genomes/{genome_build}/variation/dbsnp.common_all-{dbsnp_build}.vcf.tmp")
+    conda: CONDA_ENVS["bcftools"]
     shell:
         op.as_one_line("""
-        bgzip -c {input.vcf} > {output.vcf}
-            &&
-        tabix {output.vcf}
+        zgrep -v '##contig' {input.vcf} > {output.tmpfile} &&
+        bcftools reheader --fai {input.fai} {output.tmpfile} | bcftools view -T {input.bed} -O z -o {output.vcf} &&
+        bcftools index -t {output.vcf}
         """)
 
 ##### PICARD METRICS
@@ -345,39 +377,224 @@ rule create_salmon_index:
 
 rule get_af_only_gnomad_vcf:
     input:
-        vcf = get_download_file(rules.download_af_only_gnomad_vcf.output.vcf)
+        vcf = get_download_file(rules.download_af_only_gnomad_vcf.output.vcf),
+        fai = str(rules.index_genome_fasta.output.fai),
+        bed = str(rules.get_main_chromosomes_withY_download.output.bed)
     output:
-        vcf = "genomes/{genome_build}/variation/af-only-gnomad.{genome_build}.vcf.gz"
-    conda: CONDA_ENVS["samtools"]
+        vcf = "genomes/{genome_build}/variation/af-only-gnomad.{genome_build}.vcf.gz",
+        tmpfile = temp("genomes/{genome_build}/variation/af-only-gnomad.{genome_build}.vcf.tmp")
+    conda: CONDA_ENVS["bcftools"]
     shell:
-        op.as_one_line(""" 
-        bgzip -c {input.vcf} > {output.vcf}
+        op.as_one_line("""
+        zgrep -v '##contig' {input.vcf} > {output.tmpfile} &&
+        bcftools reheader --fai {input.fai} {output.tmpfile} | bcftools view -T {input.bed} -O z -o {output.vcf} &&
+        bcftools index -t {output.vcf}
+        """)
+
+rule normalize_af_only_gnomad_vcf:
+    input:
+        fasta = rules.get_genome_fasta_download.output.fasta,
+        vcf = str(rules.get_af_only_gnomad_vcf.output.vcf)
+    output:
+        vcf = "genomes/{genome_build}/variation/af-only-gnomad.normalized.{genome_build}.vcf.gz",
+        vcf_index = "genomes/{genome_build}/variation/af-only-gnomad.normalized.{genome_build}.vcf.gz.tbi"
+    conda: CONDA_ENVS["bcftools"]
+    shell:
+        op.as_one_line("""
+        bcftools view {input.vcf} | grep -v "_alt" | bcftools norm -m -any -f {input.fasta} | bgzip -c > {output.vcf}
+            &&
+        bcftools index -t {output.vcf}
             &&
-        tabix {output.vcf}
+        touch {output.vcf_index}
         """)
 
 rule get_mutect2_pon:
     input:
-        vcf = get_download_file(rules.download_mutect2_pon.output.vcf)
+        vcf = get_download_file(rules.download_mutect2_pon.output.vcf),
+        fai = str(rules.index_genome_fasta.output.fai),
+        bed = str(rules.get_main_chromosomes_withY_download.output.bed)
     output:
-        vcf = "genomes/{genome_build}/gatk/mutect2_pon.{genome_build}.vcf.gz"
-    conda: CONDA_ENVS["samtools"]
+        vcf = "genomes/{genome_build}/gatk/mutect2_pon.{genome_build}.vcf.gz",
+        tmpfile = temp("genomes/{genome_build}/gatk/mutect2_pon.{genome_build}.vcf.tmp")
+    conda: CONDA_ENVS["bcftools"]
+    log:
+        "genomes/{genome_build}/gatk/mutect2_pon.{genome_build}.vcf.log"
     shell:
         op.as_one_line(""" 
-        bgzip -c {input.vcf} > {output.vcf}
-            &&
-        tabix {output.vcf}
+        zgrep -v '##contig' {input.vcf} > {output.tmpfile} &&
+        bcftools reheader --fai {input.fai} {output.tmpfile} | bcftools view -T {input.bed} -O z -o {output.vcf} 2> {log} &&
+        bcftools index -t {output.vcf}
         """)
 
 rule get_mutect2_small_exac:
     input:
-        vcf = get_download_file(rules.download_mutect2_small_exac.output.vcf)
+        vcf = get_download_file(rules.download_mutect2_small_exac.output.vcf),
+        fai = str(rules.index_genome_fasta.output.fai),
+        bed = str(rules.get_main_chromosomes_withY_download.output.bed)
     output:
-        vcf = "genomes/{genome_build}/gatk/mutect2_small_exac.{genome_build}.vcf.gz"
-    conda: CONDA_ENVS["samtools"]
+        vcf = "genomes/{genome_build}/gatk/mutect2_small_exac.{genome_build}.vcf.gz",
+        tmpfile = temp("genomes/{genome_build}/gatk/mutect2_small_exac.{genome_build}.vcf.tmp")
+    log:
+        "genomes/{genome_build}/gatk/mutect2_small_exac.{genome_build}.vcf.log"  # own log file, distinct from the one get_mutect2_pon writes
+    conda: CONDA_ENVS["bcftools"]
     shell:
         op.as_one_line(""" 
-        bgzip -c {input.vcf} > {output.vcf}
+        zgrep -v '##contig' {input.vcf} > {output.tmpfile} &&
+        bcftools reheader --fai {input.fai} {output.tmpfile} | bcftools view -T {input.bed} -O z -o {output.vcf} 2> {log} &&
+        bcftools index -t {output.vcf}
+        """)
+
+
+### FOR HANDLING CAPTURE SPACE/TARGETED SEQUENCING DATA ###
+# Added by Chris
+# What this *should* do (if I have written these rules correctly) is obtain a capture space BED file
+# check the contig names against the reference (and replace them as necessary), pad, sort,merge, and generate a bgzip/tabix
+# indexed version of the BED as well as an interval list.
+
+def _check_capspace_provider(w):
+    """Input function choosing the capture BED whose contig naming matches the genome build.
+
+    Returns a dict with a single 'bed' entry (the rule below feeds it through unpack()).
+    """
+    # "default-<genome_build>" is a placeholder, unless the config defines a panel under that exact name
+    placeholder = "default-" + w.genome_build
+    wants_default = placeholder not in config["capture_space"] and w.capture_space == placeholder
+    panel = _get_default_capspace(w) if wants_default else w.capture_space
+
+    build_provider = config["genome_builds"][w.genome_build]["provider"]
+    panel_provider = config["capture_space"][panel]["provider"]
+
+    if build_provider != panel_provider:
+        # Providers differ: request the BED with its chr prefix converted ("chr" style for ucsc genomes)
+        prefix_style = "chr" if build_provider == "ucsc" else "no_chr"
+        converted = rules.add_remove_chr_prefix_bed.output.converted_bed
+        return {'bed': expand(converted, capture_space=panel, genome_build=w.genome_build, chr_status=prefix_style)}
+
+    # Providers match (i.e. they share the same prefix), so the downloaded BED is used unchanged
+    downloaded = rules.download_capspace_bed.output.capture_bed
+    return {'bed': expand(downloaded, capture_space=panel, genome_build=w.genome_build)}
+
+
+rule get_capspace_bed_download:
+    input:
+        unpack(_check_capspace_provider)
+    output:
+        capture_bed = "genomes/{genome_build}/capture_space/{capture_space}.bed"
+    conda: CONDA_ENVS["coreutils"]
+    shell:
+        "ln -srf {input.bed} {output.capture_bed}"
+
+
+rule sort_and_pad_capspace:
+    input:
+       capture_bed = rules.get_capspace_bed_download.output.capture_bed,
+       fai = rules.index_genome_fasta.output.fai
+    output:
+        intermediate_bed = temp("genomes/{genome_build}/capture_space/{capture_space}.intermediate.bed"),
+        padded_bed = "genomes/{genome_build}/capture_space/{capture_space}.padded.bed"
+    params:
+        padding_size = config['capture_params']['padding_size']  # Default to 200. I would be warry of changing
+    log:
+        "genomes/{genome_build}/capture_space/{capture_space}.padded.bed.log"
+    conda: CONDA_ENVS["bedtools"]
+    shell:
+        op.as_one_line("""
+        cat {input.fai} | cut -f 1-2 | perl -ane 'print "$F[0]\\t0\\t$F[1]\\n"' | bedtools intersect -wa -a {input.capture_bed} -b stdin > {output.intermediate_bed}
             &&
-        tabix {output.vcf}
+        bedtools slop -b {params.padding_size} -i {output.intermediate_bed} -g {input.fai} | bedtools sort | bedtools merge > {output.padded_bed} 2> {log}
         """)
+
+rule check_capspace_contigs:
+    input:
+        bed = rules.get_capspace_bed_download.output.capture_bed,
+        fai = rules.index_genome_fasta.output.fai
+    output:
+        contig_log = "genomes/{genome_build}/capture_space/{capture_space}.check_contigs.log"
+    run:
+        # Report (in contig_log) every contig named in the capture BED that the
+        # reference .fai index does not list. Nothing is corrected here; the
+        # rule only writes the log.
+
+        # Contig name = first tab-separated field of each line, for both files
+        with open(input.bed) as bed_handle:
+            bed_contigs = {row.rstrip().split("\t")[0] for row in bed_handle}
+
+        with open(input.fai) as fai_handle:
+            fai_contigs = {row.rstrip().split('\t')[0] for row in fai_handle}
+
+        # BED contigs that are absent from the reference index
+        missing_contigs = [name for name in bed_contigs if name not in fai_contigs]
+
+        # Either a one-line all-clear, or a heading followed by one contig per line
+        if missing_contigs:
+            report = (
+                "The following contigs were missing from the reference\n"
+                + "\n".join(missing_contigs)
+            )
+        else:
+            report = "No contigs missing from reference"
+
+        # In both cases the log is terminated by a single newline
+        with open(output.contig_log, "w") as log_handle:
+            # One write call carrying the whole report
+            log_handle.write(report + "\n")
+
+# bgzip-compress the padded capture BED and build its tabix index; stderr of both tools is collected in the rule's log
+rule compress_index_capspace_bed:
+    input:
+        capture_bed = rules.sort_and_pad_capspace.output.padded_bed
+    output:
+        bgzip_bed = "genomes/{genome_build}/capture_space/{capture_space}.padded.bed.gz",
+        tabix = "genomes/{genome_build}/capture_space/{capture_space}.padded.bed.gz.tbi"
+    log:
+        "genomes/{genome_build}/capture_space/{capture_space}.padded.bed.gz.log"
+    conda: CONDA_ENVS["tabix"]
+    shell:
+        op.as_one_line("""
+        bgzip -c {input.capture_bed} > {output.bgzip_bed} 2> {log}
+            &&
+        tabix -p bed {output.bgzip_bed} 2>> {log}
+        """)
+
+
+rule create_interval_list:
+    input:
+        bed = rules.sort_and_pad_capspace.output.padded_bed,
+        sd = rules.create_gatk_dict.output.dict
+    output:
+        interval_list = "genomes/{genome_build}/capture_space/{capture_space}.padded.interval_list"
+    log:
+        "genomes/{genome_build}/capture_space/{capture_space}.padded.interval_list.log"
+    conda: CONDA_ENVS["gatk"]
+    shell:
+        "gatk BedToIntervalList --INPUT {input.bed} -SD {input.sd} -O {output.interval_list} > {log} 2>&1"
+
+##### SigProfiler #####
+
+rule download_sigprofiler_genome:
+    output:
+        complete = "downloads/sigprofiler_prereqs/{sigprofiler_build}.installed"
+    conda: CONDA_ENVS["sigprofiler"]
+    shell:
+        op.as_one_line("""
+        python -c 'from SigProfilerMatrixGenerator import install as genInstall;
+        genInstall.install("{wildcards.sigprofiler_build}", rsync = False, bash = True)'
+            &&
+        touch {output.complete}
+        """)
+
+def get_sigprofiler_genome(wildcards):
+    # Map each known genome build onto the assembly name used for SigProfiler; any other build maps to ''
+    assembly_by_build = dict.fromkeys(['grch37','hg19','hs37d5'], "GRCh37")
+    assembly_by_build.update(dict.fromkeys(['grch38','grch38-legacy','hg38','hg38-panea'], "GRCh38"))
+    assembly = assembly_by_build.get(wildcards.genome_build, '')
+    # Path of the sentinel file that download_sigprofiler_genome touches once the install finished
+    return "downloads/sigprofiler_prereqs/" + assembly + ".installed"
+
+rule install_sigprofiler_genome:
+    input:
+        get_sigprofiler_genome
+    output:
+        complete = "genomes/{genome_build}/sigprofiler_genomes/{genome_build}.installed"
+    run:
+        op.relative_symlink(input, output.complete)
diff --git a/workflows/reference_files/2.4/reference_files_header.smk b/workflows/reference_files/2.4/reference_files_header.smk
index 11d6ffa19..eb3d935bd 100644
--- a/workflows/reference_files/2.4/reference_files_header.smk
+++ b/workflows/reference_files/2.4/reference_files_header.smk
@@ -44,6 +44,23 @@ VERSION_UPPER = {
     "grch38": "GRCh38",
 }
 
+GENOME_VERSION_GROUPS = {}
+GENOME_VERSION_MAP = {}
+for genome_build in VERSION_UPPER.keys():
+    GENOME_VERSION_GROUPS[genome_build] = []
+
+# For Starfish SDF files
+SDF_VERSION_MAP = {}
+SDF_GENOME_BUILDS = []
+SDF_IGNORE = {"grch38", "grch38-legacy", "grch38_masked"}  # Ignore non-chr prefixed versions of hg38 since we don't use them
+sdf_genome_mappings = {
+"GRCh37": {"ensembl": "1000g_v37_phase2.sdf", "ucsc": "hg19.sdf"},
+"GRCh38": {"ucsc": "GRCh38.sdf"}
+}
+
+
+DEFAULT_CAPSPACE = {}
+
 # Check genome build versions, providers, and genome_fasta
 possible_versions = list(VERSION_UPPER.keys())
 possible_providers = ["ensembl", "ucsc", "gencode", "ncbi"]
@@ -51,15 +68,49 @@ for build_name, build_info in config["genome_builds"].items():
     assert "version" in build_info and build_info["version"] in possible_versions, (
         f"`version` not set for `{build_name}` or `version` not among {possible_versions}."
     )
-    assert "provider" in build_info and build_info["provider"] in possible_providers, (
+    GENOME_VERSION_GROUPS[build_info["version"]].append(build_name)  # `version` was validated by the preceding assert
+    upper_genome_name = VERSION_UPPER[build_info["version"]]
+    GENOME_VERSION_MAP[build_name] = upper_genome_name
+    genome_provider = build_info.get("provider")  # .get(): a missing key must reach the assert's message, not raise a bare KeyError
+    assert "provider" in build_info and genome_provider in possible_providers, (
         f"`provider` not set for `{build_name}` or `provider` not among {possible_providers}."
     )
-    assert "genome_fasta_url" in build_info, f"`genome_fasta_url` not set for `{build_name}`."
-    url_code = urllib.request.urlopen(build_info["genome_fasta_url"]).getcode()
+    if "genome_fasta_url" in build_info:  # the URL is optional; it is only pinged when configured
+        url_code = urllib.request.urlopen(build_info["genome_fasta_url"]).getcode()
+        assert url_code == 200, (
+            f"Pinging `genome_fasta_url` for {build_name} returned HTTP code {url_code} "
+            f"(rather than 200): \n{build_info['genome_fasta_url']}"
+        )
+    # Find the appropriate SDF file for this genome build (builds listed in SDF_IGNORE are skipped)
+    if build_name not in SDF_IGNORE:
+        SDF_GENOME_BUILDS.append(build_name)
+        try:
+            SDF_VERSION_MAP[build_name] = sdf_genome_mappings[upper_genome_name][genome_provider]  # keyed by upper-case version, then provider
+        except KeyError as e:
+            raise AttributeError(f"Unable to locate a Starfish SDF file for genome build \'{upper_genome_name}\' and provider \'{genome_provider}\'") from e
+
+# Check parent genome, provider for the capture space
+for build_name, build_info in config["capture_space"].items():
+    assert "provider" in build_info and build_info["provider"] in possible_providers, (
+        f"`provider` not set for `{build_name}` or `provider` not among {possible_providers}."
+        )
+    assert "genome" in build_info and build_info["genome"] in possible_versions,(
+        f"`genome` not set for `{build_name}` or `genome` not among {possible_versions}." )
+    assert "capture_bed_url" in build_info
+    url_code = urllib.request.urlopen(build_info["capture_bed_url"]).getcode()
     assert url_code == 200, (
-        f"Pinging `genome_fasta_url` for {build_name} returned HTTP code {url_code} "
-        f"(rather than 200): \n{build_info['genome_fasta_url']}"
-    )    
+         f"Pinging `capture_bed_url` for {build_name} returned HTTP code {url_code} "
+         f"(rather than 200): \n{build_info['capture_bed_url']}"
+        )
+    if "default" in build_info:
+        is_default = str(build_info["default"]).lower()  # str(): a YAML boolean (`default: true`) has no .lower()
+        assert is_default in ["true", "false"], f"true/false required for for \'default\' field"
+        build_version = build_info["genome"]
+        if is_default == "true" and build_version in DEFAULT_CAPSPACE:
+            # i.e. a default has already been specified for this genome version!
+            raise AttributeError("For reference genome version \'%s\', both \'%s\' and \'%s\' were specified as default capture spaces in the reference config" % (build_version, DEFAULT_CAPSPACE[build_version], build_name))
+        if is_default == "true":  # an explicit "false" must not register this capture space as the default
+            DEFAULT_CAPSPACE[build_version] = build_name
 
 
 ##### TOOLS #####
@@ -122,15 +173,33 @@ for chrom_map_file in CHROM_MAPPINGS_FILES:
 
 
 rule download_genome_fasta:
-    output: 
+    output:
         fasta = "downloads/genome_fasta/{genome_build}.fa"
-    log: 
+    log:
         "downloads/genome_fasta/{genome_build}.fa.log"
-    params: 
-        url = lambda w: config["genome_builds"][w.genome_build]["genome_fasta_url"]
+    wildcard_constraints:
+        genome_build = ".+(?<!masked)"  # negative lookbehind: the build name must not end in "masked"
+    params:
+        path = lambda w: config["genome_builds"][w.genome_build]["genome_fasta_file"] if "genome_fasta_file" in config["genome_builds"][w.genome_build] else config["genome_builds"][w.genome_build]["genome_fasta_url"],  # `genome_fasta_file`, when configured, takes precedence over `genome_fasta_url`
     shell:
-        "curl -L {params.url} > {output.fasta} 2> {log}"
+        op.as_one_line("""
+        if [ -e {params.path} ]; then
+            cat {params.path} > {output.fasta} 2> {log};
+        else
+            curl -L {params.path} > {output.fasta} 2> {log};
+        fi
+        """)
 
+rule download_masked_genome_fasta:
+    output:
+        fasta = "downloads/genome_fasta/{genome_build}.fa"
+    wildcard_constraints:
+        genome_build = ".+_masked"  # only build names ending in "_masked"
+    params:
+        url = lambda w: config["genome_builds"][w.genome_build]["genome_fasta_url"]  # presumably a gzip-compressed FASTA, since the shell pipes the download through `gzip -d`
+    shell:
+        "curl -L {params.url} | "
+        "gzip -d > {output.fasta} "
 
 rule download_main_chromosomes:
     input:
@@ -333,13 +402,8 @@ rule download_liftover_chains:
 rule download_sdf: 
     output: 
         sdf = directory("downloads/sdf/{genome_build}/sdf")
-    params: 
-        build = lambda w: {
-            "grch37": "1000g_v37_phase2.sdf", 
-            "hs37d5": "1000g_v37_phase2.sdf",
-            "hg19": "hg19.sdf", 
-            "hg38": "GRCh38.sdf"
-        }[w.genome_build]
+    params:
+        build = lambda w: SDF_VERSION_MAP[w.genome_build]
     shell: 
         op.as_one_line("""
         wget -qO {output.sdf}.zip https://s3.amazonaws.com/rtg-datasets/references/{params.build}.zip && 
@@ -352,7 +416,7 @@ rule download_sdf:
 
 
 def get_matching_download_rules(file):
-    ignored_rules = ["download_genome_fasta", "download_sdf"]
+    ignored_rules = ["download_genome_fasta", "download_masked_genome_fasta", "download_sdf", "download_sigprofiler_genome"]
     rule_names = [ r for r in dir(rules) if r.startswith("download_")]
     rule_names = [ r for r in rule_names if r not in ignored_rules ]
     rule_list = [ getattr(rules, name) for name in rule_names ]
@@ -365,10 +429,11 @@ def get_matching_download_rules(file):
         # At least one output file should produce
         num_matches = []
         for output_file in r.output:
-            assert "{version}" in output_file, (
-                f"The `{rule_name}` download rule doesn't have a `{{version}}` "
-                f"wildcard in the output file ('{output_file}')."
-            )
+            if rule_name != "download_capspace_bed":  # Workaround since we don't really care about the provider for the capture space
+                assert "{version}" in output_file, (
+                    f"The `{rule_name}` download rule doesn't have a `{{version}}` "
+                    f"wildcard in the output file ('{output_file}')."
+                )
             matches = smk.io.glob_wildcards(output_file, [file])
             num_matches.append(len(matches[0]))
         if any(num > 0 for num in num_matches):
@@ -464,6 +529,57 @@ def get_download_file(file):
     return get_download_file_custom
 
 
+### CAPTURE SPACE ###
+rule download_capspace_bed:
+    output:
+        capture_bed = "downloads/capture_space/{capture_space}.{genome_build}.bed"
+    log:
+        "downloads/capture_space/{capture_space}.{genome_build}.bed.log"
+    params:
+        path = lambda w: config["capture_space"][w.capture_space]["capture_bed_file"] if "capture_bed_file" in config["capture_space"][w.capture_space] else config["capture_space"][w.capture_space]["capture_bed_url"],  # `capture_bed_file`, when configured, takes precedence over `capture_bed_url`
+        provider = lambda w: config["capture_space"][w.capture_space]["provider"]  # NOTE(review): not referenced by the shell command of this rule
+    shell:
+        op.as_one_line("""
+        if [ -e {params.path} ]; then
+            cat {params.path} > {output.capture_bed} 2> {log};
+        else
+            curl -L {params.path} > {output.capture_bed} 2> {log};
+        fi
+        """)
+
+rule add_remove_chr_prefix_bed:
+    input:
+        capture_bed = rules.download_capspace_bed.output.capture_bed
+    output:
+        converted_bed = "downloads/capture_space/{capture_space}.{genome_build}.{chr_status}.bed"
+    run:
+        # Converts the specified BED file and adds/removes chr prefixes
+        # chr_status == "chr" means every entry must gain a chr prefix; any other
+        # value means the existing chr prefix has to be stripped from every entry
+        add_chr = wildcards.chr_status == "chr"
+
+        # Process the BED file
+        with open(input.capture_bed) as f, open(output.converted_bed, "w") as o:
+            # Number lines from 1 so that the errors below report the offending line number
+            for i, line in enumerate(f, start=1):
+                has_chr = line.startswith("chr")
+                # Make sure that this BED entry is chr-prefixed or not
+                if add_chr and has_chr:
+                    # We were asked to add a chr prefix, but one already exists
+                    raise AttributeError("I was asked to add a \'chr\' prefix to \'%s\', but that BED is already \'chr\' prefixed on line %s" % (input.capture_bed, i))
+                if not add_chr and not has_chr:
+                    # We were asked to remove a chr prefix, but there isn't one
+                    raise AttributeError("I was asked to remove a \'chr\' prefix \'%s\', but it isn't chr prefixed on line %s" % (input.capture_bed, i))
+
+                if add_chr:
+                    line = "chr" + line
+                else:
+                    # Drop only the leading prefix; str.replace would also delete "chr" inside later columns (e.g. region names)
+                    line = line[len("chr"):]
+
+                o.write(line)
+
+
 ##### SHARED #####