From a188b906049172d72e732895c47c6adb145a1a16 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Mon, 1 Mar 2021 18:17:40 -0800 Subject: [PATCH 01/35] Rule_battenberg_get_refrence v1.0 --- modules/battenberg/1.1/battenberg.smk | 57 +++++++++++++++++-- modules/battenberg/1.1/config/default.yaml | 3 +- modules/battenberg/1.1/envs/wget-1.20.1.yaml | 1 + .../battenberg/1.1/src/battenberg_wgs_hg38.R | 36 +++++++----- 4 files changed, 78 insertions(+), 19 deletions(-) create mode 120000 modules/battenberg/1.1/envs/wget-1.20.1.yaml diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index ffc86caf..88116ff9 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -50,11 +50,47 @@ _battenberg_CFG = CFG # Define rules to be run locally when using a compute cluster localrules: + _battenberg_get_refrence _battenberg_all - ##### RULES ##### +# Downloads the refrence files into the module results directory (under '00-inputs/') from https://www.bcgsc.ca/downloads/morinlab/reference/ . +rule _battenberg_get_refrence: + output: + battenberg_impute = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_impute_v3"), + impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt", + probloci = CFG["dirs"]["inputs"] + "reference/{genome_build}/probloci.txt.gz", + battenberg_wgs_replic_correction = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_wgs_replic_correction_1000g_v3"), + battenberg_gc_correction = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_wgs_gc_correction_1000g_v3"), + genomesloci = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_1000genomesloci2012_v3") + params: + url = "https://www.bcgsc.ca/downloads/morinlab/reference", + folder = CFG["dirs"]["inputs"] + "reference/{genome_build}", + build = lambda w: "hg38" if "38" in str({w.genome_build}) else "grch37" + shell: + op.as_one_line(""" + wget -qO- {params.url}/battenberg_impute_{params.build}.tar.gz | + tar -xvz > {output.battenberg_impute} -C {params.folder} + && + wget -qO- {params.url}/battenberg_{params.build}_gc_correction.tar.gz | + tar -xvz > {output.battenberg_gc_correction} -C {params.folder} + && + wget -qO- {params.url}/battenberg_1000genomesloci_{params.build}.tar.gz | + tar -xvz > {output.genomesloci} -C {params.folder} + && + wget -O {output.impute_info} 'https://ora.ox.ac.uk/objects/uuid:2c1fec09-a504-49ab-9ce9-3f17bac531bc/download_file?file_format=plain&safe_filename=impute_info.txt&type_of_work=Dataset' + && + python scripts/refrence_correction.py + && + wget -qO- {params.url}/battenberg_{params.build}_replic_correction.tar.gz | + tar -xvz > {output.battenberg_wgs_replic_correction} -C {params.folder} + && + wget -O {output.probloci} {params.url}/probloci_{params.build}.txt.gz + + """) + + # Symlinks the input files into the module results directory (under '00-inputs/') rule _battenberg_input_bam: input: @@ -107,14 +143,18 @@ rule _infer_patient_sex: # This rule runs the entire Battenberg pipeline. Eventually we may want to set this rule up to allow re-starting # of partially completed jobs (e.g. if they run out of RAM and are killed by the cluster, they can automatically retry) -# TODO: this rule needs to be modified to rely on reference_files and allow setup (downloading) of the Battenberg references rule _run_battenberg: input: tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam", normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam", installed = "config/envs/battenberg_dependencies_installed.success", sex_result = CFG["dirs"]["infer_sex"] + "{seq_type}--{genome_build}/{normal_id}.sex", - fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa") + fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"), + impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt", + probloci = CFG["dirs"]["inputs"] + "reference/{genome_build}/probloci.txt.gz", + battenberg_wgs_replic_correction = (CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_wgs_replic_correction_1000g_v3"), + battenberg_gc_correction = (CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_wgs_gc_correction_1000g_v3"), + genomesloci = (CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_1000genomesloci2012_v3") output: refit=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_refit_suggestion.txt", sub=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_subclones.txt", @@ -145,7 +185,8 @@ rule _run_battenberg: sex=$(cut -f 4 {input.sex_result}| tail -n 1); echo "setting sex as $sex"; Rscript {params.script} -t {wildcards.tumour_id} - -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} + -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --i {input.impute_info} --prob {input.probloci} + --bwrc {input.battenberg_wgs_replic_correction} --brc {input.battenberg_gc_correction} --gl {input.genomesloci} -o {params.out_dir} --sex $sex --reference {params.reference_path} {params.chr_prefixed} --cpu {threads} >> {log.stdout} 2>> {log.stderr} && echo "DONE {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" >> {log.stdout}; """) @@ -212,14 +253,19 @@ rule _battenberg_output_seg: op.relative_symlink(input.sub, output.sub,in_module=True) op.relative_symlink(input.cp, output.cp,in_module=True) + + # Generates the target sentinels for each run, which generate the symlinks rule _battenberg_all: input: expand( [ + rules._run_battenberg.output.sub, rules._battenberg_output_seg.output.seg, rules._battenberg_cleanup.output.complete + + ], zip, # Run expand() with zip(), not product() seq_type=CFG["runs"]["tumour_seq_type"], @@ -229,6 +275,9 @@ rule _battenberg_all: pair_status=CFG["runs"]["pair_status"]) + + + ##### CLEANUP ##### diff --git a/modules/battenberg/1.1/config/default.yaml b/modules/battenberg/1.1/config/default.yaml index 384415ef..8213ad29 100644 --- a/modules/battenberg/1.1/config/default.yaml +++ b/modules/battenberg/1.1/config/default.yaml @@ -21,7 +21,8 @@ lcr-modules: grch37: " " conda_envs: - battenberg: "{MODSDIR}/envs/battenberg-1.0.yaml" + battenberg: "{MODSDIR}/envs/battenberg-1.1.yaml" + wget: "{MODSDIR}/envs/wget-1.20.1.yaml" resources: battenberg: diff --git a/modules/battenberg/1.1/envs/wget-1.20.1.yaml b/modules/battenberg/1.1/envs/wget-1.20.1.yaml new file mode 120000 index 00000000..ead79548 --- /dev/null +++ b/modules/battenberg/1.1/envs/wget-1.20.1.yaml @@ -0,0 +1 @@ +/home/lakshay/lcr-modules/envs/wget/wget-1.20.1.yaml \ No newline at end of file diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index 2bcbd25c..7e19f728 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -19,7 +19,14 @@ option_list = list( make_option(c("--reference"), type="character", default=NULL, help="Path to reference files", metavar="character"), make_option(c("-f","--reference_fasta"), type="character", default=NULL, help="Path to indexed genome fasta file (needed for CRAM compatability)", metavar="character"), make_option(c("--chr_prefixed_genome"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), - make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared.") + make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared."), + make_option(c("--bwrc"), type="character", default=NULL, help="Path to reference file", metavar="character"), + + make_option(c("--brc"), type="character", default=NULL, help="Path to reference file", metavar="character"), + make_option(c("--gl"), type="character", default=NULL, help="Path to reference file", metavar="character"), + + make_option(c("--i"), type="character", default=NULL, help="Path to reference file", metavar="character"), + make_option(c("--prob"), type="character", default=NULL, help="Path to reference file", metavar="character") ) opt_parser = OptionParser(option_list=option_list) @@ -50,14 +57,6 @@ verbose = TRUE ############################################################################### # General static -IMPUTEINFOFILE = paste0(REFERENCE_BASE,"/battenberg_impute_v3/impute_info_fix.txt") -print(IMPUTEINFOFILE) -G1000PREFIX = paste0(REFERENCE_BASE,"/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr") -G1000PREFIX_AC = paste0(REFERENCE_BASE,"/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr") -GCCORRECTPREFIX = paste0(REFERENCE_BASE,"/battenberg_wgs_gc_correction_1000g_v3/1000_genomes_GC_corr_chr_") -REPLICCORRECTPREFIX = paste0(REFERENCE_BASE,"/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_") -IMPUTE_EXE = "impute2" #install using conda - PLATFORM_GAMMA = 1 PHASING_GAMMA = 1 SEGMENTATION_GAMMA = 10 @@ -75,17 +74,26 @@ MIN_BASE_QUAL = 20 MIN_MAP_QUAL = 35 CALC_SEG_BAF_OPTION = 3 -# WGS specific static -ALLELECOUNTER = "alleleCounter" #conda package that should have this: cancerit-allelecount -PROBLEMLOCI = paste0(REFERENCE_BASE,"/probloci_270415.txt.gz") - -print(PROBLEMLOCI); # Change to work directory and load the chromosome information original_dir = getwd() setwd(RUN_DIR) NORMALBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$nb) TUMOURBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$tb) +IMPUTEINFOFILE = paste0(normalizePath(original_dir,"\\"), "/",opt$i) +print(IMPUTEINFOFILE) + +REPLICCORRECTPREFIX = paste0(normalizePath(original_dir,"\\"), "/",opt$bwrc, "/1000_genomes_replication_timing_chr_") +G1000PREFIX = paste0(normalizePath(original_dir,"\\"), "/",opt$gl, "/1000genomesAlleles2012_chr") +G1000PREFIX_AC = paste0(normalizePath(original_dir,"\\"), "/",opt$gl, "/1000genomesloci2012_chr") +GCCORRECTPREFIX = paste0(normalizePath(original_dir,"\\"), "/",opt$brc, "/1000_genomes_GC_corr_chr_") +IMPUTE_EXE = "impute2" #install using conda + +# WGS specific static +ALLELECOUNTER = "alleleCounter" #conda package that should have this: cancerit-allelecount +PROBLEMLOCI = paste0(normalizePath(original_dir,"\\"), "/",opt$prob) + +print(PROBLEMLOCI); #this should be the full path to the files after changing directories From 7ff9242a0a20899382e8f3e50bfcf49177ed0e75 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Tue, 2 Mar 2021 01:54:37 -0800 Subject: [PATCH 02/35] Added the script --- demo/scripts/refrence_correction.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 demo/scripts/refrence_correction.py diff --git a/demo/scripts/refrence_correction.py b/demo/scripts/refrence_correction.py new file mode 100644 index 00000000..7e48ac69 --- /dev/null +++ b/demo/scripts/refrence_correction.py @@ -0,0 +1,13 @@ +import os +cwd = os.getcwd() +cwd +fileIN = open( cwd + "/results/battenberg-1.1/00-inputs/reference/grch37/impute_info.txt", 'r') +filedata = fileIN.read() +fileIN.close() + +newdata = filedata.replace("", cwd + "/results/battenberg-1.1/00-inputs/reference/grch37/battenberg_impute_v3") + +fileOut = open(cwd + "/results/battenberg-1.1/00-inputs/reference/grch37/impute_info.txt", 'w') +fileOut.write(newdata) +fileOut.close() + From 192b1eb81be0fef682df8cfbdbaa6fac8af8753a Mon Sep 17 00:00:00 2001 From: Lakshay Date: Tue, 2 Mar 2021 09:56:03 -0800 Subject: [PATCH 03/35] added genome generality to script --- demo/scripts/refrence_correction.py | 7 ++++--- modules/battenberg/1.1/battenberg.smk | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/demo/scripts/refrence_correction.py b/demo/scripts/refrence_correction.py index 7e48ac69..11767004 100644 --- a/demo/scripts/refrence_correction.py +++ b/demo/scripts/refrence_correction.py @@ -1,13 +1,14 @@ import os +import sys cwd = os.getcwd() cwd -fileIN = open( cwd + "/results/battenberg-1.1/00-inputs/reference/grch37/impute_info.txt", 'r') +fileIN = open( cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[0] + "/impute_info.txt", 'r') filedata = fileIN.read() fileIN.close() -newdata = filedata.replace("", cwd + "/results/battenberg-1.1/00-inputs/reference/grch37/battenberg_impute_v3") +newdata = filedata.replace("", cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[0] + "/battenberg_impute_v3") -fileOut = open(cwd + "/results/battenberg-1.1/00-inputs/reference/grch37/impute_info.txt", 'w') +fileOut = open(cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[0] + "/impute_info.txt", 'w') fileOut.write(newdata) fileOut.close() diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index 88116ff9..0adda115 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -81,7 +81,7 @@ rule _battenberg_get_refrence: && wget -O {output.impute_info} 'https://ora.ox.ac.uk/objects/uuid:2c1fec09-a504-49ab-9ce9-3f17bac531bc/download_file?file_format=plain&safe_filename=impute_info.txt&type_of_work=Dataset' && - python scripts/refrence_correction.py + python scripts/refrence_correction.py {params.build} && wget -qO- {params.url}/battenberg_{params.build}_replic_correction.tar.gz | tar -xvz > {output.battenberg_wgs_replic_correction} -C {params.folder} From 406fa3dd5ecf635ad876636e424f5a129b5b1db4 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Tue, 2 Mar 2021 10:07:04 -0800 Subject: [PATCH 04/35] Made changes in error message for R file --- modules/battenberg/1.1/src/battenberg_wgs_hg38.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index 7e19f728..ed3c57f9 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -20,13 +20,13 @@ option_list = list( make_option(c("-f","--reference_fasta"), type="character", default=NULL, help="Path to indexed genome fasta file (needed for CRAM compatability)", metavar="character"), make_option(c("--chr_prefixed_genome"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared."), - make_option(c("--bwrc"), type="character", default=NULL, help="Path to reference file", metavar="character"), + make_option(c("--bwrc"), type="character", default=NULL, help="Path to battenberg_replic_correction file", metavar="character"), - make_option(c("--brc"), type="character", default=NULL, help="Path to reference file", metavar="character"), - make_option(c("--gl"), type="character", default=NULL, help="Path to reference file", metavar="character"), + make_option(c("--brc"), type="character", default=NULL, help="Path to battenberg_gc_correction file", metavar="character"), + make_option(c("--gl"), type="character", default=NULL, help="Path to genomesloci file", metavar="character"), - make_option(c("--i"), type="character", default=NULL, help="Path to reference file", metavar="character"), - make_option(c("--prob"), type="character", default=NULL, help="Path to reference file", metavar="character") + make_option(c("--i"), type="character", default=NULL, help="Path to impute_info file", metavar="character"), + make_option(c("--prob"), type="character", default=NULL, help="Path to probloci file", metavar="character") ) opt_parser = OptionParser(option_list=option_list) From e157cc0c88e6e00e5bbb6a05a343bbbf278fea22 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Tue, 2 Mar 2021 12:01:25 -0800 Subject: [PATCH 05/35] Made the symlink relative --- modules/battenberg/1.1/envs/wget-1.20.1.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/battenberg/1.1/envs/wget-1.20.1.yaml b/modules/battenberg/1.1/envs/wget-1.20.1.yaml index ead79548..86501e72 120000 --- a/modules/battenberg/1.1/envs/wget-1.20.1.yaml +++ b/modules/battenberg/1.1/envs/wget-1.20.1.yaml @@ -1 +1 @@ -/home/lakshay/lcr-modules/envs/wget/wget-1.20.1.yaml \ No newline at end of file +../../../../envs/wget/wget-1.20.1.yaml \ No newline at end of file From 14502b026bc04073a2a30c4e394400e4a03d75b0 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Wed, 3 Mar 2021 09:53:11 -0800 Subject: [PATCH 06/35] Changed some paths --- modules/battenberg/1.1/battenberg.smk | 9 ++------ .../battenberg/1.1/src/battenberg_wgs_hg38.R | 22 ++++++++----------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index 0adda115..095ecf53 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -150,11 +150,7 @@ rule _run_battenberg: installed = "config/envs/battenberg_dependencies_installed.success", sex_result = CFG["dirs"]["infer_sex"] + "{seq_type}--{genome_build}/{normal_id}.sex", fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"), - impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt", - probloci = CFG["dirs"]["inputs"] + "reference/{genome_build}/probloci.txt.gz", - battenberg_wgs_replic_correction = (CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_wgs_replic_correction_1000g_v3"), - battenberg_gc_correction = (CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_wgs_gc_correction_1000g_v3"), - genomesloci = (CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_1000genomesloci2012_v3") + ref = CFG["dirs"]["inputs"] + "reference/{genome_build}" output: refit=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_refit_suggestion.txt", sub=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_subclones.txt", @@ -185,8 +181,7 @@ rule _run_battenberg: sex=$(cut -f 4 {input.sex_result}| tail -n 1); echo "setting sex as $sex"; Rscript {params.script} -t {wildcards.tumour_id} - -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --i {input.impute_info} --prob {input.probloci} - --bwrc {input.battenberg_wgs_replic_correction} --brc {input.battenberg_gc_correction} --gl {input.genomesloci} + -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --ref {input.ref} -o {params.out_dir} --sex $sex --reference {params.reference_path} {params.chr_prefixed} --cpu {threads} >> {log.stdout} 2>> {log.stderr} && echo "DONE {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" >> {log.stdout}; """) diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index ed3c57f9..11e40f8f 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -20,13 +20,7 @@ option_list = list( make_option(c("-f","--reference_fasta"), type="character", default=NULL, help="Path to indexed genome fasta file (needed for CRAM compatability)", metavar="character"), make_option(c("--chr_prefixed_genome"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared."), - make_option(c("--bwrc"), type="character", default=NULL, help="Path to battenberg_replic_correction file", metavar="character"), - - make_option(c("--brc"), type="character", default=NULL, help="Path to battenberg_gc_correction file", metavar="character"), - make_option(c("--gl"), type="character", default=NULL, help="Path to genomesloci file", metavar="character"), - - make_option(c("--i"), type="character", default=NULL, help="Path to impute_info file", metavar="character"), - make_option(c("--prob"), type="character", default=NULL, help="Path to probloci file", metavar="character") + make_option(c("--ref"), type="character", default=NULL, help="Path to reference file", metavar="character") ) opt_parser = OptionParser(option_list=option_list) @@ -80,18 +74,20 @@ original_dir = getwd() setwd(RUN_DIR) NORMALBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$nb) TUMOURBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$tb) -IMPUTEINFOFILE = paste0(normalizePath(original_dir,"\\"), "/",opt$i) + +REFERENCE_BASE = paste0(normalizePath(original_dir,"\\"), "/",opt$ref) +IMPUTEINFOFILE = paste0(REFERENCE_BASE,"/impute_info.txt") print(IMPUTEINFOFILE) -REPLICCORRECTPREFIX = paste0(normalizePath(original_dir,"\\"), "/",opt$bwrc, "/1000_genomes_replication_timing_chr_") -G1000PREFIX = paste0(normalizePath(original_dir,"\\"), "/",opt$gl, "/1000genomesAlleles2012_chr") -G1000PREFIX_AC = paste0(normalizePath(original_dir,"\\"), "/",opt$gl, "/1000genomesloci2012_chr") -GCCORRECTPREFIX = paste0(normalizePath(original_dir,"\\"), "/",opt$brc, "/1000_genomes_GC_corr_chr_") +REPLICCORRECTPREFIX = paste0(REFERENCE_BASE, "/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_") +G1000PREFIX = paste0(REFERENCE_BASE, "/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr") +G1000PREFIX_AC = paste0(REFERENCE_BASE, "/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr") +GCCORRECTPREFIX = paste0(REFERENCE_BASE, "/battenberg_wgs_gc_correction_1000g_v3/1000_genomes_GC_corr_chr_") IMPUTE_EXE = "impute2" #install using conda # WGS specific static ALLELECOUNTER = "alleleCounter" #conda package that should have this: cancerit-allelecount -PROBLEMLOCI = paste0(normalizePath(original_dir,"\\"), "/",opt$prob) +PROBLEMLOCI = paste0(REFERENCE_BASE, "/probloci.txt.gz") print(PROBLEMLOCI); From ec3b40c1cc216c5bf8006048a96f7d252cb247c7 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Thu, 11 Mar 2021 10:24:17 -0800 Subject: [PATCH 07/35] changed refrence file path to a more relative one --- modules/battenberg/1.1/battenberg.smk | 13 ++++++++----- modules/battenberg/1.1/src/refrence_correction.py | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 modules/battenberg/1.1/src/refrence_correction.py diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index 095ecf53..9558fcef 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -67,7 +67,8 @@ rule _battenberg_get_refrence: params: url = "https://www.bcgsc.ca/downloads/morinlab/reference", folder = CFG["dirs"]["inputs"] + "reference/{genome_build}", - build = lambda w: "hg38" if "38" in str({w.genome_build}) else "grch37" + build = lambda w: "hg38" if "38" in str({w.genome_build}) else "grch37", + PATH = CFG['inputs']['src_dir'] shell: op.as_one_line(""" wget -qO- {params.url}/battenberg_impute_{params.build}.tar.gz | @@ -81,7 +82,7 @@ rule _battenberg_get_refrence: && wget -O {output.impute_info} 'https://ora.ox.ac.uk/objects/uuid:2c1fec09-a504-49ab-9ce9-3f17bac531bc/download_file?file_format=plain&safe_filename=impute_info.txt&type_of_work=Dataset' && - python scripts/refrence_correction.py {params.build} + python {params.PATH}/refrence_correction.py {params.build} && wget -qO- {params.url}/battenberg_{params.build}_replic_correction.tar.gz | tar -xvz > {output.battenberg_wgs_replic_correction} -C {params.folder} @@ -150,7 +151,8 @@ rule _run_battenberg: installed = "config/envs/battenberg_dependencies_installed.success", sex_result = CFG["dirs"]["infer_sex"] + "{seq_type}--{genome_build}/{normal_id}.sex", fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"), - ref = CFG["dirs"]["inputs"] + "reference/{genome_build}" + impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt" + output: refit=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_refit_suggestion.txt", sub=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_subclones.txt", @@ -168,7 +170,8 @@ rule _run_battenberg: reference_path = lambda w: _battenberg_CFG["reference_path"][w.genome_build], script = CFG["inputs"]["battenberg_script"], chr_prefixed = lambda w: _battenberg_CFG["options"]["chr_prefixed_reference"][w.genome_build], - out_dir = CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}" + out_dir = CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}", + ref = CFG["dirs"]["inputs"] + "reference/{genome_build}" conda: CFG["conda_envs"]["battenberg"] resources: @@ -181,7 +184,7 @@ rule _run_battenberg: sex=$(cut -f 4 {input.sex_result}| tail -n 1); echo "setting sex as $sex"; Rscript {params.script} -t {wildcards.tumour_id} - -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --ref {input.ref} + -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --ref {params.ref} -o {params.out_dir} --sex $sex --reference {params.reference_path} {params.chr_prefixed} --cpu {threads} >> {log.stdout} 2>> {log.stderr} && echo "DONE {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" >> {log.stdout}; """) diff --git a/modules/battenberg/1.1/src/refrence_correction.py b/modules/battenberg/1.1/src/refrence_correction.py new file mode 100644 index 00000000..60bf6bf2 --- /dev/null +++ b/modules/battenberg/1.1/src/refrence_correction.py @@ -0,0 +1,14 @@ +import os +import sys +cwd = os.getcwd() + +fileIN = open( cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/impute_info.txt", 'r') +filedata = fileIN.read() +fileIN.close() + +newdata = filedata.replace("", cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/battenberg_impute_v3") + +fileOut = open(cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/impute_info.txt", 'w') +fileOut.write(newdata) +fileOut.close() + From f0780c267ea566e45e631d56b7b51c3606e9ca1a Mon Sep 17 00:00:00 2001 From: Lakshay-sethi <58126894+Lakshay-sethi@users.noreply.github.com> Date: Thu, 11 Mar 2021 14:57:59 -0500 Subject: [PATCH 08/35] Changed script location to src directory in module --- demo/scripts/refrence_correction.py | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 demo/scripts/refrence_correction.py diff --git a/demo/scripts/refrence_correction.py b/demo/scripts/refrence_correction.py deleted file mode 100644 index 11767004..00000000 --- a/demo/scripts/refrence_correction.py +++ /dev/null @@ -1,14 +0,0 @@ -import os -import sys -cwd = os.getcwd() -cwd -fileIN = open( cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[0] + "/impute_info.txt", 'r') -filedata = fileIN.read() -fileIN.close() - -newdata = filedata.replace("", cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[0] + "/battenberg_impute_v3") - -fileOut = open(cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[0] + "/impute_info.txt", 'w') -fileOut.write(newdata) -fileOut.close() - From d4ee2d1eeeed44d4283ad65db312f7a3c60add18 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Fri, 12 Mar 2021 11:43:10 -0800 Subject: [PATCH 09/35] Removed reductant code --- modules/battenberg/1.1/battenberg.smk | 3 +-- modules/battenberg/1.1/config/default.yaml | 4 ---- modules/battenberg/1.1/src/battenberg_wgs_hg38.R | 3 --- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index 9558fcef..913a15fc 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -167,7 +167,6 @@ rule _run_battenberg: stdout = CFG["logs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_battenberg.stdout.log", stderr = CFG["logs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_battenberg.stderr.log" params: - reference_path = lambda w: _battenberg_CFG["reference_path"][w.genome_build], script = CFG["inputs"]["battenberg_script"], chr_prefixed = lambda w: _battenberg_CFG["options"]["chr_prefixed_reference"][w.genome_build], out_dir = CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}", @@ -185,7 +184,7 @@ rule _run_battenberg: echo "setting sex as $sex"; Rscript {params.script} -t {wildcards.tumour_id} -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --ref {params.ref} - -o {params.out_dir} --sex $sex --reference {params.reference_path} {params.chr_prefixed} --cpu {threads} >> {log.stdout} 2>> {log.stderr} && + -o {params.out_dir} --sex $sex --cpu {threads} >> {log.stdout} 2>> {log.stderr} && echo "DONE {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" >> {log.stdout}; """) diff --git a/modules/battenberg/1.1/config/default.yaml b/modules/battenberg/1.1/config/default.yaml index 8213ad29..14bb0917 100644 --- a/modules/battenberg/1.1/config/default.yaml +++ b/modules/battenberg/1.1/config/default.yaml @@ -10,10 +10,6 @@ lcr-modules: scratch_subdirectories: [] - reference_path: - hg38: "__UPDATE__" - grch37: "__UPDATE__" - options: #update and add/remove these lines as needed for the reference genomes being used. chr_prefixed_reference: diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index 11e40f8f..acb86f51 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -16,7 +16,6 @@ option_list = list( make_option(c("--skip_phasing"), type="logical", default=FALSE, action="store_true", help="Provide when phasing has previously completed. This expects the files on disk", metavar="character"), make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character"), - make_option(c("--reference"), type="character", default=NULL, help="Path to reference files", metavar="character"), make_option(c("-f","--reference_fasta"), type="character", default=NULL, help="Path to indexed genome fasta file (needed for CRAM compatability)", metavar="character"), make_option(c("--chr_prefixed_genome"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared."), @@ -26,8 +25,6 @@ option_list = list( opt_parser = OptionParser(option_list=option_list) opt = parse_args(opt_parser) -REFERENCE_BASE = opt$reference - TUMOURNAME = opt$tumourname NORMALNAME = opt$normalname From 793d529f8a3cd8b166c0bb748d3dd691b626d664 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Mon, 15 Mar 2021 00:17:44 -0700 Subject: [PATCH 10/35] removed prefixes --- envs/samtools/samtools-1.9.yaml | 1 - envs/wget/wget-1.20.1.yaml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/envs/samtools/samtools-1.9.yaml b/envs/samtools/samtools-1.9.yaml index 07229243..3742eddf 100644 --- a/envs/samtools/samtools-1.9.yaml +++ b/envs/samtools/samtools-1.9.yaml @@ -24,5 +24,4 @@ dependencies: - tk=8.6.10 - xz=5.2.5 - zlib=1.2.11 -prefix: /home/bgrande/miniconda3/envs/test-samtools diff --git a/envs/wget/wget-1.20.1.yaml b/envs/wget/wget-1.20.1.yaml index beb55a09..fe718c1c 100644 --- a/envs/wget/wget-1.20.1.yaml +++ b/envs/wget/wget-1.20.1.yaml @@ -17,4 +17,4 @@ dependencies: - openssl=1.1.1h - wget=1.20.1 - zlib=1.2.11 -prefix: /home/lhilton/miniconda3/envs/wget-test + From 4f61dbb2a32fb8ef06469ab96c15cbcd97294693 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Wed, 17 Mar 2021 11:51:34 -0700 Subject: [PATCH 11/35] Handled some typo's --- modules/battenberg/1.1/battenberg.smk | 8 ++++---- .../{refrence_correction.py => reference_correction.py} | 0 2 files changed, 4 insertions(+), 4 deletions(-) rename modules/battenberg/1.1/src/{refrence_correction.py => reference_correction.py} (100%) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index 913a15fc..4c1be086 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -50,13 +50,13 @@ _battenberg_CFG = CFG # Define rules to be run locally when using a compute cluster localrules: - _battenberg_get_refrence + _battenberg_get_reference _battenberg_all ##### RULES ##### -# Downloads the refrence files into the module results directory (under '00-inputs/') from https://www.bcgsc.ca/downloads/morinlab/reference/ . -rule _battenberg_get_refrence: +# Downloads the reference files into the module results directory (under '00-inputs/') from https://www.bcgsc.ca/downloads/morinlab/reference/ . +rule _battenberg_get_reference: output: battenberg_impute = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_impute_v3"), impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt", @@ -82,7 +82,7 @@ rule _battenberg_get_refrence: && wget -O {output.impute_info} 'https://ora.ox.ac.uk/objects/uuid:2c1fec09-a504-49ab-9ce9-3f17bac531bc/download_file?file_format=plain&safe_filename=impute_info.txt&type_of_work=Dataset' && - python {params.PATH}/refrence_correction.py {params.build} + python {params.PATH}/reference_correction.py {params.build} && wget -qO- {params.url}/battenberg_{params.build}_replic_correction.tar.gz | tar -xvz > {output.battenberg_wgs_replic_correction} -C {params.folder} diff --git a/modules/battenberg/1.1/src/refrence_correction.py b/modules/battenberg/1.1/src/reference_correction.py similarity index 100% rename from modules/battenberg/1.1/src/refrence_correction.py rename to modules/battenberg/1.1/src/reference_correction.py From 0b8a5f4a7b9f8df6ce21d443a0b2dbba5b7a864b Mon Sep 17 00:00:00 2001 From: Lakshay Date: Thu, 18 Mar 2021 12:08:08 -0700 Subject: [PATCH 12/35] changed code place in regards to code review --- .../battenberg/1.1/src/battenberg_wgs_hg38.R | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index acb86f51..4bfaf7ec 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -25,6 +25,7 @@ option_list = list( opt_parser = OptionParser(option_list=option_list) opt = parse_args(opt_parser) +REFERENCE_BASE = paste0(normalizePath(original_dir,"\\"), "/",opt$ref) TUMOURNAME = opt$tumourname NORMALNAME = opt$normalname @@ -48,6 +49,14 @@ verbose = TRUE ############################################################################### # General static +IMPUTEINFOFILE = paste0(REFERENCE_BASE,"/impute_info.txt") +print(IMPUTEINFOFILE) +REPLICCORRECTPREFIX = paste0(REFERENCE_BASE, "/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_") +G1000PREFIX = paste0(REFERENCE_BASE, "/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr") +G1000PREFIX_AC = paste0(REFERENCE_BASE, "/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr") +GCCORRECTPREFIX = paste0(REFERENCE_BASE, "/battenberg_wgs_gc_correction_1000g_v3/1000_genomes_GC_corr_chr_") +IMPUTE_EXE = "impute2" #install using conda + PLATFORM_GAMMA = 1 PHASING_GAMMA = 1 SEGMENTATION_GAMMA = 10 @@ -65,6 +74,11 @@ MIN_BASE_QUAL = 20 MIN_MAP_QUAL = 35 CALC_SEG_BAF_OPTION = 3 +# WGS specific static +ALLELECOUNTER = "alleleCounter" #conda package that should have this: cancerit-allelecount +PROBLEMLOCI = paste0(REFERENCE_BASE, "/probloci.txt.gz") + +print(PROBLEMLOCI); # Change to work directory and load the chromosome information original_dir = getwd() @@ -72,21 +86,6 @@ setwd(RUN_DIR) NORMALBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$nb) TUMOURBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$tb) -REFERENCE_BASE = paste0(normalizePath(original_dir,"\\"), "/",opt$ref) -IMPUTEINFOFILE = paste0(REFERENCE_BASE,"/impute_info.txt") -print(IMPUTEINFOFILE) - -REPLICCORRECTPREFIX = paste0(REFERENCE_BASE, "/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_") -G1000PREFIX = paste0(REFERENCE_BASE, "/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr") -G1000PREFIX_AC = paste0(REFERENCE_BASE, "/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr") -GCCORRECTPREFIX = paste0(REFERENCE_BASE, "/battenberg_wgs_gc_correction_1000g_v3/1000_genomes_GC_corr_chr_") -IMPUTE_EXE = "impute2" #install using conda - -# WGS specific static -ALLELECOUNTER = "alleleCounter" #conda package that should have this: cancerit-allelecount -PROBLEMLOCI = paste0(REFERENCE_BASE, "/probloci.txt.gz") - -print(PROBLEMLOCI); #this should be the full path to the files after changing directories From c66c02efbce0210910b79158d390a37306a043d9 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Thu, 18 Mar 2021 13:45:56 -0700 Subject: [PATCH 13/35] Added chr_genome parameter --- modules/battenberg/1.1/battenberg.smk | 2 +- modules/battenberg/1.1/src/battenberg_wgs_hg38.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index 4c1be086..e9706379 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -184,7 +184,7 @@ rule _run_battenberg: echo "setting sex as $sex"; Rscript {params.script} -t {wildcards.tumour_id} -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --ref {params.ref} - -o {params.out_dir} --sex $sex --cpu {threads} >> {log.stdout} 2>> {log.stderr} && + -o {params.out_dir} -chr {params.chr_prefixed} --sex $sex --cpu {threads} >> {log.stdout} 2>> {log.stderr} && echo "DONE {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" >> {log.stdout}; """) diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index 4bfaf7ec..ecbfbe4f 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -17,7 +17,7 @@ option_list = list( make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character"), make_option(c("-f","--reference_fasta"), type="character", default=NULL, help="Path to indexed genome fasta file (needed for CRAM compatability)", metavar="character"), - make_option(c("--chr_prefixed_genome"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), + make_option(c("-chr","--chr_prefixed_genome"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared."), make_option(c("--ref"), type="character", default=NULL, help="Path to reference file", metavar="character") ) From c7c8d9413eeee277f61352e55f23c12f229b199e Mon Sep 17 00:00:00 2001 From: Lakshay Date: Thu, 18 Mar 2021 13:48:58 -0700 Subject: [PATCH 14/35] added conda env to refrence rule --- modules/battenberg/1.1/battenberg.smk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index e9706379..55baa07a 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -69,6 +69,8 @@ rule _battenberg_get_reference: folder = CFG["dirs"]["inputs"] + "reference/{genome_build}", build = lambda w: "hg38" if "38" in str({w.genome_build}) else "grch37", PATH = CFG['inputs']['src_dir'] + conda: + CFG["conda_envs"]["battenberg"] shell: op.as_one_line(""" wget -qO- {params.url}/battenberg_impute_{params.build}.tar.gz | From 35be3e6d6c7b21eeeb2c65dfd50a910eb8e782f1 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Tue, 6 Apr 2021 16:28:32 -0700 Subject: [PATCH 15/35] enabled chr prefixed functionality --- modules/battenberg/1.1/battenberg.smk | 57 +++++++++++++------ modules/battenberg/1.1/config/default.yaml | 12 +--- .../battenberg/1.1/src/battenberg_wgs_hg38.R | 6 +- 3 files changed, 47 insertions(+), 28 deletions(-) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index 55baa07a..bcf26f6a 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -27,8 +27,10 @@ except ModuleNotFoundError: current_version = pkg_resources.get_distribution("oncopipe").version if version.parse(current_version) < version.parse(min_oncopipe_version): - print(f"ERROR: oncopipe version installed: {current_version}") - print(f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment") + logger.warning( + '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m' + ) sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)") # End of dependency checking section @@ -50,13 +52,33 @@ _battenberg_CFG = CFG # Define rules to be run locally when using a compute cluster localrules: - _battenberg_get_reference + _battenberg_get_refrence _battenberg_all +VERSION_MAP = { + "hg19": "grch37", + "grch37": "grch37", + "hs37d5": "grch37", + "hg38": "hg38", + "grch38": "hg38", + "grch38-legacy": "hg38" + +} + +possible_genome_builds = VERSION_MAP.keys() +for genome_build in CFG["runs"]["tumour_genome_build"]: + assert genome_build in possible_genome_builds, ( + "Samples table includes genome builds not yet compatible with this module. " + "This module is currently only compatible with {possible_genome_builds}. " + ) + +wildcard_constraints: + genome_build = "|".join(VERSION_MAP.keys()) + ##### RULES ##### -# Downloads the reference files into the module results directory (under '00-inputs/') from https://www.bcgsc.ca/downloads/morinlab/reference/ . -rule _battenberg_get_reference: +# Downloads the refrence files into the module results directory (under '00-inputs/') from https://www.bcgsc.ca/downloads/morinlab/reference/ . +rule _battenberg_get_refrence: output: battenberg_impute = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_impute_v3"), impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt", @@ -66,30 +88,30 @@ rule _battenberg_get_reference: genomesloci = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_1000genomesloci2012_v3") params: url = "https://www.bcgsc.ca/downloads/morinlab/reference", + alt_build = lambda w: VERSION_MAP[w.genome_build], folder = CFG["dirs"]["inputs"] + "reference/{genome_build}", - build = lambda w: "hg38" if "38" in str({w.genome_build}) else "grch37", + build = lambda w: "grch37" if "37" in str({w.genome_build}) else "hg38", PATH = CFG['inputs']['src_dir'] - conda: - CFG["conda_envs"]["battenberg"] + shell: op.as_one_line(""" - wget -qO- {params.url}/battenberg_impute_{params.build}.tar.gz | + wget -qO- {params.url}/battenberg_impute_{params.alt_build}.tar.gz | tar -xvz > {output.battenberg_impute} -C {params.folder} && - wget -qO- {params.url}/battenberg_{params.build}_gc_correction.tar.gz | + wget -qO- {params.url}/battenberg_{params.alt_build}_gc_correction.tar.gz | tar -xvz > {output.battenberg_gc_correction} -C {params.folder} && - wget -qO- {params.url}/battenberg_1000genomesloci_{params.build}.tar.gz | + wget -qO- {params.url}/battenberg_1000genomesloci_{params.alt_build}.tar.gz | tar -xvz > {output.genomesloci} -C {params.folder} && wget -O {output.impute_info} 'https://ora.ox.ac.uk/objects/uuid:2c1fec09-a504-49ab-9ce9-3f17bac531bc/download_file?file_format=plain&safe_filename=impute_info.txt&type_of_work=Dataset' && - python {params.PATH}/reference_correction.py {params.build} + python {params.PATH}/reference_correction.py {genome_build} && - wget -qO- {params.url}/battenberg_{params.build}_replic_correction.tar.gz | + wget -qO- {params.url}/battenberg_{params.alt_build}_replic_correction.tar.gz | tar -xvz > {output.battenberg_wgs_replic_correction} -C {params.folder} && - wget -O {output.probloci} {params.url}/probloci_{params.build}.txt.gz + wget -O {output.probloci} {params.url}/probloci_{params.alt_build}.txt.gz """) @@ -133,6 +155,8 @@ rule _infer_patient_sex: **CFG["resources"]["infer_sex"] log: stderr = CFG["logs"]["infer_sex"] + "{seq_type}--{genome_build}/{normal_id}_infer_sex_stderr.log" + conda: + CFG["conda_envs"]["samtools"] group: "setup_run" threads: 8 shell: @@ -169,8 +193,8 @@ rule _run_battenberg: stdout = CFG["logs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_battenberg.stdout.log", stderr = CFG["logs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_battenberg.stderr.log" params: + fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"), script = CFG["inputs"]["battenberg_script"], - chr_prefixed = lambda w: _battenberg_CFG["options"]["chr_prefixed_reference"][w.genome_build], out_dir = CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}", ref = CFG["dirs"]["inputs"] + "reference/{genome_build}" conda: @@ -181,12 +205,13 @@ rule _run_battenberg: CFG["threads"]["battenberg"] shell: op.as_one_line(""" + if [[ $(head -c 4 {params.fasta}) == ">chr" ]]; then chr_prefixed='true'; else chr_prefixed=' '; fi; echo "running {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" > {log.stdout}; sex=$(cut -f 4 {input.sex_result}| tail -n 1); echo "setting sex as $sex"; Rscript {params.script} -t {wildcards.tumour_id} -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --ref {params.ref} - -o {params.out_dir} -chr {params.chr_prefixed} --sex $sex --cpu {threads} >> {log.stdout} 2>> {log.stderr} && + -o {params.out_dir} --chr $chr_prefixed --sex $sex --cpu {threads} >> {log.stdout} 2>> {log.stderr} && echo "DONE {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" >> {log.stdout}; """) diff --git a/modules/battenberg/1.1/config/default.yaml b/modules/battenberg/1.1/config/default.yaml index 14bb0917..118d8021 100644 --- a/modules/battenberg/1.1/config/default.yaml +++ b/modules/battenberg/1.1/config/default.yaml @@ -1,5 +1,4 @@ lcr-modules: - battenberg: inputs: # Available wildcards: {seq_type} {genome_build} {sample_id} @@ -10,16 +9,11 @@ lcr-modules: scratch_subdirectories: [] - options: - #update and add/remove these lines as needed for the reference genomes being used. - chr_prefixed_reference: - hg38: " --chr_prefixed_genome " - grch37: " " - conda_envs: battenberg: "{MODSDIR}/envs/battenberg-1.1.yaml" wget: "{MODSDIR}/envs/wget-1.20.1.yaml" - + samtools: "{MODSDIR}/envs/samtools-1.9.yaml" + resources: battenberg: mem_mb: 200000 @@ -27,7 +21,7 @@ lcr-modules: infer_sex: mem_mb: 20000 bam: 1 - + threads: battenberg: 24 #ideal for processing all chromosomes at once diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index ecbfbe4f..66021bf5 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -17,13 +17,14 @@ option_list = list( make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character"), make_option(c("-f","--reference_fasta"), type="character", default=NULL, help="Path to indexed genome fasta file (needed for CRAM compatability)", metavar="character"), - make_option(c("-chr","--chr_prefixed_genome"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), + make_option(c("--chr"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared."), make_option(c("--ref"), type="character", default=NULL, help="Path to reference file", metavar="character") ) opt_parser = OptionParser(option_list=option_list) opt = parse_args(opt_parser) +original_dir = getwd() REFERENCE_BASE = paste0(normalizePath(original_dir,"\\"), "/",opt$ref) TUMOURNAME = opt$tumourname @@ -33,7 +34,7 @@ REFERENCE_FASTA = opt$reference_fasta print(paste("using fasta:",REFERENCE_FASTA)) IS.MALE = opt$sex=="male" | opt$sex=="Male" RUN_DIR = opt$o -CHR_PREFIXED = opt$chr_prefixed_genome +CHR_PREFIXED = opt$chr print(paste("chr prefix present?",CHR_PREFIXED)) SKIP_ALLELECOUNTING = opt$skip_allelecount SKIP_PREPROCESSING = opt$skip_preprocessing @@ -81,7 +82,6 @@ PROBLEMLOCI = paste0(REFERENCE_BASE, "/probloci.txt.gz") print(PROBLEMLOCI); # Change to work directory and load the chromosome information -original_dir = getwd() setwd(RUN_DIR) NORMALBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$nb) TUMOURBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$tb) From a0797beb378558895c403baba8db190b97557388 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Wed, 7 Apr 2021 11:41:49 -0700 Subject: [PATCH 16/35] addresed review suggestions --- modules/battenberg/1.1/battenberg.smk | 11 +++++------ modules/battenberg/1.1/src/battenberg_wgs_hg38.R | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index bcf26f6a..50acf910 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -52,7 +52,7 @@ _battenberg_CFG = CFG # Define rules to be run locally when using a compute cluster localrules: - _battenberg_get_refrence + _battenberg_get_reference _battenberg_all VERSION_MAP = { @@ -77,8 +77,8 @@ wildcard_constraints: ##### RULES ##### -# Downloads the refrence files into the module results directory (under '00-inputs/') from https://www.bcgsc.ca/downloads/morinlab/reference/ . -rule _battenberg_get_refrence: +# Downloads the reference files into the module results directory (under '00-inputs/') from https://www.bcgsc.ca/downloads/morinlab/reference/ . +rule _battenberg_get_reference: output: battenberg_impute = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_impute_v3"), impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt", @@ -90,7 +90,6 @@ rule _battenberg_get_refrence: url = "https://www.bcgsc.ca/downloads/morinlab/reference", alt_build = lambda w: VERSION_MAP[w.genome_build], folder = CFG["dirs"]["inputs"] + "reference/{genome_build}", - build = lambda w: "grch37" if "37" in str({w.genome_build}) else "hg38", PATH = CFG['inputs']['src_dir'] shell: @@ -205,13 +204,13 @@ rule _run_battenberg: CFG["threads"]["battenberg"] shell: op.as_one_line(""" - if [[ $(head -c 4 {params.fasta}) == ">chr" ]]; then chr_prefixed='true'; else chr_prefixed=' '; fi; + if [[ $(head -c 4 {params.fasta}) == ">chr" ]]; then chr_prefixed='--chr_prefixed_genome'; else chr_prefixed=' '; fi; echo "running {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" > {log.stdout}; sex=$(cut -f 4 {input.sex_result}| tail -n 1); echo "setting sex as $sex"; Rscript {params.script} -t {wildcards.tumour_id} -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --ref {params.ref} - -o {params.out_dir} --chr $chr_prefixed --sex $sex --cpu {threads} >> {log.stdout} 2>> {log.stderr} && + -o {params.out_dir} --chr_prefixed_genome $chr_prefixed --sex $sex --cpu {threads} >> {log.stdout} 2>> {log.stderr} && echo "DONE {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" >> {log.stdout}; """) diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index 66021bf5..86d3b6d1 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -17,7 +17,7 @@ option_list = list( make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character"), make_option(c("-f","--reference_fasta"), type="character", default=NULL, help="Path to indexed genome fasta file (needed for CRAM compatability)", metavar="character"), - make_option(c("--chr"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), + make_option(c("--chr_prefixed_genome"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared."), make_option(c("--ref"), type="character", default=NULL, help="Path to reference file", metavar="character") ) @@ -34,7 +34,7 @@ REFERENCE_FASTA = opt$reference_fasta print(paste("using fasta:",REFERENCE_FASTA)) IS.MALE = opt$sex=="male" | opt$sex=="Male" RUN_DIR = opt$o -CHR_PREFIXED = opt$chr +CHR_PREFIXED = opt$chr_prefixed_genome print(paste("chr prefix present?",CHR_PREFIXED)) SKIP_ALLELECOUNTING = opt$skip_allelecount SKIP_PREPROCESSING = opt$skip_preprocessing From 07869231e0422a5f3ba457b9facbbb5b06220e26 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Fri, 9 Apr 2021 00:18:20 -0700 Subject: [PATCH 17/35] answered kostia review --- modules/battenberg/1.1/battenberg.smk | 10 ---------- modules/battenberg/1.1/src/battenberg_wgs_hg38.R | 5 ++--- modules/battenberg/1.1/src/reference_correction.py | 9 +++++++++ 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index 50acf910..e278c94d 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -65,16 +65,6 @@ VERSION_MAP = { } -possible_genome_builds = VERSION_MAP.keys() -for genome_build in CFG["runs"]["tumour_genome_build"]: - assert genome_build in possible_genome_builds, ( - "Samples table includes genome builds not yet compatible with this module. " - "This module is currently only compatible with {possible_genome_builds}. " - ) - -wildcard_constraints: - genome_build = "|".join(VERSION_MAP.keys()) - ##### RULES ##### # Downloads the reference files into the module results directory (under '00-inputs/') from https://www.bcgsc.ca/downloads/morinlab/reference/ . diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index 86d3b6d1..15ef224a 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -34,7 +34,7 @@ REFERENCE_FASTA = opt$reference_fasta print(paste("using fasta:",REFERENCE_FASTA)) IS.MALE = opt$sex=="male" | opt$sex=="Male" RUN_DIR = opt$o -CHR_PREFIXED = opt$chr_prefixed_genome +CHR_PREFIXED = opt$chr print(paste("chr prefix present?",CHR_PREFIXED)) SKIP_ALLELECOUNTING = opt$skip_allelecount SKIP_PREPROCESSING = opt$skip_preprocessing @@ -52,10 +52,10 @@ verbose = TRUE # General static IMPUTEINFOFILE = paste0(REFERENCE_BASE,"/impute_info.txt") print(IMPUTEINFOFILE) -REPLICCORRECTPREFIX = paste0(REFERENCE_BASE, "/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_") G1000PREFIX = paste0(REFERENCE_BASE, "/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr") G1000PREFIX_AC = paste0(REFERENCE_BASE, "/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr") GCCORRECTPREFIX = paste0(REFERENCE_BASE, "/battenberg_wgs_gc_correction_1000g_v3/1000_genomes_GC_corr_chr_") +REPLICCORRECTPREFIX = paste0(REFERENCE_BASE, "/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_") IMPUTE_EXE = "impute2" #install using conda PLATFORM_GAMMA = 1 @@ -86,7 +86,6 @@ setwd(RUN_DIR) NORMALBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$nb) TUMOURBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$tb) - #this should be the full path to the files after changing directories #debugging lines added here: diff --git a/modules/battenberg/1.1/src/reference_correction.py b/modules/battenberg/1.1/src/reference_correction.py index 60bf6bf2..dcd142be 100644 --- a/modules/battenberg/1.1/src/reference_correction.py +++ b/modules/battenberg/1.1/src/reference_correction.py @@ -1,3 +1,12 @@ +##### ATTRIBUTION ##### + + +# Original Author: Lakshay Sethi + +'''Comments + getcwd function of os package was used to retrieve the working directory. + argv function of sys package was used to uilize the parameters passed with the file. +''' import os import sys cwd = os.getcwd() From 5f3fea2b74c8971ce31bed39a44d730bfe4f55c0 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Fri, 9 Apr 2021 00:21:49 -0700 Subject: [PATCH 18/35] changed chr prefix variable --- modules/battenberg/1.1/src/battenberg_wgs_hg38.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index 15ef224a..63653499 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -34,7 +34,7 @@ REFERENCE_FASTA = opt$reference_fasta print(paste("using fasta:",REFERENCE_FASTA)) IS.MALE = opt$sex=="male" | opt$sex=="Male" RUN_DIR = opt$o -CHR_PREFIXED = opt$chr +CHR_PREFIXED = opt$chr_prefixed_genome print(paste("chr prefix present?",CHR_PREFIXED)) SKIP_ALLELECOUNTING = opt$skip_allelecount SKIP_PREPROCESSING = opt$skip_preprocessing @@ -52,10 +52,10 @@ verbose = TRUE # General static IMPUTEINFOFILE = paste0(REFERENCE_BASE,"/impute_info.txt") print(IMPUTEINFOFILE) -G1000PREFIX = paste0(REFERENCE_BASE, "/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr") -G1000PREFIX_AC = paste0(REFERENCE_BASE, "/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr") -GCCORRECTPREFIX = paste0(REFERENCE_BASE, "/battenberg_wgs_gc_correction_1000g_v3/1000_genomes_GC_corr_chr_") -REPLICCORRECTPREFIX = paste0(REFERENCE_BASE, "/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_") +G1000PREFIX = paste0(REFERENCE_BASE,"/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr") +G1000PREFIX_AC = paste0(REFERENCE_BASE,"/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr") +GCCORRECTPREFIX = paste0(REFERENCE_BASE,"/battenberg_wgs_gc_correction_1000g_v3/1000_genomes_GC_corr_chr_") +REPLICCORRECTPREFIX = paste0(REFERENCE_BASE,"/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_") IMPUTE_EXE = "impute2" #install using conda PLATFORM_GAMMA = 1 From ade8ef97a60b60b1a3e63647525e02e55e25d1d6 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Thu, 15 Apr 2021 12:08:35 -0700 Subject: [PATCH 19/35] chnages to battenberg folder --- modules/battenberg/1.1/battenberg.smk | 11 +++++---- .../battenberg/1.1/src/battenberg_wgs_hg38.R | 6 ++--- .../1.1/src/reference_correction.py | 23 +++++++++++++++---- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index e278c94d..092c420d 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -80,8 +80,8 @@ rule _battenberg_get_reference: url = "https://www.bcgsc.ca/downloads/morinlab/reference", alt_build = lambda w: VERSION_MAP[w.genome_build], folder = CFG["dirs"]["inputs"] + "reference/{genome_build}", + build = "{genome_build}", PATH = CFG['inputs']['src_dir'] - shell: op.as_one_line(""" wget -qO- {params.url}/battenberg_impute_{params.alt_build}.tar.gz | @@ -93,9 +93,9 @@ rule _battenberg_get_reference: wget -qO- {params.url}/battenberg_1000genomesloci_{params.alt_build}.tar.gz | tar -xvz > {output.genomesloci} -C {params.folder} && - wget -O {output.impute_info} 'https://ora.ox.ac.uk/objects/uuid:2c1fec09-a504-49ab-9ce9-3f17bac531bc/download_file?file_format=plain&safe_filename=impute_info.txt&type_of_work=Dataset' + wget -O {output.impute_info} {params.url}/impute_info_{params.alt_build}.txt && - python {params.PATH}/reference_correction.py {genome_build} + python {params.PATH}/reference_correction.py {params.build} && wget -qO- {params.url}/battenberg_{params.alt_build}_replic_correction.tar.gz | tar -xvz > {output.battenberg_wgs_replic_correction} -C {params.folder} @@ -194,12 +194,13 @@ rule _run_battenberg: CFG["threads"]["battenberg"] shell: op.as_one_line(""" - if [[ $(head -c 4 {params.fasta}) == ">chr" ]]; then chr_prefixed='--chr_prefixed_genome'; else chr_prefixed=' '; fi; + if [[ $(head -c 4 {params.fasta}) == ">chr" ]]; then chr_prefixed='true'; else chr_prefixed='false'; fi; + echo "$chr_prefixed" echo "running {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" > {log.stdout}; sex=$(cut -f 4 {input.sex_result}| tail -n 1); echo "setting sex as $sex"; Rscript {params.script} -t {wildcards.tumour_id} - -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --ref {params.ref} + -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --reference {params.ref} -o {params.out_dir} --chr_prefixed_genome $chr_prefixed --sex $sex --cpu {threads} >> {log.stdout} 2>> {log.stderr} && echo "DONE {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" >> {log.stdout}; """) diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index 63653499..fb9f3686 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -16,17 +16,17 @@ option_list = list( make_option(c("--skip_phasing"), type="logical", default=FALSE, action="store_true", help="Provide when phasing has previously completed. This expects the files on disk", metavar="character"), make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character"), + make_option(c("--reference"), type="character", default=NULL, help="Path to reference file", metavar="character"), make_option(c("-f","--reference_fasta"), type="character", default=NULL, help="Path to indexed genome fasta file (needed for CRAM compatability)", metavar="character"), make_option(c("--chr_prefixed_genome"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), - make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared."), - make_option(c("--ref"), type="character", default=NULL, help="Path to reference file", metavar="character") + make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared.") ) opt_parser = OptionParser(option_list=option_list) opt = parse_args(opt_parser) original_dir = getwd() -REFERENCE_BASE = paste0(normalizePath(original_dir,"\\"), "/",opt$ref) +REFERENCE_BASE = paste0(normalizePath(original_dir,"\\"), "/",opt$reference) TUMOURNAME = opt$tumourname NORMALNAME = opt$normalname diff --git a/modules/battenberg/1.1/src/reference_correction.py b/modules/battenberg/1.1/src/reference_correction.py index dcd142be..115ddc04 100644 --- a/modules/battenberg/1.1/src/reference_correction.py +++ b/modules/battenberg/1.1/src/reference_correction.py @@ -3,10 +3,25 @@ # Original Author: Lakshay Sethi -'''Comments - getcwd function of os package was used to retrieve the working directory. - argv function of sys package was used to uilize the parameters passed with the file. -''' +### Battenberg refrence file corrector ### +# Replaces the placeholder value in the impute_info.txt with the correct path +# where the reference files downloaded are stored. + +#!/usr/bin/env Python script +# +# Usage: +# python /reference_correction.py +# +# Notes: +# This script is intended for use with the Battenberg-1.1 module in LCR-modules. +# It expects to find the genome build at the input path, following +# the pattern reference_correction.py {genome_build}. These files should be in the +# 00-inputs subdirectory of the battenberg-1.1 directory present in the results directory. +# +# The file is made to be present in the src sub directory of the module. +# +# The sample table should adhere to LCR-modules guidelines. + import os import sys cwd = os.getcwd() From 31ad404fc437b55088d86b383676d4b522df3650 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Tue, 20 Apr 2021 13:51:29 -0700 Subject: [PATCH 20/35] enabled log in input battenberg and changed battenberg get reference from locla rule to clyuster --- modules/battenberg/1.1/battenberg.smk | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index 092c420d..d22e53b7 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -52,7 +52,6 @@ _battenberg_CFG = CFG # Define rules to be run locally when using a compute cluster localrules: - _battenberg_get_reference _battenberg_all VERSION_MAP = { @@ -82,6 +81,10 @@ rule _battenberg_get_reference: folder = CFG["dirs"]["inputs"] + "reference/{genome_build}", build = "{genome_build}", PATH = CFG['inputs']['src_dir'] + resources: + **CFG["resources"]["battenberg"] + threads: + CFG["threads"]["battenberg"] shell: op.as_one_line(""" wget -qO- {params.url}/battenberg_impute_{params.alt_build}.tar.gz | @@ -127,10 +130,12 @@ rule _install_battenberg: complete = "config/envs/battenberg_dependencies_installed.success" conda: CFG["conda_envs"]["battenberg"] + log: + input = CFG["logs"]["inputs"] + "input.log" shell: """ - R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT")' && ##move some of this to config? - R -q -e 'devtools::install_github("morinlab/battenberg")' && ##move some of this to config? + R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT")' >> {log.input} && ##move some of this to config? + R -q -e 'devtools::install_github("morinlab/battenberg")' >> {log.input} && ##move some of this to config? touch {output.complete}""" # this process is very fast on bam files and painfully slow on cram files. From dd62468c8115dfddd34dfba4a9db7bf7c583f911 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Tue, 20 Apr 2021 16:02:36 -0700 Subject: [PATCH 21/35] edited resources given to refrence download rule --- modules/battenberg/1.1/battenberg.smk | 4 ++-- modules/battenberg/1.1/config/default.yaml | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index d22e53b7..5414af77 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -82,9 +82,9 @@ rule _battenberg_get_reference: build = "{genome_build}", PATH = CFG['inputs']['src_dir'] resources: - **CFG["resources"]["battenberg"] + **CFG["resources"]["reference"] threads: - CFG["threads"]["battenberg"] + CFG["threads"]["reference"] shell: op.as_one_line(""" wget -qO- {params.url}/battenberg_impute_{params.alt_build}.tar.gz | diff --git a/modules/battenberg/1.1/config/default.yaml b/modules/battenberg/1.1/config/default.yaml index 118d8021..ae533cc4 100644 --- a/modules/battenberg/1.1/config/default.yaml +++ b/modules/battenberg/1.1/config/default.yaml @@ -21,9 +21,13 @@ lcr-modules: infer_sex: mem_mb: 20000 bam: 1 + reference: + mem_mb: 8000 + bam: 1 threads: battenberg: 24 + reference: 5 #ideal for processing all chromosomes at once pairing_config: From 7da904033aec7b7bcf6772abcddbe12435293f01 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Wed, 21 Apr 2021 11:06:26 -0700 Subject: [PATCH 22/35] change thread count --- modules/battenberg/1.1/config/default.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/battenberg/1.1/config/default.yaml b/modules/battenberg/1.1/config/default.yaml index ae533cc4..afbafe69 100644 --- a/modules/battenberg/1.1/config/default.yaml +++ b/modules/battenberg/1.1/config/default.yaml @@ -23,11 +23,10 @@ lcr-modules: bam: 1 reference: mem_mb: 8000 - bam: 1 threads: battenberg: 24 - reference: 5 + reference: 2 #ideal for processing all chromosomes at once pairing_config: From 16842f61066dd78136de277c97186453cdd3880b Mon Sep 17 00:00:00 2001 From: Lakshay Date: Fri, 23 Apr 2021 16:34:29 -0700 Subject: [PATCH 23/35] bumped the version number to 1.2 --- modules/battenberg/1.2/battenberg.smk | 303 ++++++++++++++++++ modules/battenberg/1.2/config/default.yaml | 40 +++ .../battenberg/1.2/envs/battenberg-1.1.yaml | 222 +++++++++++++ modules/battenberg/1.2/envs/samtools-1.9.yaml | 1 + modules/battenberg/1.2/envs/wget-1.20.1.yaml | 1 + modules/battenberg/1.2/schemas/base-1.0.yaml | 1 + .../battenberg/1.2/src/battenberg_wgs_hg38.R | 134 ++++++++ modules/battenberg/1.2/src/calc_sex_status.sh | 42 +++ .../1.2/src/reference_correction.py | 38 +++ 9 files changed, 782 insertions(+) create mode 100644 modules/battenberg/1.2/battenberg.smk create mode 100644 modules/battenberg/1.2/config/default.yaml create mode 100644 modules/battenberg/1.2/envs/battenberg-1.1.yaml create mode 120000 modules/battenberg/1.2/envs/samtools-1.9.yaml create mode 120000 modules/battenberg/1.2/envs/wget-1.20.1.yaml create mode 120000 modules/battenberg/1.2/schemas/base-1.0.yaml create mode 100755 modules/battenberg/1.2/src/battenberg_wgs_hg38.R create mode 100755 modules/battenberg/1.2/src/calc_sex_status.sh create mode 100644 modules/battenberg/1.2/src/reference_correction.py diff --git a/modules/battenberg/1.2/battenberg.smk b/modules/battenberg/1.2/battenberg.smk new file mode 100644 index 00000000..5d4fde7f --- /dev/null +++ b/modules/battenberg/1.2/battenberg.smk @@ -0,0 +1,303 @@ +#!/usr/bin/env snakemake + + +##### ATTRIBUTION ##### + + +# Original Author: Ryan Morin +# Module Author: Ryan Morin +# Contributors: N/A + +##### SETUP ##### + + +# Import package with useful functions for developing analysis modules +import oncopipe as op +import glob + +# Check that the oncopipe dependency is up-to-date. Add all the following lines to any module that uses new features in oncopipe +min_oncopipe_version="1.0.11" +import pkg_resources +try: + from packaging import version +except ModuleNotFoundError: + sys.exit("The packaging module dependency is missing. Please install it ('pip install packaging') and ensure you are using the most up-to-date oncopipe version") + +# To avoid this we need to add the "packaging" module as a dependency for LCR-modules or oncopipe + +current_version = pkg_resources.get_distribution("oncopipe").version +if version.parse(current_version) < version.parse(min_oncopipe_version): + logger.warning( + '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' + "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m' + ) + sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)") + +# End of dependency checking section + +# Setup module and store module-specific configuration in `CFG` +# `CFG` is a shortcut to `config["lcr-modules"]["battenberg"]` +CFG = op.setup_module( + name = "battenberg", + version = "1.1", + subdirectories = ["inputs", "infer_sex","battenberg", "outputs"], +) + +#set variable for prepending to PATH based on config +SCRIPT_PATH = CFG['inputs']['src_dir'] +#this is used in place of the shell.prefix() because that was not working consistently. This is not ideal. + +#this preserves the variable when using lambda functions +_battenberg_CFG = CFG + +# Define rules to be run locally when using a compute cluster +localrules: + _battenberg_all + +VERSION_MAP = { + "hg19": "grch37", + "grch37": "grch37", + "hs37d5": "grch37", + "hg38": "hg38", + "grch38": "hg38", + "grch38-legacy": "hg38" + +} + +##### RULES ##### + +# Downloads the reference files into the module results directory (under '00-inputs/') from https://www.bcgsc.ca/downloads/morinlab/reference/ . +rule _battenberg_get_reference: + output: + battenberg_impute = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_impute_v3"), + impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt", + probloci = CFG["dirs"]["inputs"] + "reference/{genome_build}/probloci.txt.gz", + battenberg_wgs_replic_correction = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_wgs_replic_correction_1000g_v3"), + battenberg_gc_correction = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_wgs_gc_correction_1000g_v3"), + genomesloci = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_1000genomesloci2012_v3") + params: + url = "https://www.bcgsc.ca/downloads/morinlab/reference", + alt_build = lambda w: VERSION_MAP[w.genome_build], + folder = CFG["dirs"]["inputs"] + "reference/{genome_build}", + build = "{genome_build}", + PATH = CFG['inputs']['src_dir'] + resources: + **CFG["resources"]["reference"] + threads: + CFG["threads"]["reference"] + shell: + op.as_one_line(""" + wget -qO- {params.url}/battenberg_impute_{params.alt_build}.tar.gz | + tar -xvz > {output.battenberg_impute} -C {params.folder} + && + wget -qO- {params.url}/battenberg_{params.alt_build}_gc_correction.tar.gz | + tar -xvz > {output.battenberg_gc_correction} -C {params.folder} + && + wget -qO- {params.url}/battenberg_1000genomesloci_{params.alt_build}.tar.gz | + tar -xvz > {output.genomesloci} -C {params.folder} + && + wget -O {output.impute_info} {params.url}/impute_info_{params.alt_build}.txt + && + python {params.PATH}/reference_correction.py {params.build} + && + wget -qO- {params.url}/battenberg_{params.alt_build}_replic_correction.tar.gz | + tar -xvz > {output.battenberg_wgs_replic_correction} -C {params.folder} + && + wget -O {output.probloci} {params.url}/probloci_{params.alt_build}.txt.gz + + """) + + +# Symlinks the input files into the module results directory (under '00-inputs/') +rule _battenberg_input_bam: + input: + bam = CFG["inputs"]["sample_bam"] + output: + bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam", + bai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.bai", + crai = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{sample_id}.bam.crai" + group: "setup_run" + run: + op.absolute_symlink(input.bam, output.bam) + op.absolute_symlink(input.bam + ".bai", output.bai) + op.absolute_symlink(input.bam + ".bai", output.crai) + +# Installs the Battenberg R dependencies and associated software (impute2, alleleCounter) +# Currently I think this rule has to be run twice for it to work properly because the conda environment is created here. +# I am open to suggestions for how to get around this. +rule _install_battenberg: + output: + complete = "config/envs/battenberg_dependencies_installed.success" + conda: + CFG["conda_envs"]["battenberg"] + log: + input = CFG["logs"]["inputs"] + "input.log" + shell: + """ + R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT")' >> {log.input} && ##move some of this to config? + R -q -e 'devtools::install_github("morinlab/battenberg")' >> {log.input} && ##move some of this to config? + touch {output.complete}""" + +# this process is very fast on bam files and painfully slow on cram files. +# The result of calc_sex_status.sh is stored in a file to avoid having to rerun it unnecessarily +rule _infer_patient_sex: + input: + normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam", + fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa") + output: sex_result = CFG["dirs"]["infer_sex"] + "{seq_type}--{genome_build}/{normal_id}.sex" + resources: + **CFG["resources"]["infer_sex"] + log: + stderr = CFG["logs"]["infer_sex"] + "{seq_type}--{genome_build}/{normal_id}_infer_sex_stderr.log" + conda: + CFG["conda_envs"]["samtools"] + group: "setup_run" + threads: 8 + shell: + op.as_one_line(""" + echo "{params.checker}"; + + """) + + +# This rule runs the entire Battenberg pipeline. Eventually we may want to set this rule up to allow re-starting +# of partially completed jobs (e.g. if they run out of RAM and are killed by the cluster, they can automatically retry) +rule _run_battenberg: + input: + tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam", + normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam", + installed = "config/envs/battenberg_dependencies_installed.success", + sex_result = CFG["dirs"]["infer_sex"] + "{seq_type}--{genome_build}/{normal_id}.sex", + fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"), + impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt" + + output: + refit=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_refit_suggestion.txt", + sub=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_subclones.txt", + ac=temp(CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_alleleCounts.tab"), + mb=temp(CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_mutantBAF.tab"), + mlrg=temp(CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_mutantLogR_gcCorrected.tab"), + mlr=temp(CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_mutantLogR.tab"), + nlr=temp(CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_normalLogR.tab"), + nb=temp(CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_normalBAF.tab"), + cp=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_cellularity_ploidy.txt" + log: + stdout = CFG["logs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_battenberg.stdout.log", + stderr = CFG["logs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_battenberg.stderr.log" + params: + fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"), + script = CFG["inputs"]["battenberg_script"], + out_dir = CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}", + ref = CFG["dirs"]["inputs"] + "reference/{genome_build}" + conda: + CFG["conda_envs"]["battenberg"] + resources: + **CFG["resources"]["battenberg"] + threads: + CFG["threads"]["battenberg"] + shell: + op.as_one_line(""" + if [[ $(head -c 4 {params.fasta}) == ">chr" ]]; then chr_prefixed='true'; else chr_prefixed='false'; fi; + echo "$chr_prefixed" + echo "running {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" > {log.stdout}; + sex=$(cut -f 4 {input.sex_result}| tail -n 1); + echo "setting sex as $sex"; + Rscript {params.script} -t {wildcards.tumour_id} + -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --reference {params.ref} + -o {params.out_dir} --chr_prefixed_genome $chr_prefixed --sex $sex --cpu {threads} >> {log.stdout} 2>> {log.stderr} && + echo "DONE {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" >> {log.stdout}; + """) + + +# Convert the subclones.txt (best fit) to igv-friendly SEG files. +rule _battenberg_to_igv_seg: + input: + sub = rules._run_battenberg.output.sub, + cnv2igv = CFG["inputs"]["cnv2igv"] + output: + seg = CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_subclones.igv.seg" + log: + stderr = CFG["logs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_seg2igv.stderr.log" + threads: 1 + group: "post_process" + shell: + op.as_one_line(""" + echo "running {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" > {log.stderr}; + python {input.cnv2igv} --mode battenberg --sample {wildcards.tumour_id} + {input.sub} > {output.seg} 2>> {log.stderr} + """) + + +#due to the large number of files (several per chromosome) that are not explicit outputs, do some glob-based cleaning in the output directory +rule _battenberg_cleanup: + input: + rules._battenberg_to_igv_seg.output.seg + output: + complete = CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_cleanup_complete.txt" + group: "post_process" + shell: + op.as_one_line(""" + d=$(dirname {output}); + rm -f $d/*impute_input* && + rm -f $d/*alleleFrequencies* && + rm -f $d/*aplotype* && + rm -f $d/*BAFsegmented* && + touch {output.complete} + """) + +# Symlinks the final output files into the module results directory (under '99-outputs/') +# All plots generated by Battenberg are symlinked using a glob for convenience + +rule _battenberg_output_seg: + input: + seg = rules._battenberg_to_igv_seg.output.seg, + sub = rules._run_battenberg.output.sub, + cp = rules._run_battenberg.output.cp + output: + seg = CFG["dirs"]["outputs"] + "seg/{seq_type}--{genome_build}/{tumour_id}--{normal_id}_subclones.igv.seg", + sub = CFG["dirs"]["outputs"] + "txt/{seq_type}--{genome_build}/{tumour_id}--{normal_id}_subclones.txt", + cp = CFG["dirs"]["outputs"] + "txt/{seq_type}--{genome_build}/{tumour_id}--{normal_id}_cellularity_ploidy.txt" + params: + batt_dir = CFG["dirs"]["battenberg"] + "/{seq_type}--{genome_build}/{tumour_id}--{normal_id}", + png_dir = CFG["dirs"]["outputs"] + "png/{seq_type}--{genome_build}" + group: "post_process" + run: + plots = glob.glob(params.batt_dir + "/*.png") + for png in plots: + bn = os.path.basename(png) + op.relative_symlink(png, params.png_dir + "/" + bn,in_module=True) + op.relative_symlink(input.seg, output.seg,in_module=True) + op.relative_symlink(input.sub, output.sub,in_module=True) + op.relative_symlink(input.cp, output.cp,in_module=True) + + + +# Generates the target sentinels for each run, which generate the symlinks +rule _battenberg_all: + input: + expand( + [ + + rules._run_battenberg.output.sub, + rules._battenberg_output_seg.output.seg, + rules._battenberg_cleanup.output.complete + + + ], + zip, # Run expand() with zip(), not product() + seq_type=CFG["runs"]["tumour_seq_type"], + genome_build=CFG["runs"]["tumour_genome_build"], + tumour_id=CFG["runs"]["tumour_sample_id"], + normal_id=CFG["runs"]["normal_sample_id"], + pair_status=CFG["runs"]["pair_status"]) + + + + + +##### CLEANUP ##### + + +# Perform some clean-up tasks, including storing the module-specific +# configuration on disk and deleting the `CFG` variable +op.cleanup_module(CFG) diff --git a/modules/battenberg/1.2/config/default.yaml b/modules/battenberg/1.2/config/default.yaml new file mode 100644 index 00000000..afbafe69 --- /dev/null +++ b/modules/battenberg/1.2/config/default.yaml @@ -0,0 +1,40 @@ +lcr-modules: + battenberg: + inputs: + # Available wildcards: {seq_type} {genome_build} {sample_id} + sample_bam: "__UPDATE__" + battenberg_script: "{MODSDIR}/src/battenberg_wgs_hg38.R" + cnv2igv: "{SCRIPTSDIR}/cnv2igv/1.3/cnv2igv.py" + src_dir: "{MODSDIR}/src/" + + scratch_subdirectories: [] + + conda_envs: + battenberg: "{MODSDIR}/envs/battenberg-1.1.yaml" + wget: "{MODSDIR}/envs/wget-1.20.1.yaml" + samtools: "{MODSDIR}/envs/samtools-1.9.yaml" + + resources: + battenberg: + mem_mb: 200000 + bam: 1 + infer_sex: + mem_mb: 20000 + bam: 1 + reference: + mem_mb: 8000 + + threads: + battenberg: 24 + reference: 2 + #ideal for processing all chromosomes at once + + pairing_config: + genome: + run_paired_tumours: True + run_unpaired_tumours_with: null + run_paired_tumours_as_unpaired: False + capture: + run_paired_tumours: True + run_unpaired_tumours_with: null + run_paired_tumours_as_unpaired: False diff --git a/modules/battenberg/1.2/envs/battenberg-1.1.yaml b/modules/battenberg/1.2/envs/battenberg-1.1.yaml new file mode 100644 index 00000000..e6f3a0da --- /dev/null +++ b/modules/battenberg/1.2/envs/battenberg-1.1.yaml @@ -0,0 +1,222 @@ +channels: + - conda-forge + - bioconda + - r + - defaults +dependencies: + - r-biocmanager=1.30.10 + - _libgcc_mutex=0.1 + - _openmp_mutex=4.5 + - _r-mutex=1.0.1 + - binutils_impl_linux-64=2.34 + - binutils_linux-64=2.34 + - bwidget=1.9.14 + - bzip2=1.0.8 + - ca-certificates=2020.4.5.1 + - cairo=1.16.0 + - cancerit-allelecount=4.0.2 + - certifi=2020.4.5.1 + - curl=7.69.1 + - fontconfig=2.13.1 + - freetype=2.10.1 + - fribidi=1.0.9 + - gcc_impl_linux-64=7.3.0 + - gcc_linux-64=7.3.0 + - gettext=0.19.8.1 + - gfortran_impl_linux-64=7.3.0 + - gfortran_linux-64=7.3.0 + - glib=2.64.2 + - graphite2=1.3.13 + - gsl=2.6 + - gxx_impl_linux-64=7.3.0 + - gxx_linux-64=7.3.0 + - harfbuzz=2.4.0 + - htslib=1.9 + - icu=64.2 + - impute2=2.3.2 + - jpeg=9c + - krb5=1.17.1 + - ld_impl_linux-64=2.34 + - libblas=3.8.0 + - libcblas=3.8.0 + - libcurl=7.69.1 + - libdeflate=1.2 + - libedit=3.1.20170329 + - libffi=3.2.1 + - libgcc-ng=9.2.0 + - libgfortran-ng=7.3.0 + - libgomp=9.2.0 + - libiconv=1.15 + - liblapack=3.8.0 + - libopenblas=0.3.9 + - libpng=1.6.37 + - libssh2=1.8.2 + - libstdcxx-ng=9.2.0 + - libtiff=4.1.0 + - libuuid=2.32.1 + - libwebp-base=1.1.0 + - libxcb=1.13 + - libxml2=2.9.10 + - llvm-openmp=10.0.0 + - lz4-c=1.9.2 + - make=4.3 + - ncurses=6.1 + - openssl=1.1.1g + - pandoc=2.9.2.1 + - pango=1.42.4 + - parallel=20200322 + - pcre=8.44 + - perl=5.26.2 + - pip=20.1 + - pixman=0.38.0 + - pthread-stubs=0.4 + - python=3.8.2 + - python_abi=3.8 + - r-askpass=1.1 + - r-assertthat=0.2.1 + - r-backports=1.1.6 + - r-base=3.6.3 + - r-base64enc=0.1_3 + - r-brew=1.0_6 + - r-broom=0.5.6 + - r-callr=3.4.3 + - r-cellranger=1.1.0 + - r-cli=2.0.2 + - r-clipr=0.7.0 + - r-codetools=0.2_16 + - r-colorspace=1.4_1 + - r-commonmark=1.7 + - r-covr=3.5.0 + - r-crayon=1.3.4 + - r-crosstalk=1.1.0.1 + - r-curl=4.3 + - r-dbi=1.1.0 + - r-dbplyr=1.4.3 + - r-desc=1.2.0 + - r-devtools=2.3.0 + - r-digest=0.6.25 + - r-doparallel=1.0.15 + - r-dplyr=0.8.5 + - r-dt=0.13 + - r-ellipsis=0.3.0 + - r-evaluate=0.14 + - r-fansi=0.4.1 + - r-farver=2.0.3 + - r-forcats=0.5.0 + - r-foreach=1.5.0 + - r-fs=1.4.1 + - r-generics=0.0.2 + - r-getopt=1.20.3 + - r-ggplot2=3.3.0 + - r-gh=1.1.0 + - r-git2r=0.26.1 + - r-glue=1.4.0 + - r-gridextra=2.3 + - r-gtable=0.3.0 + - r-gtools=3.8.2 + - r-haven=2.2.0 + - r-highr=0.8 + - r-hms=0.5.3 + - r-htmltools=0.4.0 + - r-htmlwidgets=1.5.1 + - r-httr=1.4.1 + - r-ini=0.3.1 + - r-isoband=0.2.1 + - r-iterators=1.0.12 + - r-jsonlite=1.6.1 + - r-knitr=1.28 + - r-labeling=0.3 + - r-later=1.0.0 + - r-lattice=0.20_41 + - r-lazyeval=0.2.2 + - r-lifecycle=0.2.0 + - r-lubridate=1.7.8 + - r-magrittr=1.5 + - r-markdown=1.1 + - r-mass=7.3_51.6 + - r-matrix=1.2_18 + - r-memoise=1.1.0 + - r-mgcv=1.8_31 + - r-mime=0.9 + - r-modelr=0.1.6 + - r-munsell=0.5.0 + - r-nlme=3.1_147 + - r-openssl=1.4.1 + - r-optparse=1.6.6 + - r-pillar=1.4.3 + - r-pkgbuild=1.0.7 + - r-pkgconfig=2.0.3 + - r-pkgload=1.0.2 + - r-plogr=0.2.0 + - r-plyr=1.8.6 + - r-praise=1.0.0 + - r-prettyunits=1.1.1 + - r-processx=3.4.2 + - r-progress=1.2.2 + - r-promises=1.1.0 + - r-ps=1.3.2 + - r-purrr=0.3.4 + - r-r6=2.4.1 + - r-rcmdcheck=1.3.3 + - r-rcolorbrewer=1.1_2 + - r-rcpp=1.0.4.6 + - r-readr=1.3.1 + - r-readxl=1.3.1 + - r-rematch=1.0.1 + - r-rematch2=2.1.1 + - r-remotes=2.1.1 + - r-reprex=0.3.0 + - r-reshape2=1.4.4 + - r-rex=1.2.0 + - r-rlang=0.4.5 + - r-rmarkdown=2.1 + - r-roxygen2=7.1.0 + - r-rprojroot=1.3_2 + - r-rstudioapi=0.11 + - r-rversions=2.0.1 + - r-rvest=0.3.5 + - r-scales=1.1.0 + - r-selectr=0.4_2 + - r-sessioninfo=1.1.1 + - r-stringi=1.4.6 + - r-stringr=1.4.0 + - r-sys=3.3 + - r-testthat=2.3.2 + - r-tibble=3.0.1 + - r-tidyr=1.0.2 + - r-tidyselect=1.0.0 + - r-tidyverse=1.3.0 + - r-tinytex=0.22 + - r-usethis=1.6.1 + - r-utf8=1.1.4 + - r-vctrs=0.2.4 + - r-viridislite=0.3.0 + - r-whisker=0.4 + - r-withr=2.2.0 + - r-xfun=0.13 + - r-xml2=1.3.2 + - r-xopen=1.0.0 + - r-yaml=2.2.1 + - r-zeallot=0.1.0 + - readline=8.0 + - sed=4.7 + - setuptools=46.1.3 + - sqlite=3.30.1 + - tk=8.6.10 + - tktable=2.10 + - wheel=0.34.2 + - xorg-kbproto=1.0.7 + - xorg-libice=1.0.10 + - xorg-libsm=1.2.3 + - xorg-libx11=1.6.9 + - xorg-libxau=1.0.9 + - xorg-libxdmcp=1.1.3 + - xorg-libxext=1.3.4 + - xorg-libxrender=0.9.10 + - xorg-renderproto=0.11.1 + - xorg-xextproto=7.3.0 + - xorg-xproto=7.0.31 + - xz=5.2.5 + - zlib=1.2.11 + - zstd=1.4.4 + diff --git a/modules/battenberg/1.2/envs/samtools-1.9.yaml b/modules/battenberg/1.2/envs/samtools-1.9.yaml new file mode 120000 index 00000000..ab29288b --- /dev/null +++ b/modules/battenberg/1.2/envs/samtools-1.9.yaml @@ -0,0 +1 @@ +../../../../envs/samtools/samtools-1.9.yaml \ No newline at end of file diff --git a/modules/battenberg/1.2/envs/wget-1.20.1.yaml b/modules/battenberg/1.2/envs/wget-1.20.1.yaml new file mode 120000 index 00000000..86501e72 --- /dev/null +++ b/modules/battenberg/1.2/envs/wget-1.20.1.yaml @@ -0,0 +1 @@ +../../../../envs/wget/wget-1.20.1.yaml \ No newline at end of file diff --git a/modules/battenberg/1.2/schemas/base-1.0.yaml b/modules/battenberg/1.2/schemas/base-1.0.yaml new file mode 120000 index 00000000..0a69d1ce --- /dev/null +++ b/modules/battenberg/1.2/schemas/base-1.0.yaml @@ -0,0 +1 @@ +../../../../schemas/base/base-1.0.yaml \ No newline at end of file diff --git a/modules/battenberg/1.2/src/battenberg_wgs_hg38.R b/modules/battenberg/1.2/src/battenberg_wgs_hg38.R new file mode 100755 index 00000000..fb9f3686 --- /dev/null +++ b/modules/battenberg/1.2/src/battenberg_wgs_hg38.R @@ -0,0 +1,134 @@ +library(Battenberg) +library(optparse) +#source("./src/R/battenberg/R/clonal_ascat.R") +#source("./src/R/battenberg/R/impute.R") +#devtools::load_all(path="/projects/rmorin/projects/gambl-repos/gambl-rmorin/src/R/battenberg") +#source("/projects/rmorin/projects/gambl-repos/gambl-rmorin/src/R/prepare_wgs.R") +option_list = list( + make_option(c("-t", "--tumourname"), type="character", default=NULL, help="Samplename of the tumour", metavar="character"), + make_option(c("-n", "--normalname"), type="character", default=NULL, help="Samplename of the normal", metavar="character"), + make_option(c("--tb"), type="character", default=NULL, help="Tumour BAM file", metavar="character"), + make_option(c("--nb"), type="character", default=NULL, help="Normal BAM file", metavar="character"), + make_option(c("--sex"), type="character", default=NULL, help="Sex of the sample", metavar="character"), + make_option(c("-o", "--output"), type="character", default=NULL, help="Directory where output will be written", metavar="character"), + make_option(c("--skip_allelecount"), type="logical", default=FALSE, action="store_true", help="Provide when alleles don't have to be counted. This expects allelecount files on disk", metavar="character"), + make_option(c("--skip_preprocessing"), type="logical", default=FALSE, action="store_true", help="Provide when pre-processing has previously completed. This expects the files on disk", metavar="character"), + make_option(c("--skip_phasing"), type="logical", default=FALSE, action="store_true", help="Provide when phasing has previously completed. This expects the files on disk", metavar="character"), + make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), + make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character"), + make_option(c("--reference"), type="character", default=NULL, help="Path to reference file", metavar="character"), + make_option(c("-f","--reference_fasta"), type="character", default=NULL, help="Path to indexed genome fasta file (needed for CRAM compatability)", metavar="character"), + make_option(c("--chr_prefixed_genome"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), + make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared.") +) + +opt_parser = OptionParser(option_list=option_list) +opt = parse_args(opt_parser) +original_dir = getwd() + +REFERENCE_BASE = paste0(normalizePath(original_dir,"\\"), "/",opt$reference) +TUMOURNAME = opt$tumourname +NORMALNAME = opt$normalname + +REFERENCE_FASTA = opt$reference_fasta +print(paste("using fasta:",REFERENCE_FASTA)) +IS.MALE = opt$sex=="male" | opt$sex=="Male" +RUN_DIR = opt$o +CHR_PREFIXED = opt$chr_prefixed_genome +print(paste("chr prefix present?",CHR_PREFIXED)) +SKIP_ALLELECOUNTING = opt$skip_allelecount +SKIP_PREPROCESSING = opt$skip_preprocessing +SKIP_PHASING = opt$skip_phasing +NTHREADS = opt$cpu +PRIOR_BREAKPOINTS_FILE = opt$bp +IMPUTE_LOG = opt$impute_log +verbose = TRUE +############################################################################### +# 2018-11-01 +# A pure R Battenberg v2.2.9 WGS pipeline implementation. +# sd11 [at] sanger.ac.uk +############################################################################### + +# General static +IMPUTEINFOFILE = paste0(REFERENCE_BASE,"/impute_info.txt") +print(IMPUTEINFOFILE) +G1000PREFIX = paste0(REFERENCE_BASE,"/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr") +G1000PREFIX_AC = paste0(REFERENCE_BASE,"/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr") +GCCORRECTPREFIX = paste0(REFERENCE_BASE,"/battenberg_wgs_gc_correction_1000g_v3/1000_genomes_GC_corr_chr_") +REPLICCORRECTPREFIX = paste0(REFERENCE_BASE,"/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_") +IMPUTE_EXE = "impute2" #install using conda + +PLATFORM_GAMMA = 1 +PHASING_GAMMA = 1 +SEGMENTATION_GAMMA = 10 +SEGMENTATIIN_KMIN = 3 +PHASING_KMIN = 1 +CLONALITY_DIST_METRIC = 0 +ASCAT_DIST_METRIC = 1 +MIN_PLOIDY = 1.6 +MAX_PLOIDY = 4.8 +MIN_RHO = 0.1 +MIN_GOODNESS_OF_FIT = 0.63 +BALANCED_THRESHOLD = 0.51 +MIN_NORMAL_DEPTH = 10 +MIN_BASE_QUAL = 20 +MIN_MAP_QUAL = 35 +CALC_SEG_BAF_OPTION = 3 + +# WGS specific static +ALLELECOUNTER = "alleleCounter" #conda package that should have this: cancerit-allelecount +PROBLEMLOCI = paste0(REFERENCE_BASE, "/probloci.txt.gz") + +print(PROBLEMLOCI); + +# Change to work directory and load the chromosome information +setwd(RUN_DIR) +NORMALBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$nb) +TUMOURBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$tb) + +#this should be the full path to the files after changing directories + +#debugging lines added here: +#SKIP_ALLELECOUNTING = TRUE +#SKIP_PREPROCESSING = TRUE +#SKIP_PHASING = FALSE + +battenberg(tumourname=TUMOURNAME, + normalname=NORMALNAME, + tumour_data_file=TUMOURBAM, + normal_data_file=NORMALBAM, + ismale=IS.MALE, + imputeinfofile=IMPUTEINFOFILE, + g1000prefix=G1000PREFIX, + g1000allelesprefix=G1000PREFIX_AC, + gccorrectprefix=GCCORRECTPREFIX, + repliccorrectprefix=REPLICCORRECTPREFIX, + problemloci=PROBLEMLOCI, + data_type="wgs", + impute_exe=IMPUTE_EXE, + allelecounter_exe=ALLELECOUNTER, + nthreads=NTHREADS, + platform_gamma=PLATFORM_GAMMA, + phasing_gamma=PHASING_GAMMA, + segmentation_gamma=SEGMENTATION_GAMMA, + segmentation_kmin=SEGMENTATIIN_KMIN, + phasing_kmin=PHASING_KMIN, + clonality_dist_metric=CLONALITY_DIST_METRIC, + ascat_dist_metric=ASCAT_DIST_METRIC, + min_ploidy=MIN_PLOIDY, + max_ploidy=MAX_PLOIDY, + min_rho=MIN_RHO, + min_goodness=MIN_GOODNESS_OF_FIT, + uninformative_BAF_threshold=BALANCED_THRESHOLD, + min_normal_depth=MIN_NORMAL_DEPTH, + min_base_qual=MIN_BASE_QUAL, + min_map_qual=MIN_MAP_QUAL, + calc_seg_baf_option=CALC_SEG_BAF_OPTION, + skip_allele_counting=SKIP_ALLELECOUNTING, + skip_preprocessing=SKIP_PREPROCESSING, + skip_phasing=SKIP_PHASING, + prior_breakpoints_file=PRIOR_BREAKPOINTS_FILE, + chr_prefixed=CHR_PREFIXED, + verbose=verbose, + logfile_prefix=IMPUTE_LOG, + ref_fasta=REFERENCE_FASTA) diff --git a/modules/battenberg/1.2/src/calc_sex_status.sh b/modules/battenberg/1.2/src/calc_sex_status.sh new file mode 100755 index 00000000..3d750a05 --- /dev/null +++ b/modules/battenberg/1.2/src/calc_sex_status.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +# Infer sex of a patient from a normal genome bam using the ratio of X and Y chromosome reads +# If provided, the names of chrX and chrY will be used, otherwise they will be inferred from the header (this is not guaranteed to work) + +set -euf -o pipefail + +BAM="$1" +REF="$2" +SAMPLE="${3:-UNKNOWN}" +X_CHROM="${4:-MISSING}" +Y_CHROM="${5:-MISSING}" +DEBUG="${6:-MISSING}" + +if [[ $X_CHROM == "MISSING" ]] +then + X_CHROM=$(samtools view -H ${BAM} |\ + sed -r 's/\S+:\S+/\n&/g' | perl -ne 's/\s+//g;print "$_\n"' | awk 'BEGIN{FS=":"} $1=="SN" && $2 ~ /X$/ {print $2}') +fi +if [[ $Y_CHROM == "MISSING" ]] +then + Y_CHROM=$(samtools view -H ${BAM} |\ + sed -r 's/\S+:\S+/\n&/g' | perl -ne 's/\s+//g;print "$_\n"' | awk 'BEGIN{FS=":"} $1=="SN" && $2 ~ /Y$/ {print $2}') +fi +if [[ ! $DEBUG == "MISSING" ]] +then + echo "DEBUG: x chromosome is named >$X_CHROM< and y chromosome is named >$Y_CHROM<" +fi + + +X_READS=$(samtools view -@ 8 -T $REF $BAM $X_CHROM | wc -l) +Y_READS=$(samtools view -@ 8 -T $REF $BAM $Y_CHROM | wc -l) + + +ratio=$((100 * $Y_READS/$X_READS)) +sex="female" +if [[ $ratio -gt 10 ]] +then + sex="male" +fi +printf "sample\tchrX_count\tchrY_count\tsex\n" +printf "$SAMPLE\t$X_READS\t$Y_READS\t$sex\n" diff --git a/modules/battenberg/1.2/src/reference_correction.py b/modules/battenberg/1.2/src/reference_correction.py new file mode 100644 index 00000000..115ddc04 --- /dev/null +++ b/modules/battenberg/1.2/src/reference_correction.py @@ -0,0 +1,38 @@ +##### ATTRIBUTION ##### + + +# Original Author: Lakshay Sethi + +### Battenberg refrence file corrector ### +# Replaces the placeholder value in the impute_info.txt with the correct path +# where the reference files downloaded are stored. + +#!/usr/bin/env Python script +# +# Usage: +# python /reference_correction.py +# +# Notes: +# This script is intended for use with the Battenberg-1.1 module in LCR-modules. +# It expects to find the genome build at the input path, following +# the pattern reference_correction.py {genome_build}. These files should be in the +# 00-inputs subdirectory of the battenberg-1.1 directory present in the results directory. +# +# The file is made to be present in the src sub directory of the module. +# +# The sample table should adhere to LCR-modules guidelines. + +import os +import sys +cwd = os.getcwd() + +fileIN = open( cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/impute_info.txt", 'r') +filedata = fileIN.read() +fileIN.close() + +newdata = filedata.replace("", cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/battenberg_impute_v3") + +fileOut = open(cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/impute_info.txt", 'w') +fileOut.write(newdata) +fileOut.close() + From 507dc99d9cd6c4217285a9fda7bf84ccf8d7da83 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Fri, 23 Apr 2021 16:36:45 -0700 Subject: [PATCH 24/35] return battenberg 1.1 to master level --- modules/battenberg/1.1/battenberg.smk | 90 +++---------------- modules/battenberg/1.1/config/default.yaml | 20 +++-- .../battenberg/1.1/src/battenberg_wgs_hg38.R | 11 +-- 3 files changed, 30 insertions(+), 91 deletions(-) diff --git a/modules/battenberg/1.1/battenberg.smk b/modules/battenberg/1.1/battenberg.smk index 5414af77..ffc86caf 100644 --- a/modules/battenberg/1.1/battenberg.smk +++ b/modules/battenberg/1.1/battenberg.smk @@ -27,10 +27,8 @@ except ModuleNotFoundError: current_version = pkg_resources.get_distribution("oncopipe").version if version.parse(current_version) < version.parse(min_oncopipe_version): - logger.warning( - '\x1b[0;31;40m' + f'ERROR: oncopipe version installed: {current_version}' - "\n" f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment" + '\x1b[0m' - ) + print(f"ERROR: oncopipe version installed: {current_version}") + print(f"ERROR: This module requires oncopipe version >= {min_oncopipe_version}. Please update oncopipe in your environment") sys.exit("Instructions for updating to the current version of oncopipe are available at https://lcr-modules.readthedocs.io/en/latest/ (use option 2)") # End of dependency checking section @@ -54,60 +52,9 @@ _battenberg_CFG = CFG localrules: _battenberg_all -VERSION_MAP = { - "hg19": "grch37", - "grch37": "grch37", - "hs37d5": "grch37", - "hg38": "hg38", - "grch38": "hg38", - "grch38-legacy": "hg38" - -} ##### RULES ##### -# Downloads the reference files into the module results directory (under '00-inputs/') from https://www.bcgsc.ca/downloads/morinlab/reference/ . -rule _battenberg_get_reference: - output: - battenberg_impute = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_impute_v3"), - impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt", - probloci = CFG["dirs"]["inputs"] + "reference/{genome_build}/probloci.txt.gz", - battenberg_wgs_replic_correction = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_wgs_replic_correction_1000g_v3"), - battenberg_gc_correction = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_wgs_gc_correction_1000g_v3"), - genomesloci = directory(CFG["dirs"]["inputs"] + "reference/{genome_build}/battenberg_1000genomesloci2012_v3") - params: - url = "https://www.bcgsc.ca/downloads/morinlab/reference", - alt_build = lambda w: VERSION_MAP[w.genome_build], - folder = CFG["dirs"]["inputs"] + "reference/{genome_build}", - build = "{genome_build}", - PATH = CFG['inputs']['src_dir'] - resources: - **CFG["resources"]["reference"] - threads: - CFG["threads"]["reference"] - shell: - op.as_one_line(""" - wget -qO- {params.url}/battenberg_impute_{params.alt_build}.tar.gz | - tar -xvz > {output.battenberg_impute} -C {params.folder} - && - wget -qO- {params.url}/battenberg_{params.alt_build}_gc_correction.tar.gz | - tar -xvz > {output.battenberg_gc_correction} -C {params.folder} - && - wget -qO- {params.url}/battenberg_1000genomesloci_{params.alt_build}.tar.gz | - tar -xvz > {output.genomesloci} -C {params.folder} - && - wget -O {output.impute_info} {params.url}/impute_info_{params.alt_build}.txt - && - python {params.PATH}/reference_correction.py {params.build} - && - wget -qO- {params.url}/battenberg_{params.alt_build}_replic_correction.tar.gz | - tar -xvz > {output.battenberg_wgs_replic_correction} -C {params.folder} - && - wget -O {output.probloci} {params.url}/probloci_{params.alt_build}.txt.gz - - """) - - # Symlinks the input files into the module results directory (under '00-inputs/') rule _battenberg_input_bam: input: @@ -130,12 +77,10 @@ rule _install_battenberg: complete = "config/envs/battenberg_dependencies_installed.success" conda: CFG["conda_envs"]["battenberg"] - log: - input = CFG["logs"]["inputs"] + "input.log" shell: """ - R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT")' >> {log.input} && ##move some of this to config? - R -q -e 'devtools::install_github("morinlab/battenberg")' >> {log.input} && ##move some of this to config? + R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT")' && ##move some of this to config? + R -q -e 'devtools::install_github("morinlab/battenberg")' && ##move some of this to config? touch {output.complete}""" # this process is very fast on bam files and painfully slow on cram files. @@ -149,8 +94,6 @@ rule _infer_patient_sex: **CFG["resources"]["infer_sex"] log: stderr = CFG["logs"]["infer_sex"] + "{seq_type}--{genome_build}/{normal_id}_infer_sex_stderr.log" - conda: - CFG["conda_envs"]["samtools"] group: "setup_run" threads: 8 shell: @@ -164,15 +107,14 @@ rule _infer_patient_sex: # This rule runs the entire Battenberg pipeline. Eventually we may want to set this rule up to allow re-starting # of partially completed jobs (e.g. if they run out of RAM and are killed by the cluster, they can automatically retry) +# TODO: this rule needs to be modified to rely on reference_files and allow setup (downloading) of the Battenberg references rule _run_battenberg: input: tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam", normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam", installed = "config/envs/battenberg_dependencies_installed.success", sex_result = CFG["dirs"]["infer_sex"] + "{seq_type}--{genome_build}/{normal_id}.sex", - fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"), - impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt" - + fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa") output: refit=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_refit_suggestion.txt", sub=CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_subclones.txt", @@ -187,10 +129,10 @@ rule _run_battenberg: stdout = CFG["logs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_battenberg.stdout.log", stderr = CFG["logs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}/{tumour_id}_battenberg.stderr.log" params: - fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"), + reference_path = lambda w: _battenberg_CFG["reference_path"][w.genome_build], script = CFG["inputs"]["battenberg_script"], - out_dir = CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}", - ref = CFG["dirs"]["inputs"] + "reference/{genome_build}" + chr_prefixed = lambda w: _battenberg_CFG["options"]["chr_prefixed_reference"][w.genome_build], + out_dir = CFG["dirs"]["battenberg"] + "{seq_type}--{genome_build}/{tumour_id}--{normal_id}" conda: CFG["conda_envs"]["battenberg"] resources: @@ -199,14 +141,12 @@ rule _run_battenberg: CFG["threads"]["battenberg"] shell: op.as_one_line(""" - if [[ $(head -c 4 {params.fasta}) == ">chr" ]]; then chr_prefixed='true'; else chr_prefixed='false'; fi; - echo "$chr_prefixed" echo "running {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" > {log.stdout}; sex=$(cut -f 4 {input.sex_result}| tail -n 1); echo "setting sex as $sex"; Rscript {params.script} -t {wildcards.tumour_id} - -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} --reference {params.ref} - -o {params.out_dir} --chr_prefixed_genome $chr_prefixed --sex $sex --cpu {threads} >> {log.stdout} 2>> {log.stderr} && + -n {wildcards.normal_id} --tb {input.tumour_bam} --nb {input.normal_bam} -f {input.fasta} + -o {params.out_dir} --sex $sex --reference {params.reference_path} {params.chr_prefixed} --cpu {threads} >> {log.stdout} 2>> {log.stderr} && echo "DONE {rule} for {wildcards.tumour_id}--{wildcards.normal_id} on $(hostname) at $(date)" >> {log.stdout}; """) @@ -272,19 +212,14 @@ rule _battenberg_output_seg: op.relative_symlink(input.sub, output.sub,in_module=True) op.relative_symlink(input.cp, output.cp,in_module=True) - - # Generates the target sentinels for each run, which generate the symlinks rule _battenberg_all: input: expand( [ - rules._run_battenberg.output.sub, rules._battenberg_output_seg.output.seg, rules._battenberg_cleanup.output.complete - - ], zip, # Run expand() with zip(), not product() seq_type=CFG["runs"]["tumour_seq_type"], @@ -294,9 +229,6 @@ rule _battenberg_all: pair_status=CFG["runs"]["pair_status"]) - - - ##### CLEANUP ##### diff --git a/modules/battenberg/1.1/config/default.yaml b/modules/battenberg/1.1/config/default.yaml index afbafe69..fc22002a 100644 --- a/modules/battenberg/1.1/config/default.yaml +++ b/modules/battenberg/1.1/config/default.yaml @@ -1,4 +1,5 @@ lcr-modules: + battenberg: inputs: # Available wildcards: {seq_type} {genome_build} {sample_id} @@ -9,11 +10,19 @@ lcr-modules: scratch_subdirectories: [] + reference_path: + hg38: "__UPDATE__" + grch37: "__UPDATE__" + + options: + #update and add/remove these lines as needed for the reference genomes being used. + chr_prefixed_reference: + hg38: " --chr_prefixed_genome " + grch37: " " + conda_envs: battenberg: "{MODSDIR}/envs/battenberg-1.1.yaml" - wget: "{MODSDIR}/envs/wget-1.20.1.yaml" - samtools: "{MODSDIR}/envs/samtools-1.9.yaml" - + resources: battenberg: mem_mb: 200000 @@ -21,12 +30,9 @@ lcr-modules: infer_sex: mem_mb: 20000 bam: 1 - reference: - mem_mb: 8000 - + threads: battenberg: 24 - reference: 2 #ideal for processing all chromosomes at once pairing_config: diff --git a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R index fb9f3686..2bcbd25c 100755 --- a/modules/battenberg/1.1/src/battenberg_wgs_hg38.R +++ b/modules/battenberg/1.1/src/battenberg_wgs_hg38.R @@ -16,7 +16,7 @@ option_list = list( make_option(c("--skip_phasing"), type="logical", default=FALSE, action="store_true", help="Provide when phasing has previously completed. This expects the files on disk", metavar="character"), make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character"), - make_option(c("--reference"), type="character", default=NULL, help="Path to reference file", metavar="character"), + make_option(c("--reference"), type="character", default=NULL, help="Path to reference files", metavar="character"), make_option(c("-f","--reference_fasta"), type="character", default=NULL, help="Path to indexed genome fasta file (needed for CRAM compatability)", metavar="character"), make_option(c("--chr_prefixed_genome"), type="logical", default=FALSE, action="store_true", help="Flag to specify if the genome has chr prefixes in chromosome names", metavar="character"), make_option(c("--impute_log"), type="character", default="./", help="Full path for where to store impute logs. If blank, these will be written to the main output directory and cleared.") @@ -24,9 +24,9 @@ option_list = list( opt_parser = OptionParser(option_list=option_list) opt = parse_args(opt_parser) -original_dir = getwd() -REFERENCE_BASE = paste0(normalizePath(original_dir,"\\"), "/",opt$reference) +REFERENCE_BASE = opt$reference + TUMOURNAME = opt$tumourname NORMALNAME = opt$normalname @@ -50,7 +50,7 @@ verbose = TRUE ############################################################################### # General static -IMPUTEINFOFILE = paste0(REFERENCE_BASE,"/impute_info.txt") +IMPUTEINFOFILE = paste0(REFERENCE_BASE,"/battenberg_impute_v3/impute_info_fix.txt") print(IMPUTEINFOFILE) G1000PREFIX = paste0(REFERENCE_BASE,"/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr") G1000PREFIX_AC = paste0(REFERENCE_BASE,"/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr") @@ -77,11 +77,12 @@ CALC_SEG_BAF_OPTION = 3 # WGS specific static ALLELECOUNTER = "alleleCounter" #conda package that should have this: cancerit-allelecount -PROBLEMLOCI = paste0(REFERENCE_BASE, "/probloci.txt.gz") +PROBLEMLOCI = paste0(REFERENCE_BASE,"/probloci_270415.txt.gz") print(PROBLEMLOCI); # Change to work directory and load the chromosome information +original_dir = getwd() setwd(RUN_DIR) NORMALBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$nb) TUMOURBAM = paste0(normalizePath(original_dir,"\\"), "/",opt$tb) From a834c76a73dd888f3af86644b4286d16d4eebdfe Mon Sep 17 00:00:00 2001 From: Lakshay Date: Fri, 23 Apr 2021 17:15:18 -0700 Subject: [PATCH 25/35] updated changelog; changed version numbers between files --- .../1.1/src/reference_correction.py | 38 --------------- modules/battenberg/1.2/battenberg.smk | 2 +- .../1.2/src/reference_correction.py | 47 +++++++++++++------ modules/battenberg/CHANGELOG.md | 7 +++ 4 files changed, 41 insertions(+), 53 deletions(-) delete mode 100644 modules/battenberg/1.1/src/reference_correction.py diff --git a/modules/battenberg/1.1/src/reference_correction.py b/modules/battenberg/1.1/src/reference_correction.py deleted file mode 100644 index 115ddc04..00000000 --- a/modules/battenberg/1.1/src/reference_correction.py +++ /dev/null @@ -1,38 +0,0 @@ -##### ATTRIBUTION ##### - - -# Original Author: Lakshay Sethi - -### Battenberg refrence file corrector ### -# Replaces the placeholder value in the impute_info.txt with the correct path -# where the reference files downloaded are stored. - -#!/usr/bin/env Python script -# -# Usage: -# python /reference_correction.py -# -# Notes: -# This script is intended for use with the Battenberg-1.1 module in LCR-modules. -# It expects to find the genome build at the input path, following -# the pattern reference_correction.py {genome_build}. These files should be in the -# 00-inputs subdirectory of the battenberg-1.1 directory present in the results directory. -# -# The file is made to be present in the src sub directory of the module. -# -# The sample table should adhere to LCR-modules guidelines. - -import os -import sys -cwd = os.getcwd() - -fileIN = open( cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/impute_info.txt", 'r') -filedata = fileIN.read() -fileIN.close() - -newdata = filedata.replace("", cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/battenberg_impute_v3") - -fileOut = open(cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/impute_info.txt", 'w') -fileOut.write(newdata) -fileOut.close() - diff --git a/modules/battenberg/1.2/battenberg.smk b/modules/battenberg/1.2/battenberg.smk index 5d4fde7f..b540e943 100644 --- a/modules/battenberg/1.2/battenberg.smk +++ b/modules/battenberg/1.2/battenberg.smk @@ -39,7 +39,7 @@ if version.parse(current_version) < version.parse(min_oncopipe_version): # `CFG` is a shortcut to `config["lcr-modules"]["battenberg"]` CFG = op.setup_module( name = "battenberg", - version = "1.1", + version = "1.2", subdirectories = ["inputs", "infer_sex","battenberg", "outputs"], ) diff --git a/modules/battenberg/1.2/src/reference_correction.py b/modules/battenberg/1.2/src/reference_correction.py index 115ddc04..236d63d7 100644 --- a/modules/battenberg/1.2/src/reference_correction.py +++ b/modules/battenberg/1.2/src/reference_correction.py @@ -3,36 +3,55 @@ # Original Author: Lakshay Sethi -### Battenberg refrence file corrector ### -# Replaces the placeholder value in the impute_info.txt with the correct path -# where the reference files downloaded are stored. +### Battenberg refrence file corrector ### +# Replaces the placeholder value in the impute_info.txt with the correct path +# where the reference files downloaded are stored. #!/usr/bin/env Python script # -# Usage: +# Usage: # python /reference_correction.py # -# Notes: -# This script is intended for use with the Battenberg-1.1 module in LCR-modules. -# It expects to find the genome build at the input path, following -# the pattern reference_correction.py {genome_build}. These files should be in the -# 00-inputs subdirectory of the battenberg-1.1 directory present in the results directory. +# Notes: +# This script is intended for use with the Battenberg-1.2 module in LCR-modules. +# It expects to find the genome build at the input path, following +# the pattern reference_correction.py {genome_build}. These files should be in the +# 00-inputs subdirectory of the battenberg-1.2 directory present in the results directory. # # The file is made to be present in the src sub directory of the module. # -# The sample table should adhere to LCR-modules guidelines. +# The sample table should adhere to LCR-modules guidelines. import os import sys + cwd = os.getcwd() -fileIN = open( cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/impute_info.txt", 'r') +fileIN = open( + cwd + + "/results/battenberg-1.2/00-inputs/reference/" + + sys.argv[1] + + "/impute_info.txt", + "r", +) filedata = fileIN.read() fileIN.close() -newdata = filedata.replace("", cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/battenberg_impute_v3") - -fileOut = open(cwd + "/results/battenberg-1.1/00-inputs/reference/" + sys.argv[1] + "/impute_info.txt", 'w') +newdata = filedata.replace( + "", + cwd + + "/results/battenberg-1.2/00-inputs/reference/" + + sys.argv[1] + + "/battenberg_impute_v3", +) + +fileOut = open( + cwd + + "/results/battenberg-1.2/00-inputs/reference/" + + sys.argv[1] + + "/impute_info.txt", + "w", +) fileOut.write(newdata) fileOut.close() diff --git a/modules/battenberg/CHANGELOG.md b/modules/battenberg/CHANGELOG.md index b2797d35..fb9eaab9 100644 --- a/modules/battenberg/CHANGELOG.md +++ b/modules/battenberg/CHANGELOG.md @@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.2] - 2021-04-23 + +This release was authored by Lakshay Sethi. +This overhaul began with addition of an automatic way to download reference files, as a way to handle the grievances +from version 1.0. To achieve this I created a new rule, _battenberg_get_reference, through which I downloaded the files from GSC web portal and used a script called reference_correction.py, to automatically replace placeholders with the correct paths. After this I automated the way chr prefix are used, using a regex statement to directly read it from the genome.fa file of that respective genome_build. To increase the scalability of Battenberg, ability to use different genomes where added by making a VERSION_MAP dictionary. Then to make the log files more informative, I shifted the output generated by rule _battenberg_input_bam from terminal to log file called input.log. +As a result of both reference file downloading and chr prefix reading going automatic, variables reference_path and chr_prefixed_reference from config file were removed. + ## [1.1] - 2020-12-22 This release was authored by Ryan Morin. From c09ffdc0bc8c544e715f5161a05bb8f776be21c3 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Fri, 23 Apr 2021 17:34:51 -0700 Subject: [PATCH 26/35] updated changelog --- modules/battenberg/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/battenberg/CHANGELOG.md b/modules/battenberg/CHANGELOG.md index fb9eaab9..d42c9938 100644 --- a/modules/battenberg/CHANGELOG.md +++ b/modules/battenberg/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 This release was authored by Lakshay Sethi. This overhaul began with addition of an automatic way to download reference files, as a way to handle the grievances -from version 1.0. To achieve this I created a new rule, _battenberg_get_reference, through which I downloaded the files from GSC web portal and used a script called reference_correction.py, to automatically replace placeholders with the correct paths. After this I automated the way chr prefix are used, using a regex statement to directly read it from the genome.fa file of that respective genome_build. To increase the scalability of Battenberg, ability to use different genomes where added by making a VERSION_MAP dictionary. Then to make the log files more informative, I shifted the output generated by rule _battenberg_input_bam from terminal to log file called input.log. +from version 1.0. To achieve this I created a new rule, _battenberg_get_reference, through which I downloaded the files from GSC web portal and used a script called reference_correction.py, to automatically replace placeholders with the correct paths. After this I automated the way chr prefix are used, using a regex statement to directly read it from the genome.fa file of that respective genome_build. To increase the scalability of Battenberg, ability to use different genomes where added by making a VERSION_MAP dictionary. Then to make the log files more informative, I shifted the output generated by rule _install_battenberg from terminal to log file called input.log. As a result of both reference file downloading and chr prefix reading going automatic, variables reference_path and chr_prefixed_reference from config file were removed. ## [1.1] - 2020-12-22 From 35da9b6a645ad37b6bf3672650a0157e13229b8e Mon Sep 17 00:00:00 2001 From: Lakshay Date: Fri, 23 Apr 2021 17:42:02 -0700 Subject: [PATCH 27/35] removed extra files --- modules/battenberg/1.1/config/default.yaml | 2 +- modules/battenberg/1.1/envs/samtools-1.9.yaml | 1 - modules/battenberg/1.1/envs/wget-1.20.1.yaml | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) delete mode 120000 modules/battenberg/1.1/envs/samtools-1.9.yaml delete mode 120000 modules/battenberg/1.1/envs/wget-1.20.1.yaml diff --git a/modules/battenberg/1.1/config/default.yaml b/modules/battenberg/1.1/config/default.yaml index fc22002a..384415ef 100644 --- a/modules/battenberg/1.1/config/default.yaml +++ b/modules/battenberg/1.1/config/default.yaml @@ -21,7 +21,7 @@ lcr-modules: grch37: " " conda_envs: - battenberg: "{MODSDIR}/envs/battenberg-1.1.yaml" + battenberg: "{MODSDIR}/envs/battenberg-1.0.yaml" resources: battenberg: diff --git a/modules/battenberg/1.1/envs/samtools-1.9.yaml b/modules/battenberg/1.1/envs/samtools-1.9.yaml deleted file mode 120000 index ab29288b..00000000 --- a/modules/battenberg/1.1/envs/samtools-1.9.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../envs/samtools/samtools-1.9.yaml \ No newline at end of file diff --git a/modules/battenberg/1.1/envs/wget-1.20.1.yaml b/modules/battenberg/1.1/envs/wget-1.20.1.yaml deleted file mode 120000 index 86501e72..00000000 --- a/modules/battenberg/1.1/envs/wget-1.20.1.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../envs/wget/wget-1.20.1.yaml \ No newline at end of file From 5bffac405a08de9e3b4cc2d7e663f05bb6cd3135 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Fri, 23 Apr 2021 17:52:31 -0700 Subject: [PATCH 28/35] added files from origin/master to battenberg1.1 --- modules/battenberg/1.1/envs/samtools-1.9.yaml | 1 + 1 file changed, 1 insertion(+) create mode 120000 modules/battenberg/1.1/envs/samtools-1.9.yaml diff --git a/modules/battenberg/1.1/envs/samtools-1.9.yaml b/modules/battenberg/1.1/envs/samtools-1.9.yaml new file mode 120000 index 00000000..ab29288b --- /dev/null +++ b/modules/battenberg/1.1/envs/samtools-1.9.yaml @@ -0,0 +1 @@ +../../../../envs/samtools/samtools-1.9.yaml \ No newline at end of file From a179dfb3e0e15fd444ae1b2af66f5335703bd219 Mon Sep 17 00:00:00 2001 From: Lakshay Date: Fri, 23 Apr 2021 18:23:01 -0700 Subject: [PATCH 29/35] trivial changes --- modules/battenberg/1.2/battenberg.smk | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/modules/battenberg/1.2/battenberg.smk b/modules/battenberg/1.2/battenberg.smk index b540e943..1aec38e0 100644 --- a/modules/battenberg/1.2/battenberg.smk +++ b/modules/battenberg/1.2/battenberg.smk @@ -155,8 +155,10 @@ rule _infer_patient_sex: threads: 8 shell: op.as_one_line(""" - echo "{params.checker}"; - + PATH={SCRIPT_PATH}:$PATH; + echo "running {rule} for {wildcards.normal_id} on $(hostname) at $(date)" > {log.stderr} ; + calc_sex_status.sh {input.normal_bam} {input.fasta} {wildcards.normal_id} > {output.sex_result} 2>> {log.stderr} && + echo "DONE running {rule} for {wildcards.normal_id} on $(hostname) at $(date)" >> {log.stderr} """) @@ -270,19 +272,14 @@ rule _battenberg_output_seg: op.relative_symlink(input.sub, output.sub,in_module=True) op.relative_symlink(input.cp, output.cp,in_module=True) - - # Generates the target sentinels for each run, which generate the symlinks rule _battenberg_all: input: expand( - [ - + [ rules._run_battenberg.output.sub, rules._battenberg_output_seg.output.seg, rules._battenberg_cleanup.output.complete - - ], zip, # Run expand() with zip(), not product() seq_type=CFG["runs"]["tumour_seq_type"], @@ -292,9 +289,6 @@ rule _battenberg_all: pair_status=CFG["runs"]["pair_status"]) - - - ##### CLEANUP ##### From 62877a17294603c4f75c3b684465a17106f34c69 Mon Sep 17 00:00:00 2001 From: Lakshay-sethi <58126894+Lakshay-sethi@users.noreply.github.com> Date: Wed, 28 Apr 2021 13:46:43 -0700 Subject: [PATCH 30/35] changed configs/envs to CFG["dirs"]["input"] --- modules/battenberg/1.2/battenberg.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/battenberg/1.2/battenberg.smk b/modules/battenberg/1.2/battenberg.smk index 1aec38e0..2f1b2851 100644 --- a/modules/battenberg/1.2/battenberg.smk +++ b/modules/battenberg/1.2/battenberg.smk @@ -127,7 +127,7 @@ rule _battenberg_input_bam: # I am open to suggestions for how to get around this. rule _install_battenberg: output: - complete = "config/envs/battenberg_dependencies_installed.success" + complete = CFG["dirs"]["input"] + "/battenberg_dependencies_installed.success" conda: CFG["conda_envs"]["battenberg"] log: From 1a3b18a7650f6f65e5d67972c14a60d6397c26ed Mon Sep 17 00:00:00 2001 From: Lakshay-sethi <58126894+Lakshay-sethi@users.noreply.github.com> Date: Wed, 28 Apr 2021 15:51:41 -0700 Subject: [PATCH 31/35] corrected inputs and tested --- modules/battenberg/1.2/battenberg.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/battenberg/1.2/battenberg.smk b/modules/battenberg/1.2/battenberg.smk index 2f1b2851..a89b4b13 100644 --- a/modules/battenberg/1.2/battenberg.smk +++ b/modules/battenberg/1.2/battenberg.smk @@ -127,7 +127,7 @@ rule _battenberg_input_bam: # I am open to suggestions for how to get around this. rule _install_battenberg: output: - complete = CFG["dirs"]["input"] + "/battenberg_dependencies_installed.success" + complete = CFG["dirs"]["inputs"] + "battenberg_dependencies_installed.success" conda: CFG["conda_envs"]["battenberg"] log: @@ -168,7 +168,7 @@ rule _run_battenberg: input: tumour_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{tumour_id}.bam", normal_bam = CFG["dirs"]["inputs"] + "bam/{seq_type}--{genome_build}/{normal_id}.bam", - installed = "config/envs/battenberg_dependencies_installed.success", + installed = CFG["dirs"]["inputs"] + "battenberg_dependencies_installed.success", sex_result = CFG["dirs"]["infer_sex"] + "{seq_type}--{genome_build}/{normal_id}.sex", fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa"), impute_info = CFG["dirs"]["inputs"] + "reference/{genome_build}/impute_info.txt" From e45d68278bc5d6243ccb3ed856b3c314994be2d2 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Sat, 1 May 2021 17:41:11 -0700 Subject: [PATCH 32/35] restore yamls from master --- envs/samtools/samtools-1.9.yaml | 1 + envs/wget/wget-1.20.1.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/envs/samtools/samtools-1.9.yaml b/envs/samtools/samtools-1.9.yaml index 3742eddf..07229243 100644 --- a/envs/samtools/samtools-1.9.yaml +++ b/envs/samtools/samtools-1.9.yaml @@ -24,4 +24,5 @@ dependencies: - tk=8.6.10 - xz=5.2.5 - zlib=1.2.11 +prefix: /home/bgrande/miniconda3/envs/test-samtools diff --git a/envs/wget/wget-1.20.1.yaml b/envs/wget/wget-1.20.1.yaml index fe718c1c..beb55a09 100644 --- a/envs/wget/wget-1.20.1.yaml +++ b/envs/wget/wget-1.20.1.yaml @@ -17,4 +17,4 @@ dependencies: - openssl=1.1.1h - wget=1.20.1 - zlib=1.2.11 - +prefix: /home/lhilton/miniconda3/envs/wget-test From 82727a3f0a40302ea539fa7ad7f185019c6f8d81 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Sat, 1 May 2021 17:44:51 -0700 Subject: [PATCH 33/35] increase default cnv2igv version --- modules/battenberg/1.2/config/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/battenberg/1.2/config/default.yaml b/modules/battenberg/1.2/config/default.yaml index afbafe69..85c65aa6 100644 --- a/modules/battenberg/1.2/config/default.yaml +++ b/modules/battenberg/1.2/config/default.yaml @@ -4,7 +4,7 @@ lcr-modules: # Available wildcards: {seq_type} {genome_build} {sample_id} sample_bam: "__UPDATE__" battenberg_script: "{MODSDIR}/src/battenberg_wgs_hg38.R" - cnv2igv: "{SCRIPTSDIR}/cnv2igv/1.3/cnv2igv.py" + cnv2igv: "{SCRIPTSDIR}/cnv2igv/1.4/cnv2igv.py" src_dir: "{MODSDIR}/src/" scratch_subdirectories: [] From e5bfc2f6ae154a71d07942ad7defe89dbc7f429d Mon Sep 17 00:00:00 2001 From: Kdreval Date: Sat, 1 May 2021 17:49:46 -0700 Subject: [PATCH 34/35] move shebang to be first line --- modules/battenberg/1.2/src/reference_correction.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/battenberg/1.2/src/reference_correction.py b/modules/battenberg/1.2/src/reference_correction.py index 236d63d7..b38e6fd3 100644 --- a/modules/battenberg/1.2/src/reference_correction.py +++ b/modules/battenberg/1.2/src/reference_correction.py @@ -1,3 +1,6 @@ +#!/usr/bin/python + + ##### ATTRIBUTION ##### @@ -7,7 +10,6 @@ # Replaces the placeholder value in the impute_info.txt with the correct path # where the reference files downloaded are stored. -#!/usr/bin/env Python script # # Usage: # python /reference_correction.py From 381887835f60446f39d83747fb009201c9d53cba Mon Sep 17 00:00:00 2001 From: Kdreval Date: Sat, 1 May 2021 17:51:37 -0700 Subject: [PATCH 35/35] add shebang --- modules/battenberg/1.2/src/reference_correction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/battenberg/1.2/src/reference_correction.py b/modules/battenberg/1.2/src/reference_correction.py index b38e6fd3..4b2b881c 100644 --- a/modules/battenberg/1.2/src/reference_correction.py +++ b/modules/battenberg/1.2/src/reference_correction.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python ##### ATTRIBUTION #####