From 190ed3343ed63c49f2a34597ef5738c77f5c1417 Mon Sep 17 00:00:00 2001 From: Jasper Wong Date: Thu, 16 Nov 2023 17:42:52 -0800 Subject: [PATCH 1/3] Updated ichorcna to allow for more flexibility in reference wigs; updated reference workflow to include grch37-noalt --- modules/ichorcna/1.1/config/default.yaml | 21 ++++ modules/ichorcna/1.1/ichorcna.smk | 113 +++++++++++++++++- .../reference_files/2.4/config/default.yaml | 5 + 3 files changed, 136 insertions(+), 3 deletions(-) diff --git a/modules/ichorcna/1.1/config/default.yaml b/modules/ichorcna/1.1/config/default.yaml index 4844ba01..176ab09d 100644 --- a/modules/ichorcna/1.1/config/default.yaml +++ b/modules/ichorcna/1.1/config/default.yaml @@ -24,19 +24,40 @@ lcr-modules: ichorCNA_normalPanel: "1000000": "inst/extdata/HD_ULP_PoN_{genome_build}_1Mb_median_normAutosome_median.rds" "500000": "inst/extdata/HD_ULP_PoN_{genome_build}_500kb_median_normAutosome_median.rds" + # relative to ichorCNA directory (in 00-inputs/ichorCNA/) + ichorCNA_normalPanel_custom: + grch37: "" + hg19: "" + grch38: "" + hg38: "" + hs37d5: "" # must use gc wig file corresponding to same binSize (required) ichorCNA_gcWig: "1000000": "inst/extdata/gc_{genome_build}_1000kb.wig" "500000": "inst/extdata/gc_{genome_build}_500kb.wig" "50000": "inst/extdata/gc_{genome_build}_50kb.wig" "10000": "inst/extdata/gc_{genome_build}_10kb.wig" + # relative to ichorCNA directory (in 00-inputs/ichorCNA/) + ichorCNA_gcWig_custom: # must create an empty one for each genome_build unless you have a + grch37: "" + hg19: "" + grch38: "" + hg38: "" + hs37d5: "" # must use map wig file corresponding to same binSize (required) ichorCNA_mapWig: "1000000": "inst/extdata/map_{genome_build}_1000kb.wig" "500000": "inst/extdata/map_{genome_build}_500kb.wig" "50000": "inst/extdata/map_{genome_build}_50kb.wig" "10000": "inst/extdata/map_{genome_build}_10kb.wig" + ichorCNA_mapWig_custom: # use bed file if sample has targeted regions, eg. 
exome data (optional) + # relative to ichorCNA directory (in 00-inputs/ichorCNA/) + grch37: "" + hg19: "" + grch38: "" + hg38: "" + hs37d5: "" ichorCNA_exons: NULL ichorCNA_centromere: grch37: "inst/extdata/GRCh37.p13_centromere_UCSC-gapTable.txt" diff --git a/modules/ichorcna/1.1/ichorcna.smk b/modules/ichorcna/1.1/ichorcna.smk index 8391dee5..dbebc142 100644 --- a/modules/ichorcna/1.1/ichorcna.smk +++ b/modules/ichorcna/1.1/ichorcna.smk @@ -294,6 +294,113 @@ def get_chromosomes_R(wildcards): stringEnd="')" return stringStart + chromosomesR + stringEnd +# New functions just for specific alt-genome builds +def _which_gcwig(wildcards): + CFG = config['lcr-modules']['ichorcna'] + this_genome_build = str(wildcards.genome_build) + try: + wigs = CFG["options"]["run"]["ichorCNA_gcWig_custom"][this_genome_build] + except NameError: + wigs = None + try: + wigs = CFG["options"]["run"]["ichorCNA_gcWig_custom"][wildcards.genome_build] + except NameError: + wigs = None + if wigs is not None and wigs != "": + return wigs + elif "38" in str({wildcards.genome_build}): + if "1000000" in str({wildcards.binSize}): + return "inst/extdata/gc_hg38_1000kb.wig" + elif "500000" in str({wildcards.binSize}): + return "inst/extdata/gc_hg38_500kb.wig" + elif "50000" in str({wildcards.binSize}): + return "inst/extdata/gc_hg38_50kb.wig" + elif "10000" in str({wildcards.binSize}): + return "inst/extdata/gc_hg38_10kb.wig" + else: + wig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_gcWig"]) + return wig + else: + if "1000000" in str({wildcards.binSize}): + return "inst/extdata/gc_hg19_1000kb.wig" + elif "500000" in str({wildcards.binSize}): + return "inst/extdata/gc_hg19_500kb.wig" + elif "50000" in str({wildcards.binSize}): + return "inst/extdata/gc_hg19_50kb.wig" + elif "10000" in str({wildcards.binSize}): + return "inst/extdata/gc_hg19_10kb.wig" + else: + wig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_gcWig"]) + return wig + +def 
_which_mapwig(wildcards): + CFG = config['lcr-modules']['ichorcna'] + this_genome_build = str(wildcards.genome_build) + try: + wigs = CFG["options"]["run"]["ichorCNA_mapWig_custom"][this_genome_build] + except NameError: + wigs = None + try: + wigs = CFG["options"]["run"]["ichorCNA_mapWig_custom"][wildcards.genome_build] + except NameError: + wigs = None + if wigs is not None and wigs != "": + return wigs + elif "38" in str({wildcards.genome_build}): + if "1000000" in str({wildcards.binSize}): + return "inst/extdata/map_hg38_1000kb.wig" + elif "500000" in str({wildcards.binSize}): + return "inst/extdata/map_hg38_500kb.wig" + elif "50000" in str({wildcards.binSize}): + return "inst/extdata/map_hg38_50kb.wig" + elif "10000" in str({wildcards.binSize}): + return "inst/extdata/map_hg38_10kb.wig" + else: + wig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_mapWig"]) + return wig + else: + if "1000000" in str({wildcards.binSize}): + return "inst/extdata/map_hg19_1000kb.wig" + elif "500000" in str({wildcards.binSize}): + return "inst/extdata/map_hg19_500kb.wig" + elif "50000" in str({wildcards.binSize}): + return "inst/extdata/map_hg19_50kb.wig" + elif "10000" in str({wildcards.binSize}): + return "inst/extdata/map_hg19_10kb.wig" + else: + wig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_mapWig"]) + return wig + +def _which_normPanel(wildcards): + CFG = config['lcr-modules']['ichorcna'] + this_genome_build = str(wildcards.genome_build) + try: + rds = CFG["options"]["run"]["ichorCNA_normalPanel_custom"][this_genome_build] + except NameError: + rds = None + try: + rds = CFG["options"]["run"]["ichorCNA_normalPanel_custom"][wildcards.genome_build] + except NameError: + rds = None + if rds is not None and rds != "": + return rds + elif "38" in str({wildcards.genome_build}): + if "1000000" in str({wildcards.binSize}): + return "inst/extdata/HD_ULP_PoN_hg38_1Mb_median_normAutosome_median.rds" + elif "500000" in str({wildcards.binSize}): 
+ return "inst/extdata/HD_ULP_PoN_hg38_500kb_median_normAutosome_median.rds" + else: + rds = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_normalPanel"]) + return rds + else: + if "1000000" in str({wildcards.binSize}): + return "inst/extdata/HD_ULP_PoN_hg19_1Mb_median_normAutosome_median.rds" + elif "500000" in str({wildcards.binSize}): + return "inst/extdata/HD_ULP_PoN_hg19_1Mb_median_normAutosome_median.rds" + else: + rds = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_normalPanel"]) + return rds + rule _run_ichorcna: input: tum = CFG["dirs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/wig/{tumour_id}.bin{binSize}.wig", @@ -311,9 +418,9 @@ rule _run_ichorcna: name = "{tumour_id}", ploidy = CFG["options"]["run"]["ichorCNA_ploidy"], normal = CFG["options"]["run"]["ichorCNA_normal"], - gcwig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_gcWig"]), - mapwig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_mapWig"]), - normalpanel = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_normalPanel"]), + gcwig = _which_gcwig, + mapwig = _which_mapwig, + normalpanel = _which_normPanel, estimateNormal = CFG["options"]["run"]["ichorCNA_estimateNormal"], estimatePloidy = CFG["options"]["run"]["ichorCNA_estimatePloidy"], estimateClonality = CFG["options"]["run"]["ichorCNA_estimateClonality"], diff --git a/workflows/reference_files/2.4/config/default.yaml b/workflows/reference_files/2.4/config/default.yaml index 5d304136..53f53649 100644 --- a/workflows/reference_files/2.4/config/default.yaml +++ b/workflows/reference_files/2.4/config/default.yaml @@ -37,6 +37,11 @@ genome_builds: version: "grch38" provider: "ensembl" genome_fasta_url: "http://ftp.ensembl.org/pub/release-102/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_rm.primary_assembly.fa.gz" + hg19-reddy_masked: + # hard-masked repeats + version: "grch37" + provider: "ucsc" + genome_fasta_url: 
"https://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.masked.gz" hg19_masked: # hard-masked repeats version: "grch37" From 24be6d8f1d59dac333dfa7740117f55716ec1f94 Mon Sep 17 00:00:00 2001 From: Jasper Wong Date: Mon, 25 Mar 2024 12:14:55 -0700 Subject: [PATCH 2/3] Updated documentation in config --- modules/ichorcna/1.1/config/default.yaml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/modules/ichorcna/1.1/config/default.yaml b/modules/ichorcna/1.1/config/default.yaml index 176ab09d..5cc83f6b 100644 --- a/modules/ichorcna/1.1/config/default.yaml +++ b/modules/ichorcna/1.1/config/default.yaml @@ -25,6 +25,15 @@ lcr-modules: "1000000": "inst/extdata/HD_ULP_PoN_{genome_build}_1Mb_median_normAutosome_median.rds" "500000": "inst/extdata/HD_ULP_PoN_{genome_build}_500kb_median_normAutosome_median.rds" # relative to ichorCNA directory (in 00-inputs/ichorCNA/) + # ichorCNA has a preset panel of normals (PoN) with the module, but generating your own PoN is useful for reducing noise and improving accuracy if needed. + # Note: use ichorCNA's createPanelOfNormals.R in 00-inputs/ichorCNA/scripts/ + # i.e. 
Rscript createPanelOfNormals.R --filelist /path/to/wig_files.txt --gcWig /path/to/gc.wig --mapWig /path/to/map.wig --centromere /path/to/centromeres_file.txt --outfile base_outfile_name + # where filelist is a text file listing the paths to all normals in the panel + # gcWig is a GC wig for the reference genome (if you make a new one, update the ichorCNA_gcWig_custom) + # mapWig is a mappability wig for the reference genome (if you make a new one, update the ichorCNA_mapWig_custom) + # centromere is a file containing centromere locations (see ichorCNA_centromere parameter) + # optional: --exons.bed can be used if a targeted panel was used + # the output file is an .rds file ichorCNA_normalPanel_custom: grch37: "" hg19: "" @@ -38,7 +47,7 @@ lcr-modules: "50000": "inst/extdata/gc_{genome_build}_50kb.wig" "10000": "inst/extdata/gc_{genome_build}_10kb.wig" # relative to ichorCNA directory (in 00-inputs/ichorCNA/) - ichorCNA_gcWig_custom: # must create an empty one for each genome_build unless you have a + ichorCNA_gcWig_custom: # must create an empty one for each genome_build you include grch37: "" hg19: "" grch38: "" @@ -81,15 +90,19 @@ lcr-modules: grch38: "paste0('chr', c(1:22))" hg38: "paste0('chr', c(1:22))" # non-tumor fraction parameter restart values; higher values should be included for cfDNA + # set to "c(0.95, 0.99, 0.995, 0.999)" for low tumour content samples ichorCNA_normal: "c(0.5,0.6,0.7,0.8,0.9,0.95)" # ploidy parameter restart values + # set to ploidy "c(2)" for low tumour content cases ichorCNA_ploidy: "c(2,3,4)" ichorCNA_estimateNormal: TRUE ichorCNA_estimatePloidy: TRUE + # for low tumour content, set estimateClonality to FALSE and set scStates to "c()" ichorCNA_estimateClonality: TRUE # states to use for subclonal CN ichorCNA_scStates: "c(1,3)" # set maximum copy number to use + # set to maxCN 3 for low tumour content - reducing state space will reduce complexity ichorCNA_maxCN: 5 # TRUE/FALSE to include homozygous deletion state # FALSE for low 
coverage libraries (ex. 0.1x) ; can turn on for higher coverage data (ex. >10x) ichorCNA_includeHOMD: FALSE From e70d5aced5e27f060cd9f2df0aff8ecabc663e5d Mon Sep 17 00:00:00 2001 From: Jasper Wong Date: Mon, 25 Mar 2024 12:21:16 -0700 Subject: [PATCH 3/3] Small doc fix --- modules/ichorcna/1.1/config/default.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/ichorcna/1.1/config/default.yaml b/modules/ichorcna/1.1/config/default.yaml index 5cc83f6b..1e9bf7ba 100644 --- a/modules/ichorcna/1.1/config/default.yaml +++ b/modules/ichorcna/1.1/config/default.yaml @@ -47,6 +47,7 @@ lcr-modules: "50000": "inst/extdata/gc_{genome_build}_50kb.wig" "10000": "inst/extdata/gc_{genome_build}_10kb.wig" # relative to ichorCNA directory (in 00-inputs/ichorCNA/) + # create a new gcWig if binSize changes ichorCNA_gcWig_custom: # must create an empty one for each genome_build you include grch37: "" hg19: "" @@ -62,6 +63,7 @@ lcr-modules: ichorCNA_mapWig_custom: # use bed file if sample has targeted regions, eg. exome data (optional) # relative to ichorCNA directory (in 00-inputs/ichorCNA/) + # create a new mapWig if binSize changes or if certain high-noise regions of the genome require masking grch37: "" hg19: "" grch38: ""