diff --git a/modules/ichorcna/1.1/config/default.yaml b/modules/ichorcna/1.1/config/default.yaml index 4844ba01..1e9bf7ba 100644 --- a/modules/ichorcna/1.1/config/default.yaml +++ b/modules/ichorcna/1.1/config/default.yaml @@ -24,19 +24,51 @@ lcr-modules: ichorCNA_normalPanel: "1000000": "inst/extdata/HD_ULP_PoN_{genome_build}_1Mb_median_normAutosome_median.rds" "500000": "inst/extdata/HD_ULP_PoN_{genome_build}_500kb_median_normAutosome_median.rds" + # relative to ichorCNA directory (in 00-inputs/ichorCNA/) + # ichorCNA has a preset panel of normals (PoN) with the module, but generating your own PoN is useful for reducing noise and improving accuracy if needed. + # Note: use ichorCNA's createPanelOfNormals.R in 00-inputs/ichorCNA/scripts/ + # i.e. Rscript createPanelOfNormals.R --filelist /path/to/wig_files.txt --gcWig /path/to/gc.wig --mapWig /path/to/map.wig --centromere /path/to/centromeres_file.txt --outfile base_outfile_name + # where filelist is a file listing the paths to all normals in the panel + # gcWig is a GC wig for the reference genome (if you make a new one, update the ichorCNA_gcWig_custom) + # mapWig is a mappability wig for the reference genome (if you make a new one, update the ichorCNA_mapWig_custom) + # centromere is a file containing centromere locations (see ichorCNA_centromere parameter) + # optional: --exons.bed can be used if a targeted panel was used + # the output file is an .rds file + ichorCNA_normalPanel_custom: + grch37: "" + hg19: "" + grch38: "" + hg38: "" + hs37d5: "" # must use gc wig file corresponding to same binSize (required) ichorCNA_gcWig: "1000000": "inst/extdata/gc_{genome_build}_1000kb.wig" "500000": "inst/extdata/gc_{genome_build}_500kb.wig" "50000": "inst/extdata/gc_{genome_build}_50kb.wig" "10000": "inst/extdata/gc_{genome_build}_10kb.wig" + # relative to ichorCNA directory (in 00-inputs/ichorCNA/) + # create a new gcWig if binSize changes + ichorCNA_gcWig_custom: # must create an empty one for each genome_build you 
include + grch37: "" + hg19: "" + grch38: "" + hg38: "" + hs37d5: "" # must use map wig file corresponding to same binSize (required) ichorCNA_mapWig: "1000000": "inst/extdata/map_{genome_build}_1000kb.wig" "500000": "inst/extdata/map_{genome_build}_500kb.wig" "50000": "inst/extdata/map_{genome_build}_50kb.wig" "10000": "inst/extdata/map_{genome_build}_10kb.wig" + ichorCNA_mapWig_custom: # use bed file if sample has targeted regions, eg. exome data (optional) + # relative to ichorCNA directory (in 00-inputs/ichorCNA/) + # create a new mapWig if binSize changes or if certain high-noise regions of the genome require masking + grch37: "" + hg19: "" + grch38: "" + hg38: "" + hs37d5: "" ichorCNA_exons: NULL ichorCNA_centromere: grch37: "inst/extdata/GRCh37.p13_centromere_UCSC-gapTable.txt" @@ -60,15 +92,19 @@ lcr-modules: grch38: "paste0('chr', c(1:22))" hg38: "paste0('chr', c(1:22))" # non-tumor fraction parameter restart values; higher values should be included for cfDNA + # set to "c(0.95, 0.99, 0.995, 0.999)" for low tumour content samples ichorCNA_normal: "c(0.5,0.6,0.7,0.8,0.9,0.95)" # ploidy parameter restart values + # set ploidy to "c(2)" for low tumour content cases ichorCNA_ploidy: "c(2,3,4)" ichorCNA_estimateNormal: TRUE ichorCNA_estimatePloidy: TRUE + # for low tumour content, set estimateClonality to FALSE and set scStates to "c()" ichorCNA_estimateClonality: TRUE # states to use for subclonal CN ichorCNA_scStates: "c(1,3)" # set maximum copy number to use + # set maxCN to 3 for low tumour content - reducing the state space will reduce complexity ichorCNA_maxCN: 5 # TRUE/FALSE to include homozygous deletion state # FALSE for low coverage libraries (ex. 0.1x) ; can turn on for higher coverage data (ex. 
>10x)
                 ichorCNA_includeHOMD: FALSE
diff --git a/modules/ichorcna/1.1/ichorcna.smk b/modules/ichorcna/1.1/ichorcna.smk
index 8391dee5..dbebc142 100644
--- a/modules/ichorcna/1.1/ichorcna.smk
+++ b/modules/ichorcna/1.1/ichorcna.smk
@@ -294,6 +294,69 @@ def get_chromosomes_R(wildcards):
     stringEnd="')"
     return stringStart + chromosomesR + stringEnd
 
+# Selectors for the ichorCNA reference files (GC wig, mappability wig, panel of
+# normals). A custom file configured for the genome build takes precedence;
+# otherwise every genome build is mapped onto the bundled hg19 or hg38 files.
+def _ichorcna_reference(wildcards, option, bundled):
+    """Resolve one ichorCNA reference file for a genome build and bin size.
+
+    Args:
+        wildcards: Snakemake wildcards providing genome_build and binSize.
+        option: Key under CFG["options"]["run"], e.g. "ichorCNA_gcWig"; the
+            per-build overrides are read from the key option + "_custom".
+        bundled: Dict mapping a bin size (str) to a path template that
+            contains "{build}", which is filled with "hg38" or "hg19".
+
+    Returns:
+        The non-empty custom path configured for the genome build, if any;
+        else the bundled path for the exact bin size ("hg38" when the build
+        name contains "38", "hg19" otherwise); else the entry selected from
+        the configured option table by op.switch_on_wildcard.
+    """
+    CFG = config['lcr-modules']['ichorcna']
+    run_opts = CFG["options"]["run"]
+    # .get() so that a missing *_custom table, or a genome build without an
+    # entry in it, falls through to the bundled files instead of raising.
+    overrides = run_opts.get(option + "_custom") or {}
+    custom = overrides.get(str(wildcards.genome_build))
+    if custom:
+        return custom
+    build = "hg38" if "38" in str(wildcards.genome_build) else "hg19"
+    # Exact match on the bin size: a substring test would send e.g. "100000"
+    # to the 10kb files.
+    template = bundled.get(str(wildcards.binSize))
+    if template is not None:
+        return template.format(build=build)
+    # NOTE(review): switch_on_wildcard was previously passed directly as a
+    # params value of _run_ichorcna, so it presumably returns a function of the
+    # wildcards; call it so the rule receives a value. Confirm against oncopipe.
+    return op.switch_on_wildcard("binSize", run_opts[option])(wildcards)
+
+def _which_gcwig(wildcards):
+    """Return the GC-content wig for the rule's genome build and binSize."""
+    return _ichorcna_reference(wildcards, "ichorCNA_gcWig", {
+        "1000000": "inst/extdata/gc_{build}_1000kb.wig",
+        "500000": "inst/extdata/gc_{build}_500kb.wig",
+        "50000": "inst/extdata/gc_{build}_50kb.wig",
+        "10000": "inst/extdata/gc_{build}_10kb.wig",
+    })
+
+def _which_mapwig(wildcards):
+    """Return the mappability wig for the rule's genome build and binSize."""
+    return _ichorcna_reference(wildcards, "ichorCNA_mapWig", {
+        "1000000": "inst/extdata/map_{build}_1000kb.wig",
+        "500000": "inst/extdata/map_{build}_500kb.wig",
+        "50000": "inst/extdata/map_{build}_50kb.wig",
+        "10000": "inst/extdata/map_{build}_10kb.wig",
+    })
+
+def _which_normPanel(wildcards):
+    """Return the panel-of-normals .rds for the rule's genome build and binSize."""
+    return _ichorcna_reference(wildcards, "ichorCNA_normalPanel", {
+        "1000000": "inst/extdata/HD_ULP_PoN_{build}_1Mb_median_normAutosome_median.rds",
+        "500000": "inst/extdata/HD_ULP_PoN_{build}_500kb_median_normAutosome_median.rds",
+    })
+
 rule _run_ichorcna:
     input:
         tum = CFG["dirs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/wig/{tumour_id}.bin{binSize}.wig",
@@ -311,9 +374,9 @@ rule _run_ichorcna:
         name = "{tumour_id}",
         ploidy = CFG["options"]["run"]["ichorCNA_ploidy"],
         normal = CFG["options"]["run"]["ichorCNA_normal"],
-        gcwig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_gcWig"]),
-        mapwig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_mapWig"]),
-        normalpanel = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_normalPanel"]),
+        gcwig = _which_gcwig,
+        mapwig = _which_mapwig,
+        normalpanel = _which_normPanel,
         estimateNormal = CFG["options"]["run"]["ichorCNA_estimateNormal"],
         estimatePloidy = CFG["options"]["run"]["ichorCNA_estimatePloidy"],
         estimateClonality = CFG["options"]["run"]["ichorCNA_estimateClonality"],
diff --git a/workflows/reference_files/2.4/config/default.yaml b/workflows/reference_files/2.4/config/default.yaml
index 5d304136..53f53649 100644
--- a/workflows/reference_files/2.4/config/default.yaml
+++ b/workflows/reference_files/2.4/config/default.yaml
@@ -37,6 +37,11 @@ genome_builds:
         version: "grch38"
         provider: "ensembl"
         genome_fasta_url: "http://ftp.ensembl.org/pub/release-102/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_rm.primary_assembly.fa.gz"
+    hg19-reddy_masked:
+        # hard-masked repeats
+        version: "grch37"
+        provider: "ucsc"
+        genome_fasta_url: "https://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.masked.gz"
     hg19_masked:
         # hard-masked repeats
         version: "grch37"