Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ichorCNA 1.1 update patch #284

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions modules/ichorcna/1.1/config/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,40 @@ lcr-modules:
ichorCNA_normalPanel:
"1000000": "inst/extdata/HD_ULP_PoN_{genome_build}_1Mb_median_normAutosome_median.rds"
"500000": "inst/extdata/HD_ULP_PoN_{genome_build}_500kb_median_normAutosome_median.rds"
# relative to ichorCNA directory (in 00-inputs/ichorCNA/)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you provide some documentation about when/why someone might want to use custom normalPanel and gcWig files?

ichorCNA_normalPanel_custom:
grch37: ""
hg19: ""
grch38: ""
hg38: ""
hs37d5: ""
# must use gc wig file corresponding to same binSize (required)
ichorCNA_gcWig:
"1000000": "inst/extdata/gc_{genome_build}_1000kb.wig"
"500000": "inst/extdata/gc_{genome_build}_500kb.wig"
"50000": "inst/extdata/gc_{genome_build}_50kb.wig"
"10000": "inst/extdata/gc_{genome_build}_10kb.wig"
# relative to ichorCNA directory (in 00-inputs/ichorCNA/)
ichorCNA_gcWig_custom: # must create an empty one for each genome_build unless you have a custom gc wig file for that build
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think part of the comment is cut off here?

grch37: ""
hg19: ""
grch38: ""
hg38: ""
hs37d5: ""
# must use map wig file corresponding to same binSize (required)
ichorCNA_mapWig:
"1000000": "inst/extdata/map_{genome_build}_1000kb.wig"
"500000": "inst/extdata/map_{genome_build}_500kb.wig"
"50000": "inst/extdata/map_{genome_build}_50kb.wig"
"10000": "inst/extdata/map_{genome_build}_10kb.wig"
ichorCNA_mapWig_custom:
# use bed file if sample has targeted regions, eg. exome data (optional)
# relative to ichorCNA directory (in 00-inputs/ichorCNA/)
grch37: ""
hg19: ""
grch38: ""
hg38: ""
hs37d5: ""
ichorCNA_exons: NULL
ichorCNA_centromere:
grch37: "inst/extdata/GRCh37.p13_centromere_UCSC-gapTable.txt"
Expand Down
113 changes: 110 additions & 3 deletions modules/ichorcna/1.1/ichorcna.smk
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,113 @@ def get_chromosomes_R(wildcards):
stringEnd="')"
return stringStart + chromosomesR + stringEnd

# New functions just for specific alt-genome builds
# Bin sizes (in bp, as they appear in the {binSize} wildcard) mapped to the
# label used in the file names of the bundled ichorCNA wig files.
_ICHORCNA_WIG_BIN_LABELS = {
    "1000000": "1000kb",
    "500000": "500kb",
    "50000": "50kb",
    "10000": "10kb",
}

# Same idea for the bundled panel-of-normals files, which only exist for
# two bin sizes and use "1Mb" rather than "1000kb" in their names.
_ICHORCNA_PON_BIN_LABELS = {
    "1000000": "1Mb",
    "500000": "500kb",
}


def _resolve_ichorcna_resource(wildcards, custom_key, default_key,
                               bundled_template, bin_labels):
    """Pick the ichorCNA resource file for a genome build and bin size.

    Resolution order:

    1. A non-empty entry for the genome build in the ``custom_key`` mapping
       of the module's ``options.run`` config.
    2. The bundled file for the closest reference flavour: ``hg38`` when the
       build name contains "38", otherwise ``hg19``.
    3. The generic per-bin-size lookup in the ``default_key`` mapping.

    Args:
        wildcards: Snakemake wildcards providing ``genome_build`` and
            ``binSize``.
        custom_key: Config key of the per-genome-build override mapping.
        default_key: Config key of the per-bin-size default mapping.
        bundled_template: Format string with ``{build}`` and ``{bin}``
            placeholders for the bundled file path.
        bin_labels: Mapping from bin size (as a string) to the label used
            in the bundled file names.

    Returns:
        The path of the selected resource file.
    """
    run_opts = config["lcr-modules"]["ichorcna"]["options"]["run"]
    genome_build = str(wildcards.genome_build)
    bin_size = str(wildcards.binSize)

    # A missing mapping or a missing/empty/None entry all mean "no custom
    # file". Using .get() avoids the KeyError that the previous
    # `except NameError` handlers never caught, so builds that are not
    # listed in the config no longer crash the workflow.
    custom_paths = run_opts.get(custom_key) or {}
    custom_path = custom_paths.get(genome_build)
    if custom_path:
        return custom_path

    # Exact match on the bin size; a substring test would let e.g.
    # "5000000" be mistaken for "500000".
    bin_label = bin_labels.get(bin_size)
    if bin_label is not None:
        bundled_build = "hg38" if "38" in genome_build else "hg19"
        return bundled_template.format(build=bundled_build, bin=bin_label)

    # op.switch_on_wildcard() builds a function of the wildcards; it has to
    # be called here, otherwise the function object itself would be handed
    # back as the parameter value.
    # NOTE(review): assumes the returned function accepts `wildcards` as its
    # only required argument -- confirm against the oncopipe version in use.
    return op.switch_on_wildcard("binSize", run_opts[default_key])(wildcards)


def _which_gcwig(wildcards):
    """Return the GC-content wig file for this genome build and bin size."""
    return _resolve_ichorcna_resource(
        wildcards,
        custom_key="ichorCNA_gcWig_custom",
        default_key="ichorCNA_gcWig",
        bundled_template="inst/extdata/gc_{build}_{bin}.wig",
        bin_labels=_ICHORCNA_WIG_BIN_LABELS,
    )


def _which_mapwig(wildcards):
    """Return the mappability wig file for this genome build and bin size."""
    return _resolve_ichorcna_resource(
        wildcards,
        custom_key="ichorCNA_mapWig_custom",
        default_key="ichorCNA_mapWig",
        bundled_template="inst/extdata/map_{build}_{bin}.wig",
        bin_labels=_ICHORCNA_WIG_BIN_LABELS,
    )


def _which_normPanel(wildcards):
    """Return the panel-of-normals file for this genome build and bin size."""
    return _resolve_ichorcna_resource(
        wildcards,
        custom_key="ichorCNA_normalPanel_custom",
        default_key="ichorCNA_normalPanel",
        # The hg19 500kb case previously returned the 1Mb panel (copy-paste
        # slip); the template keeps the panel in step with the bin size.
        bundled_template=(
            "inst/extdata/HD_ULP_PoN_{build}_{bin}_median_normAutosome_median.rds"
        ),
        bin_labels=_ICHORCNA_PON_BIN_LABELS,
    )

rule _run_ichorcna:
input:
tum = CFG["dirs"]["readDepth"] + "{seq_type}--{genome_build}/{binSize}/wig/{tumour_id}.bin{binSize}.wig",
Expand All @@ -311,9 +418,9 @@ rule _run_ichorcna:
name = "{tumour_id}",
ploidy = CFG["options"]["run"]["ichorCNA_ploidy"],
normal = CFG["options"]["run"]["ichorCNA_normal"],
gcwig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_gcWig"]),
mapwig = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_mapWig"]),
normalpanel = op.switch_on_wildcard("binSize", CFG["options"]["run"]["ichorCNA_normalPanel"]),
gcwig = _which_gcwig,
mapwig = _which_mapwig,
normalpanel = _which_normPanel,
estimateNormal = CFG["options"]["run"]["ichorCNA_estimateNormal"],
estimatePloidy = CFG["options"]["run"]["ichorCNA_estimatePloidy"],
estimateClonality = CFG["options"]["run"]["ichorCNA_estimateClonality"],
Expand Down
5 changes: 5 additions & 0 deletions workflows/reference_files/2.4/config/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ genome_builds:
version: "grch38"
provider: "ensembl"
genome_fasta_url: "http://ftp.ensembl.org/pub/release-102/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_rm.primary_assembly.fa.gz"
hg19-reddy_masked:
# hard-masked repeats
version: "grch37"
provider: "ucsc"
genome_fasta_url: "https://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.masked.gz"
hg19_masked:
# hard-masked repeats
version: "grch37"
Expand Down