Skip to content

Commit

Permalink
Merge branch '414-defining-tumor-purityploidy' of github.com:bihealth…
Browse files Browse the repository at this point in the history
…/snappy-pipeline into 414-defining-tumor-purityploidy
  • Loading branch information
ericblanc20 committed Aug 30, 2023
2 parents 595ce3d + c1a215e commit c6ba0a5
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ rule somatic_targeted_seq_cnv_calling_sequenza_report:
**wf.get_input_files("sequenza", "report"),
output:
**wf.get_output_files("sequenza", "report"),
params:
sample_id=wf.get_params("sequenza", "report"),
threads: wf.get_resource("sequenza", "report", "threads")
resources:
time=wf.get_resource("sequenza", "report", "time"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
# Default configuration somatic_targeted_seq_cnv_calling
step_config:
somatic_targeted_seq_cnv_calling:
tools: ['cnvkit'] # REQUIRED - available: 'cnvkit', 'sequenza', 'cnvetti_on_target' and 'cnvetti_off_target'
tools: ['cnvkit'] # REQUIRED - available: 'cnvkit', 'sequenza', 'cnvetti_on_target', 'cnvetti_off_target' and 'copywriter' (deprecated)
path_ngs_mapping: ../ngs_mapping # REQUIRED
cnvkit:
path_target: REQUIRED # Usually ../panel_of_normals/output/cnvkit.target/out/cnvkit.target.bed
Expand Down Expand Up @@ -165,14 +165,25 @@
smooth_bootstrap: False # [segmetrics] Smooth bootstrap results
sequenza:
length: 50
extra_args: {} # Extra arguments for sequenza bam2seqz, valid values:
# hom: 0.9 # Threshold to select homozygous positions
# het: 0.25 # Threshold to select heterozygous positions
# qlimit: 20 # Minimum nucleotide quality score for inclusion in the counts
# qformat: "sanger" # Quality format, options are "sanger" or "illumina". This will add an offset of 33 or 64 respectively to the qlimit value
assembly: "hg19" # Must be hg38 for GRCh38. See copynumber for complete list (augmented with hg38)
extra_args: {} # Extra arguments for sequenza bam2seqz, valid values:
# hom: 0.9 # Threshold to select homozygous positions
# het: 0.25 # Threshold to select heterozygous positions
# qlimit: 20 # Minimum nucleotide quality score for inclusion in the counts
# qformat: "sanger" # Quality format, options are "sanger" or "illumina". This will add an offset of 33 or 64 respectively to the qlimit value
ignore_chroms: # patterns of chromosome names to ignore
[X, Y, MT, NC_007605. hs37d5, chrEBV, '*_decoy', 'HLA-*', 'GL000220.*'] # Genome hs37d5
# [chrX, chrY, chrM, '*_random', 'chrUn_*', chrEBV, '*_decoy', 'HPV*', CMV, HBV, KSHV, MCV, SV40, 'HCV-*', 'HIV-*', 'HTLV-*'] # Genome GRch38.d1.vd1
extra_args_extract: # Valid arguments: see ?sequenza::sequenza.extract in R
gamma: 60 # scarHRD value
kmin: 50 # scarHRD value
extra_args_fit: # Valid arguments: see ?sequenza::sequenza.fit in R
N.ratio.filter: 10 # scarHRD value
N.BAF.filter: 1 # scarHRD value
segment.filter: 3000000 # scarHRD value
mufreq.treshold: 0.1 # scarHRD value
ratio.priority: False # scarHRD value
ploidy: [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5.0, 5.1, 5.2, 5.3, 5.4, 5.5]
copywriter:
path_target_regions: REQUIRED # REQUIRED
bin_size: 20000 # TODO: make actually configurable
Expand Down Expand Up @@ -519,6 +530,13 @@ def get_output_files(self, action):
)
)

def get_params(self, action):
self._validate_action(action)
return self._get_params_report

def _get_params_report(self, wildcards):
return wildcards["library_name"]

@dictify
def get_log_file(self, action):
"""Return dict of log files."""
Expand Down
49 changes: 47 additions & 2 deletions snappy_wrappers/wrappers/sequenza/report/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,48 @@
"""

import os
import sys

# The following is required for being able to import snappy_wrappers modules
# inside wrappers. These run in an "inner" snakemake process which uses its
# own conda environment which cannot see the snappy_pipeline installation.
base_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
sys.path.insert(0, base_dir)

from snakemake import shell

from snappy_wrappers.tools.genome_windows import yield_contigs

__author__ = "Eric Blanc <[email protected]>"


def config_to_r(x):
if x is None:
return "NULL"
if isinstance(x, str):
return f'"{x}"'
if isinstance(x, bool):
return "TRUE" if x else "FALSE"
if isinstance(x, list):
return "c({})".format(", ".join([config_to_r(xx) for xx in x]))
if isinstance(x, dict):
return "list({})".format(
", ".join(['"{}"={}'.format(k, config_to_r(v)) for k, v in x.items()])
)
return str(x)


step = snakemake.config["pipeline_step"]["name"]
config = snakemake.config["step_config"][step]["sequenza"]
genome = snakemake.config["static_data_config"]["reference"]["path"]

f = open(genome + ".fai", "rt")
contigs = config_to_r(list(yield_contigs(f, config.get("ignore_chroms"))))
f.close()

args_extract = config_to_r(dict(config["extra_args_extract"]))
args_fit = config_to_r(dict(config["extra_args_fit"]))

shell.executable("/bin/bash")

shell(
Expand Down Expand Up @@ -35,13 +72,21 @@
R --vanilla --slave << __EOF
library(sequenza)
seqz <- sequenza.extract("{snakemake.input.seqz}")
CP <- sequenza.fit(seqz)
args <- list(file="{snakemake.input.seqz}", assembly="{config[assembly]}", chromosome.list={contigs})
args <- c(args, {args_extract})
seqz <- do.call(sequenza.extract, args=args)
args <- list(sequenza.extract=seqz, chromosome.list={contigs}, mc.cores=1)
args <- c(args, {args_fit})
CP <- do.call(sequenza.fit, args=args)
sequenza.results(sequenza.extract=seqz, cp.table=CP, sample.id="{snakemake.params[sample_id]}", out.dir=dirname("{snakemake.output.done}"))
__EOF
pushd $(dirname {snakemake.output.done}) ; for f in $(ls) ; do md5sum $f > $f.md5 ; done ; popd
touch {snakemake.output.done}
"""
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,6 @@ def test_sequenza_step_part_get_log_file_run(somatic_targeted_seq_cnv_calling_wo

def test_sequenza_step_part_get_input_files_report(somatic_targeted_seq_cnv_calling_workflow):
"""Tests SequenzaStepPart.get_input_files() - action 'report'"""
# wildcards = Wildcards(fromdict={"mapper": "bwa", "library_name": "P001-T1-DNA1-WGS1"})
expected = {
"packages": "work/R_packages/out/sequenza.done",
"seqz": "work/{mapper}.sequenza.{library_name}/out/{mapper}.sequenza.{library_name}.seqz.gz",
Expand All @@ -895,6 +894,14 @@ def test_sequenza_step_part_get_output_files_report(somatic_targeted_seq_cnv_cal
assert actual == expected


def test_sequenza_step_part_get_params_report(somatic_targeted_seq_cnv_calling_workflow):
"""Tests SequenzaStepPart.get_params() - action 'report'"""
wildcards = Wildcards(fromdict={"mapper": "bwa", "library_name": "P001-T1-DNA1-WGS1"})
expected = "P001-T1-DNA1-WGS1"
actual = somatic_targeted_seq_cnv_calling_workflow.get_params("sequenza", "report")(wildcards)
assert actual == expected


def test_sequenza_step_part_get_log_file_report(somatic_targeted_seq_cnv_calling_workflow):
"""Tests SequenzaStepPart.get_log_file() - action 'report'"""
base_name = "work/{mapper}.sequenza.{library_name}/log/{mapper}.sequenza.{library_name}.report"
Expand Down

0 comments on commit c6ba0a5

Please sign in to comment.