From 9d69778ab3fc98a421078ba2caa67a7999207c8e Mon Sep 17 00:00:00 2001 From: Eudes Barbosa Date: Tue, 28 Jun 2022 11:18:57 +0200 Subject: [PATCH 1/6] Unify output filenames for Control Freec and CopyWriter - changes restricted to code in workflows. --- .../__init__.py | 4 +- .../somatic_wgs_cnv_calling/__init__.py | 64 ++++++++++--------- ...kflows_somatic_targeted_seq_cnv_calling.py | 19 +++--- .../test_workflows_somatic_wgs_cnv_calling.py | 6 ++ 4 files changed, 51 insertions(+), 42 deletions(-) diff --git a/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/__init__.py b/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/__init__.py index ba9e9303b..06054ccc4 100644 --- a/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/__init__.py +++ b/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/__init__.py @@ -810,7 +810,7 @@ def get_output_files(self, action): "gene_log2_txt": "gene_log2.txt", "segments_txt": "segments.txt", } - tpl = "work/{mapper}.copywriter.{library_name}/out/{mapper}.copywriter.{library_name}_" + tpl = "work/{mapper}.copywriter.{library_name}/out/{mapper}.copywriter.{library_name}." 
output_files = {} for k, v in exts.items(): output_files[k] = tpl + v @@ -908,7 +908,7 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir) def get_result_files(self): """Return list of result files for the somatic targeted sequencing CNV calling step""" tool_actions = { - "cnvkit": ("call", "report", "export", "plot"), # ("report", "export", "plot"), + "cnvkit": ("call", "report", "export", "plot"), "copywriter": ("call",), "cnvetti_on_target": ("coverage", "segment", "postprocess"), "cnvetti_off_target": ("coverage", "segment", "postprocess"), diff --git a/snappy_pipeline/workflows/somatic_wgs_cnv_calling/__init__.py b/snappy_pipeline/workflows/somatic_wgs_cnv_calling/__init__.py index 49aee187e..e08aac099 100644 --- a/snappy_pipeline/workflows/somatic_wgs_cnv_calling/__init__.py +++ b/snappy_pipeline/workflows/somatic_wgs_cnv_calling/__init__.py @@ -130,10 +130,19 @@ segmentation: HaarSeg normalization: MedianGcBinned control_freec: - path_chrlenfile: REQUIRED #REQUIRED - path_mappability: REQUIRED #REQUIRED - path_mappability_enabled: False - window_size: -1 #set to a value >=0 you want a specific fixed window size + path_mappability: REQUIRED # REQUIRED + breakPointThreshold: 0.8 + coefficientOfVariation: 0.05 + contamination: 0.4 + minCNAlength: 1 + minMappabilityPerWindow: 0.85 + minExpectedGC: 0.35 + maxExpectedGC: 0.55 + minimalSubclonePresence: 0.2 + readCountThreshold: 10 + telocentromeric: 50000 + window: ~ + ignore_chrom: [] convert: org_obj: org.Hs.eg.db::org.Hs.eg.db tx_obj: TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene @@ -475,29 +484,21 @@ def get_output_files(self, action): var_caller=self.name, ext=".ratio.txt.md5" ) elif action == "transform": - transform_ext_names = ("log2", "call", "segments", "cns", "cnr") - transform_ext_values = ( - "_gene_log2.txt", - "_gene_call.txt", - "_segments.txt", - ".cns", - ".cnr", - ) - result = dict( - zip( - transform_ext_names, - expand(self.base_path_out, 
var_caller=[self.name], ext=transform_ext_values), - ) - ) + transform = { + "log2": ".gene_log2.txt", + "call": ".gene_call.txt", + "segments": ".segments.txt", + "cns": ".cns.txt", + "cnr": ".cnr.txt", + } + for (name, value) in transform.items(): + result[name] = self.base_path_out.format(var_caller=self.name, ext=value) + result[name + "_md5"] = result[name] + ".md5" elif action == "plot": - plot_ext_names = ("heatmap", "scatter", "diagram") - plot_ext_values = (".heatmap.png", ".scatter.png", ".diagram.pdf") - result = dict( - zip( - plot_ext_names, - expand(self.base_path_out, var_caller=[self.name], ext=plot_ext_values), - ) - ) + plot = {"heatmap": ".heatmap.png", "scatter": ".scatter.png", "diagram": ".diagram.pdf"} + for (name, value) in plot.items(): + result[name] = self.base_path_out.format(var_caller=self.name, ext=value) + result[name + "_md5"] = result[name] + ".md5" return result @@ -505,10 +506,7 @@ def check_config(self): """Check configuration for ControlFreec Somatic WGS CNV calling""" if "control_freec" not in (self.config["tools"] or []): # pylint: disable=C0325 return # ControlFreec not enabled, skip # pragma: no cover - self.parent.ensure_w_config( - ("step_config", "somatic_wgs_cnv_calling", "control_freec", "path_chrlenfile"), - "Path to ControlFreec ChrLenFile not configured", - ) + self.parent.ensure_w_config( ("step_config", "somatic_wgs_cnv_calling", "control_freec", "path_mappability"), "Path to ControlFreec mappability file not configured", @@ -612,11 +610,17 @@ def get_result_files(self): ".ratio.txt", ".ratio.txt.md5", ".gene_log2.txt", + ".gene_log2.txt.md5", ".gene_call.txt", + ".gene_call.txt.md5", ".segments.txt", + ".segments.txt.md5", ".scatter.png", + ".scatter.png.md5", ".heatmap.png", + ".heatmap.png.md5", ".diagram.pdf", + ".diagram.pdf.md5", ], ) # Plots for cnvetti diff --git a/tests/snappy_pipeline/workflows/test_workflows_somatic_targeted_seq_cnv_calling.py 
b/tests/snappy_pipeline/workflows/test_workflows_somatic_targeted_seq_cnv_calling.py index ab162f19f..c6307ee74 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_somatic_targeted_seq_cnv_calling.py +++ b/tests/snappy_pipeline/workflows/test_workflows_somatic_targeted_seq_cnv_calling.py @@ -854,14 +854,14 @@ def test_copywriter_step_part_get_output_files_call(somatic_targeted_seq_cnv_cal """Tests CopywriterStepPart.get_output_files() - action 'call'""" base_name = "work/{mapper}.copywriter.{library_name}/out/{mapper}.copywriter.{library_name}" expected = { - "bins_txt": base_name + "_bins.txt", - "bins_txt_md5": base_name + "_bins.txt.md5", - "gene_call_txt": base_name + "_gene_call.txt", - "gene_call_txt_md5": base_name + "_gene_call.txt.md5", - "gene_log2_txt": base_name + "_gene_log2.txt", - "gene_log2_txt_md5": base_name + "_gene_log2.txt.md5", - "segments_txt": base_name + "_segments.txt", - "segments_txt_md5": base_name + "_segments.txt.md5", + "bins_txt": base_name + ".bins.txt", + "bins_txt_md5": base_name + ".bins.txt.md5", + "gene_call_txt": base_name + ".gene_call.txt", + "gene_call_txt_md5": base_name + ".gene_call.txt.md5", + "gene_log2_txt": base_name + ".gene_log2.txt", + "gene_log2_txt_md5": base_name + ".gene_log2.txt.md5", + "segments_txt": base_name + ".segments.txt", + "segments_txt_md5": base_name + ".segments.txt.md5", } actual = somatic_targeted_seq_cnv_calling_workflow.get_output_files("copywriter", "call") assert actual == expected @@ -1042,7 +1042,7 @@ def test_somatic_targeted_seq_cnv_calling_workflow(somatic_targeted_seq_cnv_call # copywriter tpl = ( "output/bwa.copywriter.P00{i}-T{t}-DNA1-WGS1/out/" - "bwa.copywriter.P00{i}-T{t}-DNA1-WGS1_{ext}" + "bwa.copywriter.P00{i}-T{t}-DNA1-WGS1.{ext}" ) expected += [ tpl.format(i=i, t=t, ext=ext) @@ -1060,6 +1060,5 @@ def test_somatic_targeted_seq_cnv_calling_workflow(somatic_targeted_seq_cnv_call ] expected = list(sorted(expected)) actual = 
list(sorted(somatic_targeted_seq_cnv_calling_workflow.get_result_files())) - # HACK TODO actual = [f for f in actual if "/log/" not in f] assert expected == actual diff --git a/tests/snappy_pipeline/workflows/test_workflows_somatic_wgs_cnv_calling.py b/tests/snappy_pipeline/workflows/test_workflows_somatic_wgs_cnv_calling.py index 781c9188d..d8d8e0cca 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_somatic_wgs_cnv_calling.py +++ b/tests/snappy_pipeline/workflows/test_workflows_somatic_wgs_cnv_calling.py @@ -460,11 +460,17 @@ def test_somatic_cnv_calling_workflow(somatic_wgs_cnv_calling_workflow): "ratio.txt", "ratio.txt.md5", "gene_log2.txt", + "gene_log2.txt.md5", "gene_call.txt", + "gene_call.txt.md5", "segments.txt", + "segments.txt.md5", "heatmap.png", + "heatmap.png.md5", "scatter.png", + "scatter.png.md5", "diagram.pdf", + "diagram.pdf.md5", ) for mapper in ("bwa",) for cnv_caller in ("control_freec",) From 1dad99c31d8d353d87898a521603bfaa13f19d0a Mon Sep 17 00:00:00 2001 From: Eudes Barbosa Date: Tue, 28 Jun 2022 14:33:45 +0200 Subject: [PATCH 2/6] Now outputs both Full and Filtered VCF files for mutect. Renamed 'common' to 'biallelic' variants - follow GATK convention. 
--- .../workflows/somatic_variant_calling/__init__.py | 15 ++++++++++++--- .../wrappers/mutect2/pileup/wrapper.py | 9 +++++---- .../wrappers/mutect_par/parallel_mutect.py | 13 ++++++------- .../wrappers/scalpel/somatic/wrapper.py | 2 +- .../test_workflows_somatic_variant_calling.py | 11 ++++++++++- .../wrappers/data/mutect_par.snakemake | 6 +++--- .../wrappers/test_mutect_par_run.py | 6 +++--- 7 files changed, 40 insertions(+), 22 deletions(-) diff --git a/snappy_pipeline/workflows/somatic_variant_calling/__init__.py b/snappy_pipeline/workflows/somatic_variant_calling/__init__.py index 2d46b6956..6e4f6a6a4 100644 --- a/snappy_pipeline/workflows/somatic_variant_calling/__init__.py +++ b/snappy_pipeline/workflows/somatic_variant_calling/__init__.py @@ -98,7 +98,16 @@ __author__ = "Manuel Holtgrewe " #: Extensions of files to create as main payload -EXT_VALUES = (".vcf.gz", ".vcf.gz.tbi", ".vcf.gz.md5", ".vcf.gz.tbi.md5") +EXT_VALUES = ( + ".vcf.gz", + ".vcf.gz.tbi", + ".vcf.gz.md5", + ".vcf.gz.tbi.md5", + ".full.vcf.gz", + ".full.vcf.gz.tbi", + ".full.vcf.gz.md5", + ".full.vcf.gz.tbi.md5", +) #: Names of the files to create for the extension EXT_NAMES = ("vcf", "tbi", "vcf_md5", "tbi_md5") @@ -234,9 +243,9 @@ - 'GL000220.*' # Contig with problematic, repetitive DNA in GRCh37 # Configuration for MuTect 2 mutect2: - panel_of_normals: '' # Set path to panel of normals vcf if required + panel_of_normals: '' # Set path to panel of normals vcf if required germline_resource: REQUIRED # Germline variants resource (same as panel of normals) - common_variants: REQUIRED # Common germline variants for contamination estimation + common_biallelic: REQUIRED # Common biallelic germline variants for contamination estimation # Parallelization configuration num_cores: 2 # number of cores to use locally window_length: 50000000 # split input into windows of this size, each triggers a job diff --git a/snappy_wrappers/wrappers/mutect2/pileup/wrapper.py 
b/snappy_wrappers/wrappers/mutect2/pileup/wrapper.py index 88ea8d8fa..a21685ca1 100644 --- a/snappy_wrappers/wrappers/mutect2/pileup/wrapper.py +++ b/snappy_wrappers/wrappers/mutect2/pileup/wrapper.py @@ -7,8 +7,8 @@ __author__ = "Manuel Holtgrewe " reference = snakemake.config["static_data_config"]["reference"]["path"] -common_variants = snakemake.config["step_config"]["somatic_variant_calling"]["mutect2"][ - "common_variants" +common_biallelic = snakemake.config["step_config"]["somatic_variant_calling"]["mutect2"][ + "common_biallelic" ] shell.executable("/bin/bash") @@ -44,8 +44,8 @@ gatk --java-options '-Xms4000m -Xmx8000m' GetPileupSummaries \ --input {snakemake.input.bam} \ --reference {reference} \ - --variant {common_variants} \ - --intervals {common_variants} \ + --variant {common_biallelic} \ + --intervals {common_biallelic} \ --output $out_base.pileup pushd $TMPDIR && \ @@ -54,6 +54,7 @@ done && \ popd +mkdir -p $(dirname {snakemake.output.pileup}) mv $out_base.* $(dirname {snakemake.output.pileup}) """ ) diff --git a/snappy_wrappers/wrappers/mutect_par/parallel_mutect.py b/snappy_wrappers/wrappers/mutect_par/parallel_mutect.py index db8d77797..f069956b3 100644 --- a/snappy_wrappers/wrappers/mutect_par/parallel_mutect.py +++ b/snappy_wrappers/wrappers/mutect_par/parallel_mutect.py @@ -24,14 +24,13 @@ class ParallelMutectWrapper(ParallelSomaticVariantCallingBaseWrapper): """Parallel execution of MuTect""" - # TODO: probably, nobody looked at anything but the vcf/tbi files... get rid of them? 
realpath_output_keys = ( "vcf", "vcf_md5", "tbi", "tbi_md5", - "full_vcf", - "full_vcf_md5", + "full", + "full_md5", "full_tbi", "full_tbi", "full_tbi_md5", @@ -48,8 +47,8 @@ class ParallelMutectWrapper(ParallelSomaticVariantCallingBaseWrapper): "vcf_md5": "vcf.gz.md5", "tbi": "vcf.gz.tbi", "tbi_md5": "vcf.gz.tbi.md5", - "full_vcf": "full.vcf.gz", - "full_vcf_md5": "full.vcf.gz.md5", + "full": "full.vcf.gz", + "full_md5": "full.vcf.gz.md5", "full_tbi": "full.vcf.gz.tbi", "full_tbi_md5": "full.vcf.gz.tbi.md5", "wig": "full.wig.txt.gz", @@ -135,8 +134,8 @@ def construct_merge_rule(self): mkdir -p $(dirname {{output.txt}}) mv output/result.full.out.txt.gz {{output.txt}} mv output/result.full.out.txt.gz.md5 {{output.txt_md5}} - mv output/result.full.vcf.gz {{output.full_vcf}} - mv output/result.full.vcf.gz.md5 {{output.full_vcf_md5}} + mv output/result.full.vcf.gz {{output.full}} + mv output/result.full.vcf.gz.md5 {{output.full_md5}} mv output/result.full.vcf.gz.tbi {{output.full_tbi}} mv output/result.full.vcf.gz.tbi.md5 {{output.full_tbi_md5}} mv output/result.vcf.gz {{output.vcf}} diff --git a/snappy_wrappers/wrappers/scalpel/somatic/wrapper.py b/snappy_wrappers/wrappers/scalpel/somatic/wrapper.py index bbb671158..582b936e5 100644 --- a/snappy_wrappers/wrappers/scalpel/somatic/wrapper.py +++ b/snappy_wrappers/wrappers/scalpel/somatic/wrapper.py @@ -109,7 +109,7 @@ # split out somatic variants bcftools view \ - --include 'INFO/INH=="no" & INFO/SOMATIC==1' \ + --include 'FILTER=="PASS" & INFO/INH=="no" & INFO/SOMATIC==1' \ {snakemake.output.full_vcf} \ | bgzip -c \ > {snakemake.output.vcf} diff --git a/tests/snappy_pipeline/workflows/test_workflows_somatic_variant_calling.py b/tests/snappy_pipeline/workflows/test_workflows_somatic_variant_calling.py index 3cd972be8..51a6189c8 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_somatic_variant_calling.py +++ b/tests/snappy_pipeline/workflows/test_workflows_somatic_variant_calling.py @@ -935,7 +935,16 @@ def 
test_somatic_variant_calling_workflow(somatic_variant_calling_workflow): expected = [ tpl.format(mapper=mapper, var_caller=var_caller, i=i, t=t, ext=ext) for i, t in ((1, 1), (2, 1), (2, 2)) - for ext in ("vcf.gz", "vcf.gz.md5", "vcf.gz.tbi", "vcf.gz.tbi.md5") + for ext in ( + "vcf.gz", + "vcf.gz.md5", + "vcf.gz.tbi", + "vcf.gz.tbi.md5", + "full.vcf.gz", + "full.vcf.gz.md5", + "full.vcf.gz.tbi", + "full.vcf.gz.tbi.md5", + ) for mapper in ("bwa",) for var_caller in ("mutect", "scalpel") ] diff --git a/tests/snappy_wrappers/wrappers/data/mutect_par.snakemake b/tests/snappy_wrappers/wrappers/data/mutect_par.snakemake index 4d9e7033b..da045b689 100644 --- a/tests/snappy_wrappers/wrappers/data/mutect_par.snakemake +++ b/tests/snappy_wrappers/wrappers/data/mutect_par.snakemake @@ -1,6 +1,6 @@ rule merge_all: input: ['job_out.0.d/.done', 'job_out.1.d/.done', 'job_out.2.d/.done', 'job_out.3.d/.done', 'job_out.4.d/.done'] - output: **{'txt': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1full.out.txt.gz', 'txt_md5': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1full.out.txt.gz.md5', 'vcf': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1vcf.gz', 'vcf_md5': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1vcf.gz.md5', 'tbi': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1vcf.gz.tbi', 'tbi_md5': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1vcf.gz.tbi.md5', 'full_vcf': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1full.vcf.gz', 'full_vcf_md5': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1full.vcf.gz.md5', 'full_tbi': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1full.vcf.gz.tbi', 'full_tbi_md5': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1full.vcf.gz.tbi.md5', 'wig': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1full.wig.txt.gz', 'wig_md5': 
'/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1full.wig.txt.gz.md5'} + output: **{'txt': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.full.out.txt.gz', 'txt_md5': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.full.out.txt.gz.md5', 'vcf': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.vcf.gz', 'vcf_md5': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.vcf.gz.md5', 'tbi': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.vcf.gz.tbi', 'tbi_md5': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.vcf.gz.tbi.md5', 'full': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.full.vcf.gz', 'full_md5': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.full.vcf.gz.md5', 'full_tbi': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.full.vcf.gz.tbi', 'full_tbi_md5': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.full.vcf.gz.tbi.md5', 'wig': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.full.wig.txt.gz', 'wig_md5': '/work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1.full.wig.txt.gz.md5'} threads: resource_merge_threads resources: time=resource_merge_time, @@ -53,8 +53,8 @@ rule merge_all: mkdir -p $(dirname {output.txt}) mv output/result.full.out.txt.gz {output.txt} mv output/result.full.out.txt.gz.md5 {output.txt_md5} - mv output/result.full.vcf.gz {output.full_vcf} - mv output/result.full.vcf.gz.md5 {output.full_vcf_md5} + mv output/result.full.vcf.gz {output.full} + mv output/result.full.vcf.gz.md5 {output.full_md5} mv output/result.full.vcf.gz.tbi {output.full_tbi} mv output/result.full.vcf.gz.tbi.md5 {output.full_tbi_md5} mv output/result.vcf.gz {output.vcf} diff --git a/tests/snappy_wrappers/wrappers/test_mutect_par_run.py b/tests/snappy_wrappers/wrappers/test_mutect_par_run.py index 4a3ce93f4..466e13f5a 100644 
--- a/tests/snappy_wrappers/wrappers/test_mutect_par_run.py +++ b/tests/snappy_wrappers/wrappers/test_mutect_par_run.py @@ -78,7 +78,7 @@ def minimal_config(): @pytest.fixture def snakemake_output_dict(): """Returns dictionary that defined snakemake.output""" - output_base_name = "work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1" + output_base_name = "work/bwa.mutect.P001-T1-DNA1-WGS1/out/bwa.mutect.P001-T1-DNA1-WGS1." return { "txt": output_base_name + "full.out.txt.gz", "txt_md5": output_base_name + "full.out.txt.gz.md5", @@ -86,8 +86,8 @@ def snakemake_output_dict(): "vcf_md5": output_base_name + "vcf.gz.md5", "tbi": output_base_name + "vcf.gz.tbi", "tbi_md5": output_base_name + "vcf.gz.tbi.md5", - "full_vcf": output_base_name + "full.vcf.gz", - "full_vcf_md5": output_base_name + "full.vcf.gz.md5", + "full": output_base_name + "full.vcf.gz", + "full_md5": output_base_name + "full.vcf.gz.md5", "full_tbi": output_base_name + "full.vcf.gz.tbi", "full_tbi_md5": output_base_name + "full.vcf.gz.tbi.md5", "wig": output_base_name + "full.wig.txt.gz", From ff3aaa719b2a19be6c7fccf42f7633b9b4b23b59 Mon Sep 17 00:00:00 2001 From: Eudes Barbosa Date: Tue, 28 Jun 2022 14:42:47 +0200 Subject: [PATCH 3/6] Added hack to catch path to `snappy_vcf_sort`. Adjusted multiple filter conditions in eb_filter wrapper.
--- snappy_wrappers/wrappers/eb_filter/wrapper.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/snappy_wrappers/wrappers/eb_filter/wrapper.py b/snappy_wrappers/wrappers/eb_filter/wrapper.py index 568a25926..991cb4152 100644 --- a/snappy_wrappers/wrappers/eb_filter/wrapper.py +++ b/snappy_wrappers/wrappers/eb_filter/wrapper.py @@ -37,10 +37,13 @@ fi fi +# Hack: get back bin directory of base/root environment - find `snappy_vcf_sort` +export PATH=$PATH:$(dirname $(dirname $(which conda)))/bin + # Used to be: # filter='FILTER == "germline_risk" || FILTER == "t_lod_fstar" || FILTER == "OffExome" || ANN ~ "stream_gene_variant"' if [[ {snakemake.input.vcf} == *"mutect2"* ]]; then - filter='FILTER == "germline" || FILTER == "weak_evidence" || FILTER == "OffExome" || ANN ~ "stream_gene_variant"' + filter='FILTER ~ "germline" || FILTER ~ "weak_evidence" || FILTER ~ "OffExome" || ANN ~ "stream_gene_variant"' {cmd_fetch} \ | bcftools view \ -e "$filter" \ From 59315cdcd56301702862f9737cf17eea49411c66 Mon Sep 17 00:00:00 2001 From: Eudes Barbosa Date: Tue, 28 Jun 2022 16:36:51 +0200 Subject: [PATCH 4/6] Included mutect2 in the somatic_variant_calling workflow test. 
--- .../workflows/test_workflows_somatic_variant_calling.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/snappy_pipeline/workflows/test_workflows_somatic_variant_calling.py b/tests/snappy_pipeline/workflows/test_workflows_somatic_variant_calling.py index 51a6189c8..fc43f2fcf 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_somatic_variant_calling.py +++ b/tests/snappy_pipeline/workflows/test_workflows_somatic_variant_calling.py @@ -41,6 +41,7 @@ def minimal_config(): somatic_variant_calling: tools: - mutect + - mutect2 - scalpel scalpel: path_target_regions: /path/to/target/regions.bed @@ -925,7 +926,7 @@ def test_somatic_variant_calling_workflow(somatic_variant_calling_workflow): # Perform the tests # # Check created sub steps - expected = ["link_out", "mutect", "scalpel"] + expected = ["link_out", "mutect", "mutect2", "scalpel"] assert set(expected).issubset(list(sorted(somatic_variant_calling_workflow.sub_steps.keys()))) # Check result file construction tpl = ( @@ -946,7 +947,7 @@ def test_somatic_variant_calling_workflow(somatic_variant_calling_workflow): "full.vcf.gz.tbi.md5", ) for mapper in ("bwa",) - for var_caller in ("mutect", "scalpel") + for var_caller in ("mutect", "mutect2", "scalpel") ] # add log files tpl = ( @@ -965,7 +966,7 @@ def test_somatic_variant_calling_workflow(somatic_variant_calling_workflow): "log.md5", ) for mapper in ("bwa",) - for var_caller in ("mutect", "scalpel") + for var_caller in ("mutect", "mutect2", "scalpel") ] expected = list(sorted(expected)) actual = list(sorted(somatic_variant_calling_workflow.get_result_files())) From be05d601e45d9c794f8c04e908806521b13b5f09 Mon Sep 17 00:00:00 2001 From: Eudes Barbosa Date: Tue, 28 Jun 2022 16:47:56 +0200 Subject: [PATCH 5/6] Fixed log definition in wrappers. 
--- .../wrappers/mutect2/contamination/wrapper.py | 10 +++++----- snappy_wrappers/wrappers/mutect2/filter/wrapper.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/snappy_wrappers/wrappers/mutect2/contamination/wrapper.py b/snappy_wrappers/wrappers/mutect2/contamination/wrapper.py index fb8091c6f..3af1e731b 100644 --- a/snappy_wrappers/wrappers/mutect2/contamination/wrapper.py +++ b/snappy_wrappers/wrappers/mutect2/contamination/wrapper.py @@ -18,13 +18,13 @@ export LD_LIBRARY_PATH=$(dirname $(which bgzip))/../lib # Also pipe everything to log file -if [[ -n "{snakemake.log}" ]]; then +if [[ -n "{snakemake.log.log}" ]]; then if [[ "$(set +e; tty; set -e)" != "" ]]; then - rm -f "{snakemake.log}" && mkdir -p $(dirname {snakemake.log}) - exec &> >(tee -a "{snakemake.log}" >&2) + rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) + exec &> >(tee -a "{snakemake.log.log}" >&2) else - rm -f "{snakemake.log}" && mkdir -p $(dirname {snakemake.log}) - echo "No tty, logging disabled" >"{snakemake.log}" + rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) + echo "No tty, logging disabled" >"{snakemake.log.log}" fi fi diff --git a/snappy_wrappers/wrappers/mutect2/filter/wrapper.py b/snappy_wrappers/wrappers/mutect2/filter/wrapper.py index 3a20ea262..067c6ca8d 100644 --- a/snappy_wrappers/wrappers/mutect2/filter/wrapper.py +++ b/snappy_wrappers/wrappers/mutect2/filter/wrapper.py @@ -18,13 +18,13 @@ export LD_LIBRARY_PATH=$(dirname $(which bgzip))/../lib # Also pipe everything to log file -if [[ -n "{snakemake.log}" ]]; then +if [[ -n "{snakemake.log.log}" ]]; then if [[ "$(set +e; tty; set -e)" != "" ]]; then - rm -f "{snakemake.log}" && mkdir -p $(dirname {snakemake.log}) - exec &> >(tee -a "{snakemake.log}" >&2) + rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) + exec &> >(tee -a "{snakemake.log.log}" >&2) else - rm -f "{snakemake.log}" && mkdir -p $(dirname {snakemake.log}) - echo 
"No tty, logging disabled" >"{snakemake.log}" + rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) + echo "No tty, logging disabled" >"{snakemake.log.log}" fi fi From 15cd954097dc4d90576479f871b45c29a9f4f0e8 Mon Sep 17 00:00:00 2001 From: Eric Blanc Date: Fri, 11 Feb 2022 14:19:10 +0100 Subject: [PATCH 6/6] Collect target coverage disabled when no target coverage files requested --- snappy_pipeline/workflows/ngs_mapping/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/snappy_pipeline/workflows/ngs_mapping/__init__.py b/snappy_pipeline/workflows/ngs_mapping/__init__.py index 02972e403..cccc9b4f0 100644 --- a/snappy_pipeline/workflows/ngs_mapping/__init__.py +++ b/snappy_pipeline/workflows/ngs_mapping/__init__.py @@ -1269,9 +1269,11 @@ def get_result_files(self): ) ) + target_coverage = False for sheet in self.shortcut_sheets: for ngs_library in sheet.all_ngs_libraries: if ngs_library.name in self.ngs_library_to_kit: + target_coverage = True extraction_type = ngs_library.test_sample.extra_infos["extractionType"] suffix = ( "_long" @@ -1287,8 +1289,9 @@ def get_result_files(self): ngs_library=[ngs_library], ext=["txt", "txt.md5"], ) - yield "output/target_cov_report/out/target_cov_report.txt" - yield "output/target_cov_report/out/target_cov_report.txt.md5" + if target_coverage: + yield "output/target_cov_report/out/target_cov_report.txt" + yield "output/target_cov_report/out/target_cov_report.txt.md5" if ( self.config["picard_hs_metrics"]["path_targets_interval_list"] and self.config["picard_hs_metrics"]["path_baits_interval_list"]