From c1a215ed31f50d4237e8a5f7f285733241eebffe Mon Sep 17 00:00:00 2001
From: ericblanc20 <eric.blanc@bih-charite.de>
Date: Wed, 30 Aug 2023 17:44:48 +0200
Subject: [PATCH] feat: added support for sequenza (414-2) (#436)

---
 .../Snakefile                                 |  73 +++++++
 .../__init__.py                               | 199 ++++++++++++++----
 snappy_wrappers/wrappers/r/wrapper.py         |   4 +-
 .../wrappers/sequenza/environment.yaml        |  10 +
 .../sequenza/gcreference/environment.yaml     |   1 +
 .../wrappers/sequenza/gcreference/wrapper.py  |  50 +++++
 .../wrappers/sequenza/report/environment.yaml |   1 +
 .../wrappers/sequenza/report/wrapper.py       |  98 +++++++++
 .../wrappers/sequenza/run/environment.yaml    |   1 +
 .../wrappers/sequenza/run/wrapper.py          |  71 +++++++
 ...kflows_somatic_targeted_seq_cnv_calling.py | 134 +++++++++++-
 11 files changed, 601 insertions(+), 41 deletions(-)
 create mode 100644 snappy_wrappers/wrappers/sequenza/environment.yaml
 create mode 120000 snappy_wrappers/wrappers/sequenza/gcreference/environment.yaml
 create mode 100644 snappy_wrappers/wrappers/sequenza/gcreference/wrapper.py
 create mode 120000 snappy_wrappers/wrappers/sequenza/report/environment.yaml
 create mode 100644 snappy_wrappers/wrappers/sequenza/report/wrapper.py
 create mode 120000 snappy_wrappers/wrappers/sequenza/run/environment.yaml
 create mode 100644 snappy_wrappers/wrappers/sequenza/run/wrapper.py

diff --git a/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/Snakefile b/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/Snakefile
index 91d449628..37c8217da 100644
--- a/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/Snakefile
+++ b/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/Snakefile
@@ -233,3 +233,76 @@ rule somatic_targeted_seq_cnv_calling_cnvetti_off_target_postprocess:
         **wf.get_log_file("cnvetti_off_target", "postprocess"),
     wrapper:
         wf.wrapper_path("cnvetti/on_target/postprocess")
+
+
+# Run sequenza ----------------------------------------------------
+
+
+rule somatic_targeted_seq_cnv_calling_sequenza_install:
+    output:
+        **wf.get_output_files("sequenza", "install"),
+    params:
+        packages=[
+            {"name": "aroneklund/copynumber", "repo": "github"},
+        ],
+    threads: wf.get_resource("sequenza", "install", "threads")
+    resources:
+        time=wf.get_resource("sequenza", "install", "time"),
+        memory=wf.get_resource("sequenza", "install", "memory"),
+        partition=wf.get_resource("sequenza", "install", "partition"),
+        tmpdir=wf.get_resource("sequenza", "install", "tmpdir"),
+    log:
+        **wf.get_log_file("sequenza", "install"),
+    wrapper:
+        wf.wrapper_path("r")
+
+
+rule somatic_targeted_seq_cnv_calling_sequenza_gcreference:
+    output:
+        **wf.get_output_files("sequenza", "gcreference"),
+    threads: wf.get_resource("sequenza", "gcreference", "threads")
+    resources:
+        time=wf.get_resource("sequenza", "gcreference", "time"),
+        memory=wf.get_resource("sequenza", "gcreference", "memory"),
+        partition=wf.get_resource("sequenza", "gcreference", "partition"),
+        tmpdir=wf.get_resource("sequenza", "gcreference", "tmpdir"),
+    log:
+        **wf.get_log_file("sequenza", "gcreference"),
+    wrapper:
+        wf.wrapper_path("sequenza/gcreference")
+
+
+rule somatic_targeted_seq_cnv_calling_sequenza_run:
+    input:
+        unpack(wf.get_input_files("sequenza", "run")),
+    output:
+        **wf.get_output_files("sequenza", "run"),
+    threads: wf.get_resource("sequenza", "run", "threads")
+    resources:
+        time=wf.get_resource("sequenza", "run", "time"),
+        memory=wf.get_resource("sequenza", "run", "memory"),
+        partition=wf.get_resource("sequenza", "run", "partition"),
+        tmpdir=wf.get_resource("sequenza", "run", "tmpdir"),
+    log:
+        **wf.get_log_file("sequenza", "run"),
+    wrapper:
+        wf.wrapper_path("sequenza/run")
+
+
+rule somatic_targeted_seq_cnv_calling_sequenza_report:
+    input:
+        **wf.get_input_files("sequenza", "report"),
+    output:
+        **wf.get_output_files("sequenza", "report"),
+    params:
+        sample_id=wf.get_params("sequenza", "report"),
+    threads: wf.get_resource("sequenza", "report", "threads")
+    resources:
+        time=wf.get_resource("sequenza", "report", "time"),
+        memory=wf.get_resource("sequenza", "report", "memory"),
+        partition=wf.get_resource("sequenza", "report", "partition"),
+        tmpdir=wf.get_resource("sequenza", "report", "tmpdir"),
+    log:
+        **wf.get_log_file("sequenza", "report"),
+    wrapper:
+        wf.wrapper_path("sequenza/report")
diff --git a/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/__init__.py b/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/__init__.py
index 4f80a78c5..484247adc 100644
--- a/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/__init__.py
+++ b/snappy_pipeline/workflows/somatic_targeted_seq_cnv_calling/__init__.py
@@ -104,6 +104,7 @@
 from biomedsheets.shortcuts import CancerCaseSheet, CancerCaseSheetOptions, is_not_background
 from snakemake.io import expand
 
+from snappy_pipeline.base import UnsupportedActionException
 from snappy_pipeline.utils import dictify, listify
 from snappy_pipeline.workflows.abstract import (
     BaseStep,
@@ -120,7 +121,7 @@
 # Default configuration somatic_targeted_seq_cnv_calling
 step_config:
   somatic_targeted_seq_cnv_calling:
-    tools: ['cnvkit']  # REQUIRED - available: 'cnvkit', 'copywriter', 'cnvetti_on_target' and 'cnvetti_off_target'
+    tools: ['cnvkit']  # REQUIRED - available: 'cnvkit', 'sequenza', 'cnvetti_on_target', 'cnvetti_off_target' and 'copywriter' (deprecated)
     path_ngs_mapping: ../ngs_mapping  # REQUIRED
     cnvkit:
       path_target: REQUIRED             # Usually ../panel_of_normals/output/cnvkit.target/out/cnvkit.target.bed
@@ -162,6 +163,27 @@
       segmetrics_alpha: 0.05            # [segmetrics] Significance cutoff
       segmetrics_bootstrap: 100         # [segmetrics] Number of bootstraps
       smooth_bootstrap: False           # [segmetrics] Smooth bootstrap results
+    sequenza:
+      length: 50
+      assembly: "hg19"     # Must be hg38 for GRCh38. See copynumber for complete list (augmented with hg38)
+      extra_args: {}       # Extra arguments for sequenza bam2seqz, valid values:
+      # hom: 0.9           # Threshold to select homozygous positions
+      # het: 0.25          # Threshold to select heterozygous positions
+      # qlimit: 20         # Minimum nucleotide quality score for inclusion in the counts
+      # qformat: "sanger"  # Quality format, options are "sanger" or "illumina". This will add an offset of 33 or 64 respectively to the qlimit value
+      ignore_chroms: # patterns of chromosome names to ignore
+        [X, Y, MT, NC_007605. hs37d5, chrEBV, '*_decoy', 'HLA-*', 'GL000220.*']  # Genome hs37d5
+      # [chrX, chrY, chrM, '*_random', 'chrUn_*', chrEBV, '*_decoy', 'HPV*', CMV, HBV, KSHV, MCV, SV40, 'HCV-*', 'HIV-*', 'HTLV-*']  # Genome GRch38.d1.vd1
+      extra_args_extract:        # Valid arguments: see ?sequenza::sequenza.extract in R
+        gamma: 60                # scarHRD value
+        kmin: 50                 # scarHRD value
+      extra_args_fit:            # Valid arguments: see ?sequenza::sequenza.fit in R
+        N.ratio.filter: 10       # scarHRD value
+        N.BAF.filter: 1          # scarHRD value
+        segment.filter: 3000000  # scarHRD value
+        mufreq.treshold: 0.1     # scarHRD value
+        ratio.priority: False    # scarHRD value
+        ploidy: [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5.0, 5.1, 5.2, 5.3, 5.4, 5.5]
     copywriter:
       path_target_regions: REQUIRED # REQUIRED
       bin_size: 20000 # TODO: make actually configurable
@@ -420,6 +442,134 @@ def format_id(*args):
     return {key: "{{{}}}".format(key) for key in args}
 
 
+class SequenzaStepPart(SomaticTargetedSeqCnvCallingStepPart):
+    """Perform somatic targeted CNV calling using sequenza"""
+
+    #: Step name
+    name = "sequenza"
+
+    #: Class available actions
+    actions = (
+        "install",
+        "gcreference",
+        "run",
+        "report",
+    )
+
+    #: Class resource usage dictionary. Key: action type (string); Value: resource (ResourceUsage).
+    resource_usage = {
+        "run": ResourceUsage(
+            threads=4,
+            time="24:00:00",  # 1 day
+            memory="16G",
+        ),
+    }
+
+    def __init__(self, parent):
+        super().__init__(parent)
+
+    def get_input_files(self, action):
+        """Return input paths input function, dependent on rule"""
+        # Validate action
+        self._validate_action(action)
+
+        method_mapping = {
+            "run": self._get_input_files_run(),
+            "report": self._get_input_files_report(),
+        }
+        return method_mapping[action]
+
+    def _get_input_files_run(self):
+        @dictify
+        def input_function(wildcards):
+            ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
+            normal_base_path = (
+                "output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
+                    normal_library=self.get_normal_lib_name(wildcards), **wildcards
+                )
+            )
+            tumor_base_path = "output/{mapper}.{library_name}/out/{mapper}.{library_name}".format(
+                **wildcards
+            )
+            yield "gc", "work/static_data/out/sequenza.{length}.wig.gz".format(
+                length=self.config["sequenza"]["length"],
+            )
+            yield "normal_bam", ngs_mapping(normal_base_path + ".bam")
+            yield "normal_bai", ngs_mapping(normal_base_path + ".bam.bai")
+            yield "tumor_bam", ngs_mapping(tumor_base_path + ".bam")
+            yield "tumor_bai", ngs_mapping(tumor_base_path + ".bam.bai")
+
+        return input_function
+
+    def _get_input_files_report(self):
+        return {
+            "packages": "work/R_packages/out/sequenza.done",
+            "seqz": "work/{mapper}.sequenza.{library_name}/out/{mapper}.sequenza.{library_name}.seqz.gz",
+        }
+
+    def get_output_files(self, action):
+        if action == "install":
+            return {"done": "work/R_packages/out/sequenza.done"}
+        elif action == "gcreference":
+            return {
+                "gc": "work/static_data/out/sequenza.{length}.wig.gz".format(
+                    length=self.config["sequenza"]["length"],
+                )
+            }
+        elif action == "run":
+            return {
+                "seqz": "work/{mapper}.sequenza.{library_name}/out/{mapper}.sequenza.{library_name}.seqz.gz",
+                "seqz_md5": "work/{mapper}.sequenza.{library_name}/out/{mapper}.sequenza.{library_name}.seqz.gz.md5",
+            }
+        elif action == "report":
+            return {"done": "work/{mapper}.sequenza.{library_name}/report/.done"}
+        else:
+            raise UnsupportedActionException(
+                "Action '{action}' is not supported. Valid options: {valid}".format(
+                    action=action, valid=", ".join(self.actions)
+                )
+            )
+
+    def get_params(self, action):
+        self._validate_action(action)
+        return self._get_params_report
+
+    def _get_params_report(self, wildcards):
+        return wildcards["library_name"]
+
+    @dictify
+    def get_log_file(self, action):
+        """Return dict of log files."""
+        if action == "install":
+            prefix = "work/R_packages/log/sequenza"
+        elif action == "gcreference":
+            prefix = "work/static_data/log/sequenza.{length}".format(
+                length=self.config["sequenza"]["length"],
+            )
+        elif action == "run":
+            prefix = (
+                "work/{mapper}.sequenza.{library_name}/log/{mapper}.sequenza.{library_name}.run"
+            )
+        elif action == "report":
+            prefix = (
+                "work/{mapper}.sequenza.{library_name}/log/{mapper}.sequenza.{library_name}.report"
+            )
+        else:
+            raise UnsupportedActionException(
+                "Action '{action}' is not supported. Valid options: {valid}".format(
+                    action=action, valid=", ".join(self.actions)
+                )
+            )
+        key_ext = (
+            ("log", ".log"),
+            ("conda_info", ".conda_info.txt"),
+            ("conda_list", ".conda_list.txt"),
+        )
+        for key, ext in key_ext:
+            yield key, prefix + ext
+            yield key + "_md5", prefix + ext + ".md5"
+
+
 class CnvKitStepPart(SomaticTargetedSeqCnvCallingStepPart):
     """Perform somatic targeted CNV calling using cnvkit"""
 
@@ -438,8 +588,11 @@ class CnvKitStepPart(SomaticTargetedSeqCnvCallingStepPart):
         "report",
     )
 
+    # Overwrite defaults
+    default_resource_usage = ResourceUsage(threads=1, time="03:59:59", memory="7680M")  # 4h
+
     #: Class resource usage dictionary. Key: action type (string); Value: resource (ResourceUsage).
-    resource_usage_dict = {
+    resource_usage = {
         "plot": ResourceUsage(
             threads=1,
             time="08:00:00",  # 1 day
@@ -450,11 +603,6 @@ class CnvKitStepPart(SomaticTargetedSeqCnvCallingStepPart):
             time="08:00:00",  # 8 hours
             memory=f"{16 * 1024}M",
         ),
-        "default": ResourceUsage(
-            threads=1,
-            time="03:59:59",  # 1 day
-            memory=f"{int(7.5 * 1024)}M",
-        ),
     }
 
     def __init__(self, parent):
@@ -700,23 +848,6 @@ def get_log_file(self, action):
             log_files[key + "_md5"] = prefix + ext + ".md5"
         return log_files
 
-    def get_resource_usage(self, action):
-        """Get Resource Usage
-
-        :param action: Action (i.e., step) in the workflow, example: 'run'.
-        :type action: str
-
-        :return: Returns ResourceUsage for step.
-        """
-        # Validate action
-        self._validate_action(action)
-        if action == "plot":
-            return self.resource_usage_dict.get("plot")
-        elif action == "coverage":
-            return self.resource_usage_dict.get("coverage")
-        else:
-            return self.resource_usage_dict.get("default")
-
 
 class CopywriterStepPart(SomaticTargetedSeqCnvCallingStepPart):
     """Perform somatic targeted CNV calling using CopywriteR"""
@@ -731,7 +862,7 @@ class CopywriterStepPart(SomaticTargetedSeqCnvCallingStepPart):
     actions_w_in_out = ("run", "call")
 
     #: Class resource usage dictionary. Key: action type (string); Value: resource (ResourceUsage).
-    resource_usage_dict = {
+    resource_usage = {
         "prepare": ResourceUsage(
             threads=1,
             time="02:00:00",  # 2 hours
@@ -868,18 +999,6 @@ def _get_log_file(self, action):
             for key, ext in key_ext:
                 yield key, tpl + ext
 
-    def get_resource_usage(self, action):
-        """Get Resource Usage
-
-        :param action: Action (i.e., step) in the workflow, example: 'run'.
-        :type action: str
-
-        :return: Returns ResourceUsage for step.
-        """
-        # Validate action
-        self._validate_action(action)
-        return self.resource_usage_dict.get(action)
-
 
 class SomaticTargetedSeqCnvCallingWorkflow(BaseStep):
     """Perform somatic targeted sequencing CNV calling"""
@@ -915,6 +1034,7 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir)
                 CnvettiOnTargetStepPart,
                 CnvKitStepPart,
                 CopywriterStepPart,
+                SequenzaStepPart,
                 LinkOutStepPart,
             )
         )
@@ -926,6 +1046,7 @@ def get_result_files(self):
         """Return list of result files for the somatic targeted sequencing CNV calling step"""
         tool_actions = {
             "cnvkit": ["fix", "postprocess", "report", "export"],
+            "sequenza": ("run",),
             "copywriter": ("call",),
             "cnvetti_on_target": ("coverage", "segment", "postprocess"),
             "cnvetti_off_target": ("coverage", "segment", "postprocess"),
@@ -951,7 +1072,7 @@ def get_result_files(self):
                         except AttributeError:
                             tpls = [self.sub_steps[tool].get_output_files(action)]
                         try:
-                            tpls += self.sub_steps[tool].get_log_file(action).values()
+                            tpls += list(self.sub_steps[tool].get_log_file(action).values())
                         except AttributeError:
                             tpls += [self.sub_steps[tool].get_log_file(action)]
                         for tpl in tpls:
@@ -961,7 +1082,7 @@ def get_result_files(self):
                                 library_name=[sample_pair.tumor_sample.dna_ngs_library.name],
                             )
                             for f in filenames:
-                                if ".tmp." not in f:
+                                if ".tmp." not in f and not f.endswith("/.done"):
                                     yield f.replace("work/", "output/")
 
     def check_config(self):
diff --git a/snappy_wrappers/wrappers/r/wrapper.py b/snappy_wrappers/wrappers/r/wrapper.py
index a8452b96a..e43570431 100644
--- a/snappy_wrappers/wrappers/r/wrapper.py
+++ b/snappy_wrappers/wrappers/r/wrapper.py
@@ -26,7 +26,9 @@
         to_install[package["repo"]].append(package["name"])
     else:
         to_install["cran"].append(package["name"])
-to_install = {k: "c({})".format(", ".join(['"{}"'.format(vv) for vv in v])) for k, v in to_install.items()}
+to_install = {
+    k: "c({})".format(", ".join(['"{}"'.format(vv) for vv in v])) for k, v in to_install.items()
+}
 
 shell.executable("/bin/bash")
 
diff --git a/snappy_wrappers/wrappers/sequenza/environment.yaml b/snappy_wrappers/wrappers/sequenza/environment.yaml
new file mode 100644
index 000000000..b6a8c8b49
--- /dev/null
+++ b/snappy_wrappers/wrappers/sequenza/environment.yaml
@@ -0,0 +1,10 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - python =3.9
+  - sequenza-utils
+  - r-sequenza
+  - r-devtools
+  - r-data.table
+  - samtools
diff --git a/snappy_wrappers/wrappers/sequenza/gcreference/environment.yaml b/snappy_wrappers/wrappers/sequenza/gcreference/environment.yaml
new file mode 120000
index 000000000..2e107ac86
--- /dev/null
+++ b/snappy_wrappers/wrappers/sequenza/gcreference/environment.yaml
@@ -0,0 +1 @@
+../environment.yaml
\ No newline at end of file
diff --git a/snappy_wrappers/wrappers/sequenza/gcreference/wrapper.py b/snappy_wrappers/wrappers/sequenza/gcreference/wrapper.py
new file mode 100644
index 000000000..0b230a26d
--- /dev/null
+++ b/snappy_wrappers/wrappers/sequenza/gcreference/wrapper.py
@@ -0,0 +1,50 @@
+"""CUBI+Snakemake wrapper code for sequenza GC reference file
+"""
+
+import os
+
+from snakemake import shell
+
+__author__ = "Eric Blanc <eric.blanc@bih-charite.de>"
+
+step = snakemake.config["pipeline_step"]["name"]
+genome = snakemake.config["static_data_config"]["reference"]["path"]
+length = snakemake.config["step_config"][step]["sequenza"]["length"]
+
+shell.executable("/bin/bash")
+
+shell(
+    r"""
+set -x
+
+# Write out information about conda installation.
+conda list >{snakemake.log.conda_list}
+conda info >{snakemake.log.conda_info}
+md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5}
+md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5}
+
+# Also pipe stderr to log file
+if [[ -n "{snakemake.log.log}" ]]; then
+    if [[ "$(set +e; tty; set -e)" != "" ]]; then
+        rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
+        exec 2> >(tee -a "{snakemake.log.log}" >&2)
+    else
+        rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
+        echo "No tty, logging disabled" >"{snakemake.log.log}"
+    fi
+fi
+
+sequenza-utils gc_wiggle --fasta {genome} -w {length} -o {snakemake.output.gc}
+
+pushd $(dirname {snakemake.output.gc})
+md5sum $(basename {snakemake.output.gc}) > $(basename {snakemake.output.gc}).md5
+popd
+"""
+)
+
+# Compute MD5 sums of logs.
+shell(
+    r"""
+md5sum {snakemake.log.log} >{snakemake.log.log_md5}
+"""
+)
diff --git a/snappy_wrappers/wrappers/sequenza/report/environment.yaml b/snappy_wrappers/wrappers/sequenza/report/environment.yaml
new file mode 120000
index 000000000..2e107ac86
--- /dev/null
+++ b/snappy_wrappers/wrappers/sequenza/report/environment.yaml
@@ -0,0 +1 @@
+../environment.yaml
\ No newline at end of file
diff --git a/snappy_wrappers/wrappers/sequenza/report/wrapper.py b/snappy_wrappers/wrappers/sequenza/report/wrapper.py
new file mode 100644
index 000000000..28c1b18a6
--- /dev/null
+++ b/snappy_wrappers/wrappers/sequenza/report/wrapper.py
@@ -0,0 +1,98 @@
+"""CUBI+Snakemake wrapper code for sequenza (R part, post-processing)
+"""
+
+import os
+import sys
+
+# The following is required for being able to import snappy_wrappers modules
+# inside wrappers.  These run in an "inner" snakemake process which uses its
+# own conda environment which cannot see the snappy_pipeline installation.
+base_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
+sys.path.insert(0, base_dir)
+
+from snakemake import shell
+
+from snappy_wrappers.tools.genome_windows import yield_contigs
+
+__author__ = "Eric Blanc <eric.blanc@bih-charite.de>"
+
+
+def config_to_r(x):
+    if x is None:
+        return "NULL"
+    if isinstance(x, str):
+        return f'"{x}"'
+    if isinstance(x, bool):
+        return "TRUE" if x else "FALSE"
+    if isinstance(x, list):
+        return "c({})".format(", ".join([config_to_r(xx) for xx in x]))
+    if isinstance(x, dict):
+        return "list({})".format(
+            ", ".join(['"{}"={}'.format(k, config_to_r(v)) for k, v in x.items()])
+        )
+    return str(x)
+
+
+step = snakemake.config["pipeline_step"]["name"]
+config = snakemake.config["step_config"][step]["sequenza"]
+genome = snakemake.config["static_data_config"]["reference"]["path"]
+
+f = open(genome + ".fai", "rt")
+contigs = config_to_r(list(yield_contigs(f, config.get("ignore_chroms"))))
+f.close()
+
+args_extract = config_to_r(dict(config["extra_args_extract"]))
+args_fit = config_to_r(dict(config["extra_args_fit"]))
+
+shell.executable("/bin/bash")
+
+shell(
+    r"""
+set -x
+
+# Write out information about conda installation.
+conda list >{snakemake.log.conda_list}
+conda info >{snakemake.log.conda_info}
+md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5}
+md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5}
+
+# Also pipe stderr to log file
+if [[ -n "{snakemake.log.log}" ]]; then
+    if [[ "$(set +e; tty; set -e)" != "" ]]; then
+        rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
+        exec 2> >(tee -a "{snakemake.log.log}" >&2)
+    else
+        rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
+        echo "No tty, logging disabled" >"{snakemake.log.log}"
+    fi
+fi
+
+export R_LIBS_USER=$(dirname {snakemake.input.packages})
+
+R --vanilla --slave << __EOF
+library(sequenza)
+
+args <- list(file="{snakemake.input.seqz}", assembly="{config[assembly]}", chromosome.list={contigs})
+args <- c(args, {args_extract})
+seqz <- do.call(sequenza.extract, args=args)
+
+args <- list(sequenza.extract=seqz, chromosome.list={contigs}, mc.cores=1)
+args <- c(args, {args_fit})
+CP <- do.call(sequenza.fit, args=args)
+
+sequenza.results(sequenza.extract=seqz, cp.table=CP, sample.id="{snakemake.params[sample_id]}", out.dir=dirname("{snakemake.output.done}"))
+
+__EOF
+
+pushd $(dirname {snakemake.output.done}) ; for f in $(ls) ; do md5sum $f > $f.md5 ; done ; popd
+
+touch {snakemake.output.done}
+"""
+)
+
+# Compute MD5 sums of logs.
+shell(
+    r"""
+md5sum {snakemake.log.log} >{snakemake.log.log_md5}
+"""
+)
diff --git a/snappy_wrappers/wrappers/sequenza/run/environment.yaml b/snappy_wrappers/wrappers/sequenza/run/environment.yaml
new file mode 120000
index 000000000..2e107ac86
--- /dev/null
+++ b/snappy_wrappers/wrappers/sequenza/run/environment.yaml
@@ -0,0 +1 @@
+../environment.yaml
\ No newline at end of file
diff --git a/snappy_wrappers/wrappers/sequenza/run/wrapper.py b/snappy_wrappers/wrappers/sequenza/run/wrapper.py
new file mode 100644
index 000000000..13fa1f64d
--- /dev/null
+++ b/snappy_wrappers/wrappers/sequenza/run/wrapper.py
@@ -0,0 +1,71 @@
+"""CUBI+Snakemake wrapper code for sequenza (sequenza-utils, pileups)
+"""
+
+import os
+import sys
+
+# The following is required for being able to import snappy_wrappers modules
+# inside wrappers.  These run in an "inner" snakemake process which uses its
+# own conda environment which cannot see the snappy_pipeline installation.
+base_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
+sys.path.insert(0, base_dir)
+
+from snakemake import shell
+
+from snappy_wrappers.tools.genome_windows import yield_contigs
+
+__author__ = "Eric Blanc <eric.blanc@bih-charite.de>"
+
+step = snakemake.config["pipeline_step"]["name"]
+config = snakemake.config["step_config"][step]["sequenza"]
+genome = snakemake.config["static_data_config"]["reference"]["path"]
+length = config["length"]
+
+f = open(genome + ".fai", "rt")
+contigs = " ".join(yield_contigs(f, config.get("ignore_chroms")))
+f.close()
+
+extra_arguments = " ".join(
+    ["--{} {}".format(k, v) for k, v in config.get("extra_arguments", {}).items()]
+)
+
+shell.executable("/bin/bash")
+
+shell(
+    r"""
+set -x
+
+# Write out information about conda installation.
+conda list >{snakemake.log.conda_list}
+conda info >{snakemake.log.conda_info}
+md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5}
+md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5}
+
+# Also pipe stderr to log file
+if [[ -n "{snakemake.log.log}" ]]; then
+    if [[ "$(set +e; tty; set -e)" != "" ]]; then
+        rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
+        exec 2> >(tee -a "{snakemake.log.log}" >&2)
+    else
+        rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
+        echo "No tty, logging disabled" >"{snakemake.log.log}"
+    fi
+fi
+
+sequenza-utils bam2seqz \
+    -gc {snakemake.input.gc} --fasta {genome} \
+    -n {snakemake.input.normal_bam} --tumor {snakemake.input.tumor_bam} \
+    -C {contigs} {extra_arguments} \
+    | sequenza-utils seqz_binning -s - \
+        -w {length} -o {snakemake.output.seqz}
+
+pushd $(dirname {snakemake.output.seqz}) ; f=$(basename {snakemake.output.seqz}) ; md5sum $f > $f.md5 ; popd
+"""
+)
+
+# Compute MD5 sums of logs.
+shell(
+    r"""
+md5sum {snakemake.log.log} >{snakemake.log.log_md5}
+"""
+)
diff --git a/tests/snappy_pipeline/workflows/test_workflows_somatic_targeted_seq_cnv_calling.py b/tests/snappy_pipeline/workflows/test_workflows_somatic_targeted_seq_cnv_calling.py
index ff622624c..527556a40 100644
--- a/tests/snappy_pipeline/workflows/test_workflows_somatic_targeted_seq_cnv_calling.py
+++ b/tests/snappy_pipeline/workflows/test_workflows_somatic_targeted_seq_cnv_calling.py
@@ -46,6 +46,7 @@ def minimal_config():
             - cnvetti_on_target
             - cnvkit
             - copywriter
+            - sequenza
             cnvkit:
               path_target: /path/to/panel_of_normals/output/cnvkit.target/out/cnvkit.target.bed
               path_antitarget: /path/to/panel_of_normals/output/cnvkit.antitarget/out/cnvkit.antitarget.bed
@@ -811,13 +812,134 @@ def test_copywriter_step_part_get_resource_usage_call(somatic_targeted_seq_cnv_c
         assert actual == expected, msg_error
 
 
+# Tests for SequenzaStepPart ----------------------------------------------------------------------
+
+
+def test_sequenza_step_part_get_output_files_install(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_output_files() - action 'install'"""
+    expected = {"done": "work/R_packages/out/sequenza.done"}
+    actual = somatic_targeted_seq_cnv_calling_workflow.get_output_files("sequenza", "install")
+    assert actual == expected
+
+
+def test_sequenza_step_part_get_log_file_install(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_log_file() - action 'install'"""
+    base_name = "work/R_packages/log/sequenza"
+    expected = get_expected_log_files_dict(base_out=base_name)
+    actual = somatic_targeted_seq_cnv_calling_workflow.get_log_file("sequenza", "install")
+    assert actual == expected
+
+
+def test_sequenza_step_part_get_output_files_gcreference(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_output_files() - action 'gcreference'"""
+    expected = {"gc": "work/static_data/out/sequenza.50.wig.gz"}
+    actual = somatic_targeted_seq_cnv_calling_workflow.get_output_files("sequenza", "gcreference")
+    assert actual == expected
+
+
+def test_sequenza_step_part_get_log_file_gcreference(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_log_file() - action 'gcreference'"""
+    base_name = "work/static_data/log/sequenza.50"
+    expected = get_expected_log_files_dict(base_out=base_name)
+    actual = somatic_targeted_seq_cnv_calling_workflow.get_log_file("sequenza", "gcreference")
+    assert actual == expected
+
+
+def test_sequenza_step_part_get_input_files_run(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_input_files() - action 'run'"""
+    wildcards = Wildcards(fromdict={"mapper": "bwa", "library_name": "P001-T1-DNA1-WGS1"})
+    expected = {
+        "gc": "work/static_data/out/sequenza.50.wig.gz",
+        "normal_bam": "NGS_MAPPING/output/bwa.P001-N1-DNA1-WGS1/out/bwa.P001-N1-DNA1-WGS1.bam",
+        "normal_bai": "NGS_MAPPING/output/bwa.P001-N1-DNA1-WGS1/out/bwa.P001-N1-DNA1-WGS1.bam.bai",
+        "tumor_bam": "NGS_MAPPING/output/bwa.P001-T1-DNA1-WGS1/out/bwa.P001-T1-DNA1-WGS1.bam",
+        "tumor_bai": "NGS_MAPPING/output/bwa.P001-T1-DNA1-WGS1/out/bwa.P001-T1-DNA1-WGS1.bam.bai",
+    }
+    actual = somatic_targeted_seq_cnv_calling_workflow.get_input_files("sequenza", "run")(wildcards)
+    assert actual == expected
+
+
+def test_sequenza_step_part_get_output_files_run(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_output_files() - action 'run'"""
+    expected = {
+        "seqz": "work/{mapper}.sequenza.{library_name}/out/{mapper}.sequenza.{library_name}.seqz.gz",
+        "seqz_md5": "work/{mapper}.sequenza.{library_name}/out/{mapper}.sequenza.{library_name}.seqz.gz.md5",
+    }
+    actual = somatic_targeted_seq_cnv_calling_workflow.get_output_files("sequenza", "run")
+    assert actual == expected
+
+
+def test_sequenza_step_part_get_log_file_run(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_log_file() - action 'run'"""
+    base_name = "work/{mapper}.sequenza.{library_name}/log/{mapper}.sequenza.{library_name}.run"
+    expected = get_expected_log_files_dict(base_out=base_name)
+    actual = somatic_targeted_seq_cnv_calling_workflow.get_log_file("sequenza", "run")
+    assert actual == expected
+
+
+def test_sequenza_step_part_get_input_files_report(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_input_files() - action 'report'"""
+    expected = {
+        "packages": "work/R_packages/out/sequenza.done",
+        "seqz": "work/{mapper}.sequenza.{library_name}/out/{mapper}.sequenza.{library_name}.seqz.gz",
+    }
+    actual = somatic_targeted_seq_cnv_calling_workflow.get_input_files("sequenza", "report")
+    assert actual == expected
+
+
+def test_sequenza_step_part_get_output_files_report(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_output_files() - action 'report'"""
+    expected = {"done": "work/{mapper}.sequenza.{library_name}/report/.done"}
+    actual = somatic_targeted_seq_cnv_calling_workflow.get_output_files("sequenza", "report")
+    assert actual == expected
+
+
+def test_sequenza_step_part_get_params_report(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_params() - action 'report'"""
+    wildcards = Wildcards(fromdict={"mapper": "bwa", "library_name": "P001-T1-DNA1-WGS1"})
+    expected = "P001-T1-DNA1-WGS1"
+    actual = somatic_targeted_seq_cnv_calling_workflow.get_params("sequenza", "report")(wildcards)
+    assert actual == expected
+
+
+def test_sequenza_step_part_get_log_file_report(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_log_file() - action 'report'"""
+    base_name = "work/{mapper}.sequenza.{library_name}/log/{mapper}.sequenza.{library_name}.report"
+    expected = get_expected_log_files_dict(base_out=base_name)
+    actual = somatic_targeted_seq_cnv_calling_workflow.get_log_file("sequenza", "report")
+    assert actual == expected
+
+
+def test_sequenza_step_part_get_resource_usage_call(somatic_targeted_seq_cnv_calling_workflow):
+    """Tests SequenzaStepPart.get_resource_usage()"""
+    # Define expected
+    expected_dicts = {
+        "run": {"threads": 4, "time": "24:00:00", "memory": "16G", "partition": "medium"},
+    }
+    # Evaluate
+    for action, resources in expected_dicts.items():
+        for resource, expected in resources.items():
+            msg_error = f"Assertion error for resource '{resource}' in '{action}'."
+            actual = somatic_targeted_seq_cnv_calling_workflow.get_resource(
+                "sequenza", action, resource
+            )
+            assert actual == expected, msg_error
+
+
 # Tests for SomaticTargetedSeqCnvCallingWorkflow --------------------------------------------------
 
 
 def test_somatic_targeted_seq_cnv_calling_workflow(somatic_targeted_seq_cnv_calling_workflow):
     """Test simple functionality of the workflow"""
     # Check created sub steps
-    expected = ["cnvetti_off_target", "cnvetti_on_target", "cnvkit", "copywriter", "link_out"]
+    expected = [
+        "cnvetti_off_target",
+        "cnvetti_on_target",
+        "cnvkit",
+        "copywriter",
+        "link_out",
+        "sequenza",
+    ]
     actual = list(sorted(somatic_targeted_seq_cnv_calling_workflow.sub_steps.keys()))
     assert actual == expected
 
@@ -928,6 +1050,16 @@ def test_somatic_targeted_seq_cnv_calling_workflow(somatic_targeted_seq_cnv_call
             "segments.txt.md5",
         )
     ]
+    # sequenza
+    tpl = (
+        "output/bwa.sequenza.P00{i}-T{t}-DNA1-WGS1/out/"
+        "bwa.sequenza.P00{i}-T{t}-DNA1-WGS1.seqz.gz{checksum}"
+    )
+    expected += [
+        tpl.format(i=i, t=t, checksum=checksum)
+        for i, t in ((1, 1), (2, 1), (2, 2))
+        for checksum in ("", ".md5")
+    ]
     expected = list(sorted(expected))
     actual = list(sorted(somatic_targeted_seq_cnv_calling_workflow.get_result_files()))
     # HACK TODO