Skip to content

Commit

Permalink
Merge branch 'main' into wrapper-versioning
Browse files Browse the repository at this point in the history
  • Loading branch information
tedil committed Apr 18, 2024
2 parents 6748046 + e008e7e commit 7796eed
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 112 deletions.
22 changes: 11 additions & 11 deletions snappy_pipeline/workflows/somatic_msi_calling/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,21 @@ rule somatic_msi_calling_link_out_run:

# Somatic Microsatellite Instability analysis~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Run MANTIS ------------------------------------------------------------------
# Run MANTIS_msi2 ------------------------------------------------------------------


rule somatic_msi_calling_mantis:
rule somatic_msi_calling_mantis_msi2:
input:
unpack(wf.get_input_files("mantis", "run")),
unpack(wf.get_input_files("mantis_msi2", "run")),
output:
**wf.get_output_files("mantis", "run"),
threads: wf.get_resource("mantis", "run", "threads")
**wf.get_output_files("mantis_msi2", "run"),
threads: wf.get_resource("mantis_msi2", "run", "threads")
resources:
time=wf.get_resource("mantis", "run", "time"),
memory=wf.get_resource("mantis", "run", "memory"),
partition=wf.get_resource("mantis", "run", "partition"),
tmpdir=wf.get_resource("mantis", "run", "tmpdir"),
time=wf.get_resource("mantis_msi2", "run", "time"),
memory=wf.get_resource("mantis_msi2", "run", "memory"),
partition=wf.get_resource("mantis_msi2", "run", "partition"),
tmpdir=wf.get_resource("mantis_msi2", "run", "tmpdir"),
log:
wf.get_log_file("mantis", "run"),
**wf.get_log_file("mantis_msi2", "run"),
wrapper:
wf.wrapper_path("mantis/run")
wf.wrapper_path("mantis/mantis_msi2/run")
167 changes: 112 additions & 55 deletions snappy_pipeline/workflows/somatic_msi_calling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
whole genomes, exomes or large panels). MANTIS starts from the aligned reads
(thus off ``ngs_mapping``) and generates a result file per tumor/normal pair.
As MANTIS is not maintained anymore, the pipeline now supports only
`MANTIS2 <https://github.com/nh13/MANTIS2>`_.
The new version appears to be very silimar to the old one, both in terms of input & output files,
and in terms of requirements.
==========
Step Input
==========
Expand All @@ -19,13 +24,13 @@
.. note:: Tool-Specific Output
As the only integrated tool is MANTIS at the moment, the output is very tailored to the result
As the only integrated tool is MANTIS2 at the moment, the output is very tailored to the result
of this tool. In the future, this section might contain "common" output and tool-specific
output sub sections.
- ``mantis.{mapper}.{lib_name}-{lib_pk}/out/``
- ``mantis.{mapper}.{lib_name}-{lib_pk}.results.txt``
- ``mantis.{mapper}.{lib_name}-{lib_pk}.results.txt.status``
- ``{mapper}.mantis_msi2.{lib_name}-{lib_pk}/out/``
- ``{mapper}.mantis_msi2.{lib_name}-{lib_pk}.results.txt``
- ``{mapper}.mantis_msi2.{lib_name}-{lib_pk}.results.txt.status``
=====================
Default Configuration
Expand All @@ -39,14 +44,15 @@
Available Somatic Targeted CNV Caller
=====================================
- ``mantis``
- ``mantis_msi2``
"""

from collections import OrderedDict
import os
import sys

from biomedsheets.shortcuts import CancerCaseSheet, is_not_background
from biomedsheets.shortcuts import CancerCaseSheet, CancerCaseSheetOptions, is_not_background
from snakemake.io import expand

from snappy_pipeline.utils import dictify, listify
Expand All @@ -60,30 +66,50 @@

__author__ = "Clemens Messerschmidt <[email protected]>"

#: Extensions of files to create as main payload
EXT_VALUES = (".results.txt", ".results.txt.status", ".results.txt.md5", ".results.txt.status.md5")

#: Names of the files to create for the extension
EXT_NAMES = ("result", "status", "result_md5", "status_md5")

EXT_MATCHED = {
"mantis_msi2": {
"result": ".results.txt",
"status": ".results.txt.status",
"result_md5": ".results.txt.md5",
"status_md5": ".results.txt.status.md5",
},
}

#: Available somatic variant callers assuming matched samples.
MSI_CALLERS_MATCHED = ("mantis_msi2",)


#: Default configuration for the somatic_msi_calling step
DEFAULT_CONFIG = r"""
# Default configuration somatic_msi_calling
step_config:
somatic_msi_calling:
tools: ['mantis'] # REQUIRED - available: 'mantis'
tools: ['mantis_msi2'] # REQUIRED - available: 'mantis_msi2'
path_ngs_mapping: ../ngs_mapping # REQUIRED
loci_bed: /fast/groups/cubi/projects/biotools/Mantis/appData/hg19/loci.bed # REQUIRED
loci_bed: "" # REQUIRED
# hg19: /fast/work/groups/cubi/projects/biotools/Mantis/appData/hg19/loci.bed
# hg38: /fast/work/groups/cubi/projects/biotools/Mantis/appData/hg38/GRCh38.d1.vd1.all_loci.bed
"""


class MantisStepPart(BaseStepPart):
"""Perform somatic microsatellite instability with MANTIS"""
class Mantis2StepPart(BaseStepPart):
"""Perform somatic microsatellite instability with MANTIS_msi2"""

#: Step name
name = "mantis"
name = "mantis_msi2"

#: Class available actions
actions = ("run",)

def __init__(self, parent):
super().__init__(parent)
self.base_path_out = (
"work/mantis.{{mapper}}.{{library_name}}/out/" "mantis.{{mapper}}.{{library_name}}{ext}"
"work/{{mapper}}.{msi_caller}.{{tumor_library}}/out/"
"{{mapper}}.{msi_caller}.{{tumor_library}}{ext}"
)
# Build shortcut from cancer bio sample name to matched cancer sample
self.tumor_ngs_library_to_sample_pair = OrderedDict()
Expand All @@ -92,12 +118,10 @@ def __init__(self, parent):
sheet.all_sample_pairs_by_tumor_dna_ngs_library
)

def get_normal_lib_name(self, wildcards):
"""Return name of normal (non-cancer) library"""
pair = self.tumor_ngs_library_to_sample_pair[wildcards.library_name]
return pair.normal_sample.dna_ngs_library.name

def get_input_files(self, action):
# Validate action
self._validate_action(action)

def input_function(wildcards):
"""Helper wrapper function"""
# Get shorcut to Snakemake sub workflow
Expand All @@ -110,7 +134,7 @@ def input_function(wildcards):
)
)
tumor_base_path = (
"output/{mapper}.{library_name}/out/" "{mapper}.{library_name}"
"output/{mapper}.{tumor_library}/out/" "{mapper}.{tumor_library}"
).format(**wildcards)
return {
"normal_bam": ngs_mapping(normal_base_path + ".bam"),
Expand All @@ -119,24 +143,38 @@ def input_function(wildcards):
"tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
}

assert action == "run", "Unsupported actions"
return input_function

@dictify
def get_normal_lib_name(self, wildcards):
"""Return name of normal (non-cancer) library"""
pair = self.tumor_ngs_library_to_sample_pair[wildcards.tumor_library]
return pair.normal_sample.dna_ngs_library.name

def get_output_files(self, action):
assert action == "run", "Unsupported actions"
exts = {"result": "results.txt", "status": "results.txt.status"}
for key, ext in exts.items():
yield key, (
"work/mantis.{{mapper}}.{{library_name}}/out/"
"mantis.{{mapper}}.{{library_name}}_{sfx}"
).format(sfx=ext)

@staticmethod
def get_log_file(action):
"""Return path to log file for the given action"""
_ = action
return "work/mantis.{mapper}.{library_name}/log/snakemake.mantis_run.log"
# Validate action
self._validate_action(action)
return dict(
zip(EXT_NAMES, expand(self.base_path_out, msi_caller=[self.name], ext=EXT_VALUES))
)

@dictify
def _get_log_file(self, action):
"""Return dict of log files."""
# Validate action
self._validate_action(action)

prefix = (
"work/{{mapper}}.{msi_caller}.{{tumor_library}}/log/"
"{{mapper}}.{msi_caller}.{{tumor_library}}"
).format(msi_caller=self.__class__.name)
key_ext = (
("log", ".log"),
("conda_info", ".conda_info.txt"),
("conda_list", ".conda_list.txt"),
)
for key, ext in key_ext:
yield key, prefix + ext
yield key + "_md5", prefix + ext + ".md5"

def get_resource_usage(self, action):
"""Get Resource Usage
Expand All @@ -150,7 +188,7 @@ def get_resource_usage(self, action):
self._validate_action(action)
return ResourceUsage(
threads=3,
time="08:00:00", # 2 hours
time="08:00:00", # 8 hours
memory=f"{30 * 1024 * 3}M",
)

Expand All @@ -164,9 +202,13 @@ class SomaticMsiCallingWorkflow(BaseStep):
#: Default biomed sheet class
sheet_shortcut_class = CancerCaseSheet

sheet_shortcut_kwargs = {
"options": CancerCaseSheetOptions(allow_missing_normal=True, allow_missing_tumor=True)
}

@classmethod
def default_config_yaml(cls):
"""Return default config YAML, to be overwritten by project-specific one"""
"""Return default config YAML, to be overwritten by project-specific one."""
return DEFAULT_CONFIG

def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir):
Expand All @@ -179,14 +221,41 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir)
(NgsMappingWorkflow,),
)
# Register sub step classes so the sub steps are available
self.register_sub_step_classes((MantisStepPart, LinkOutStepPart))
self.register_sub_step_classes((Mantis2StepPart, LinkOutStepPart))
# Initialize sub-workflows
self.register_sub_workflow("ngs_mapping", self.config["path_ngs_mapping"])

@listify
def get_result_files(self):
"""Return list of result files for the somatic targeted sequencing CNV calling step"""
tool_actions = {"mantis": ("run",)}
"""Return list of result files for the MSI calling workflow"""
name_pattern = "{mapper}.{msi_caller}.{tumor_library.name}"
for msi_caller in set(self.config["tools"]) & set(MSI_CALLERS_MATCHED):
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "out", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["ngs_mapping"]["tools"]["dna"],
msi_caller=msi_caller,
ext=EXT_MATCHED[msi_caller].values() if msi_caller in EXT_MATCHED else EXT_VALUES,
)
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "log", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["ngs_mapping"]["tools"]["dna"],
msi_caller=msi_caller,
ext=(
".log",
".log.md5",
".conda_info.txt",
".conda_info.txt.md5",
".conda_list.txt",
".conda_list.txt.md5",
),
)

def _yield_result_files_matched(self, tpl, **kwargs):
"""Build output paths from path template and extension list.
This function returns the results from the matched msi callers such as
mantis.
"""
for sheet in filter(is_not_background, self.shortcut_sheets):
for sample_pair in sheet.all_sample_pairs:
if (
Expand All @@ -199,21 +268,9 @@ def get_result_files(self):
)
print(msg.format(sample_pair.tumor_sample.name), file=sys.stderr)
continue
for tool in self.config["tools"]:
for action in tool_actions[tool]:
try:
tpls = self.sub_steps[tool].get_output_files(action).values()
except AttributeError:
tpls = self.sub_steps[tool].get_output_files(action)
for tpl in tpls:
filenames = expand(
tpl,
mapper=self.w_config["step_config"]["ngs_mapping"]["tools"]["dna"],
library_name=[sample_pair.tumor_sample.dna_ngs_library.name],
)
for f in filenames:
if ".tmp." not in f:
yield f.replace("work/", "output/")
yield from expand(
tpl, tumor_library=[sample_pair.tumor_sample.dna_ngs_library], **kwargs
)

def check_config(self):
"""Check that the necessary globalc onfiguration is present"""
Expand Down
2 changes: 1 addition & 1 deletion snappy_wrappers/wrappers/gcnv/contig_ploidy/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
export THEANO_FLAGS="base_compiledir=$TMPDIR/theano_compile_dir"
# Get contig name style
egrep "^@SQ\s+SN:chr[0-9XY]{1,2}\s+" {snakemake.input.interval_list} > /dev/null && true
egrep "^@SQ\s+SN:chr[0-9XY]{{1,2}}\s+" {snakemake.input.interval_list} > /dev/null && true
exit_status=$?
if [[ $exit_status == 0 ]]; then
STYLE="chr"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# Get contig name style
# Note: the sample_index in '{snakemake.input.ploidy}' and '{snakemake.input.calls[0]}' are NOT guaranteed to be the same
# -> Sample_0 should always exist and the contig names (which we care for) are the same
tail -n +2 $(dirname {snakemake.input.ploidy})/ploidy-calls/SAMPLE_0/contig_ploidy.tsv | egrep "^chr[0-9XY]{1,2}\s[0-9]\s[0-9.]+" > /dev/null && true
tail -n +2 $(dirname {snakemake.input.ploidy})/ploidy-calls/SAMPLE_0/contig_ploidy.tsv | egrep "^chr[0-9XY]{{1,2}}\s[0-9]\s[0-9.]+" > /dev/null && true
exit_status=$?
if [[ $exit_status == 0 ]]; then
STYLE="chr"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
fi
# Get contig name style; SAMPLE_0 is guaranteed to exist
tail -n +2 $(dirname {snakemake.input.ploidy})/ploidy-calls/SAMPLE_0/contig_ploidy.tsv | grep "^chr[0-9XY]{1,2}\s[0-9]\s[0-9.]+$" > /dev/null && true
tail -n +2 $(dirname {snakemake.input.ploidy})/ploidy-calls/SAMPLE_0/contig_ploidy.tsv | grep "^chr[0-9XY]{{1,2}}\s[0-9]\s[0-9.]+$" > /dev/null && true
exit_status=$?
if [[ $exit_status == 0 ]]; then
STYLE="chr"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ channels:
- conda-forge
- bioconda
dependencies:
- numpy ==1.6.2
- pysam ==0.8.3
- mantis-msi2
Loading

0 comments on commit 7796eed

Please sign in to comment.