Skip to content

Commit

Permalink
feat: add missense TMB calculation (#432)
Browse files Browse the repository at this point in the history
Co-authored-by: giacuong171 <[email protected]>
  • Loading branch information
giacuong171 and giacuong171 authored Aug 30, 2023
1 parent 35b24cd commit 4fd0c73
Show file tree
Hide file tree
Showing 6 changed files with 405 additions and 89 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
EXT_NAMES = ("vcf", "vcf_tbi", "vcf_md5", "vcf_tbi_md5")

#: Names of the annotator tools
TOOLS = ("jannovar", "vep")
ANNOTATION_TOOLS = ("jannovar", "vep")

#: Default configuration for the somatic_variant_calling step
DEFAULT_CONFIG = r"""
Expand Down Expand Up @@ -336,7 +336,7 @@ def get_result_files(self):
annotators = list(
map(
lambda x: x.replace("jannovar", "jannovar_annotate_somatic_vcf"),
set(self.config["tools"]) & set(TOOLS),
set(self.config["tools"]) & set(ANNOTATION_TOOLS),
)
)
callers = set(self.config["tools_somatic_variant_calling"])
Expand Down
2 changes: 2 additions & 0 deletions snappy_pipeline/workflows/tumor_mutational_burden/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ rule tumor_mutational_burden_calculation:
memory=wf.get_resource("tmb_gathering", "run", "memory"),
partition=wf.get_resource("tmb_gathering", "run", "partition"),
tmpdir=wf.get_resource("tmb_gathering", "run", "tmpdir"),
params:
**{"args": wf.get_params("tmb_gathering", "run")},
log:
**wf.get_log_file("tmb_gathering", "run"),
wrapper:
Expand Down
184 changes: 130 additions & 54 deletions snappy_pipeline/workflows/tumor_mutational_burden/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
from biomedsheets.shortcuts import CancerCaseSheet, CancerCaseSheetOptions, is_not_background
from snakemake.io import expand

from snappy_pipeline.base import UnsupportedActionException
from snappy_pipeline.utils import dictify, listify
from snappy_pipeline.workflows.abstract import BaseStep, BaseStepPart, LinkOutStepPart
from snappy_pipeline.workflows.ngs_mapping import NgsMappingWorkflow, ResourceUsage
from snappy_pipeline.workflows.somatic_variant_annotation import (
ANNOTATION_TOOLS,
SomaticVariantAnnotationWorkflow,
)
from snappy_pipeline.workflows.somatic_variant_calling import (
SOMATIC_VARIANT_CALLERS_MATCHED,
SomaticVariantCallingWorkflow,
Expand All @@ -24,10 +27,13 @@
DEFAULT_CONFIG = r"""
step_config:
tumor_mutational_burden:
path_somatic_variant_calling: ../somatic_variant_calling # REQUIRED
has_annotation: 'TRUE' # REQUIRED
path_somatic_variant: ../somatic_variant_annotation # REQUIRED
tools_ngs_mapping: [] # default to those configured for ngs_mapping
tools_somatic_variant_calling: [] # default to those configured for somatic_variant_calling
tools_somatic_variant_annotation: [] # default to those configured for somatic_variant_annotation
target_regions: # REQUIRED
missense_regex: '.*[\|&]missense_variant[\|&].*' #change if the annotation tool doesn't use 'missense_variant' to indicate missense variant
"""


Expand Down Expand Up @@ -56,35 +62,59 @@ def __init__(self, parent):
@dictify
def get_input_files(self, action):
self._validate_action(action)
tpl = (
"output/{mapper}.{var_caller}.{tumor_library}/out/"
"{mapper}.{var_caller}.{tumor_library}"
)
# Adding part for runnng with annotation file instead of with variant calling file
if self.w_config["step_config"]["tumor_mutational_burden"]["has_annotation"] == "TRUE":
tpl = (
"output/{mapper}.{var_caller}.{anno_tool}.{tumor_library}/out/"
"{mapper}.{var_caller}.{anno_tool}.{tumor_library}"
)
else:
tpl = (
"output/{mapper}.{var_caller}.{tumor_library}/out/"
"{mapper}.{var_caller}.{tumor_library}"
)

key_ext = {"vcf": ".vcf.gz", "vcf_tbi": ".vcf.gz.tbi"}
variant_calling = self.parent.sub_workflows["somatic_variant_calling"] # read
# Adding part for runnng with annotation file instead of with variant calling file
if self.w_config["step_config"]["tumor_mutational_burden"]["has_annotation"] == "TRUE":
variant_path = self.parent.sub_workflows["somatic_variant_annotation"]
else:
variant_path = self.parent.sub_workflows["somatic_variant_calling"]
for key, ext in key_ext.items():
yield key, variant_calling(tpl + ext)
yield key, variant_path(tpl + ext)

@dictify
def get_output_files(self, action):
# Validate action
self._validate_action(action)
prefix = (
"work/{mapper}.{var_caller}.tmb.{tumor_library}/out/"
"{mapper}.{var_caller}.tmb.{tumor_library}"
)
if self.w_config["step_config"]["tumor_mutational_burden"]["has_annotation"] == "TRUE":
prefix = (
"work/{mapper}.{var_caller}.{anno_tool}.tmb.{tumor_library}/out/"
"{mapper}.{var_caller}.{anno_tool}.tmb.{tumor_library}"
)
else:
prefix = (
"work/{mapper}.{var_caller}.tmb.{tumor_library}/out/"
"{mapper}.{var_caller}.tmb.{tumor_library}"
)
key_ext = {"json": ".json"}
for key, ext in key_ext.items():
yield key, prefix + ext
yield key + "_md5", prefix + ext + ".md5"

@dictify
def _get_log_file(self, action):
assert action == "run"
prefix = (
"work/{mapper}.{var_caller}.tmb.{tumor_library}/log/"
"{mapper}.{var_caller}.tmb.{tumor_library}"
)
self._validate_action(action)
if self.w_config["step_config"]["tumor_mutational_burden"]["has_annotation"] == "TRUE":
prefix = (
"work/{mapper}.{var_caller}.{anno_tool}.tmb.{tumor_library}/log/"
"{mapper}.{var_caller}.{anno_tool}.tmb.{tumor_library}"
)
else:
prefix = (
"work/{mapper}.{var_caller}.tmb.{tumor_library}/log/"
"{mapper}.{var_caller}.tmb.{tumor_library}"
)

key_ext = (
("log", ".log"),
Expand All @@ -95,23 +125,23 @@ def _get_log_file(self, action):
yield key, prefix + ext

def get_resource_usage(self, action):
"""Get Resource Usage
:param action: Action (i.e., step) in the workflow, example: 'run'.
:type action: str
:return: Returns ResourceUsage for step.
:raises UnsupportedActionException: if action not in class defined list of valid actions.
"""
if action not in self.actions:
actions_str = ", ".join(self.actions)
error_message = f"Action '{action}' is not supported. Valid options: {actions_str}"
raise UnsupportedActionException(error_message)
self._validate_action(action)
mem_mb = 4 * 1024 # 4GB
return ResourceUsage(
threads=2,
time="1:00:00", # 1 hour
memory=f"{mem_mb}M",
)

def get_params(self, action):
self._validate_action(action)
return getattr(self, "_get_params_run")

def _get_params_run(self, wildcards):
return {
"missense_re": self.w_config["step_config"]["tumor_mutational_burden"]["missense_regex"]
}


class TumorMutationalBurdenCalculationWorkflow(BaseStep):
"""Perform TMB calculation"""
Expand All @@ -134,15 +164,27 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir)
config_lookup_paths,
config_paths,
workdir,
(SomaticVariantCallingWorkflow, NgsMappingWorkflow),
(SomaticVariantCallingWorkflow, SomaticVariantAnnotationWorkflow, NgsMappingWorkflow),
)
# Register sub step classes so the sub steps are available
self.register_sub_step_classes((TumorMutationalBurdenCalculationStepPart, LinkOutStepPart))
# Register sub workflows
self.register_sub_workflow(
"somatic_variant_calling",
self.w_config["step_config"]["tumor_mutational_burden"]["path_somatic_variant_calling"],
)
if self.w_config["step_config"]["tumor_mutational_burden"]["has_annotation"] == "TRUE":
self.register_sub_workflow(
"somatic_variant_annotation",
self.w_config["step_config"]["tumor_mutational_burden"]["path_somatic_variant"],
)
if not self.w_config["step_config"]["tumor_mutational_burden"][
"tools_somatic_variant_annotation"
]:
self.w_config["step_config"]["tumor_mutational_burden"][
"tools_somatic_variant_annotation"
] = self.w_config["step_config"]["somatic_variant_annotation"]["tools"]
else:
self.register_sub_workflow(
"somatic_variant_calling",
self.w_config["step_config"]["tumor_mutational_burden"]["path_somatic_variant"],
)
# Copy over "tools" setting from somatic_variant_calling/ngs_mapping if not set here
if not self.w_config["step_config"]["tumor_mutational_burden"]["tools_ngs_mapping"]:
self.w_config["step_config"]["tumor_mutational_burden"][
Expand All @@ -160,26 +202,55 @@ def get_result_files(self):
callers = set(
self.w_config["step_config"]["tumor_mutational_burden"]["tools_somatic_variant_calling"]
)
name_pattern = "{mapper}.{caller}.tmb.{tumor_library.name}"
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "out", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["tumor_mutational_burden"]["tools_ngs_mapping"],
caller=callers & set(SOMATIC_VARIANT_CALLERS_MATCHED),
ext=EXT_VALUES,
)
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "log", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["tumor_mutational_burden"]["tools_ngs_mapping"],
caller=callers & set(SOMATIC_VARIANT_CALLERS_MATCHED),
ext=(
".log",
".log.md5",
".conda_info.txt",
".conda_info.txt.md5",
".conda_list.txt",
".conda_list.txt.md5",
),
)
if self.w_config["step_config"]["tumor_mutational_burden"]["has_annotation"] == "TRUE":
anno_callers = set(
self.w_config["step_config"]["tumor_mutational_burden"][
"tools_somatic_variant_annotation"
]
)
name_pattern = "{mapper}.{caller}.{anno_caller}.tmb.{tumor_library.name}"
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "out", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["tumor_mutational_burden"]["tools_ngs_mapping"],
caller=callers & set(SOMATIC_VARIANT_CALLERS_MATCHED),
anno_caller=anno_callers & set(ANNOTATION_TOOLS),
ext=EXT_VALUES,
)
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "log", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["tumor_mutational_burden"]["tools_ngs_mapping"],
caller=callers & set(SOMATIC_VARIANT_CALLERS_MATCHED),
anno_caller=anno_callers & set(ANNOTATION_TOOLS),
ext=(
".log",
".log.md5",
".conda_info.txt",
".conda_info.txt.md5",
".conda_list.txt",
".conda_list.txt.md5",
),
)
else:
name_pattern = "{mapper}.{caller}.tmb.{tumor_library.name}"
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "out", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["tumor_mutational_burden"]["tools_ngs_mapping"],
caller=callers & set(SOMATIC_VARIANT_CALLERS_MATCHED),
ext=EXT_VALUES,
)
yield from self._yield_result_files_matched(
os.path.join("output", name_pattern, "log", name_pattern + "{ext}"),
mapper=self.w_config["step_config"]["tumor_mutational_burden"]["tools_ngs_mapping"],
caller=callers & set(SOMATIC_VARIANT_CALLERS_MATCHED),
ext=(
".log",
".log.md5",
".conda_info.txt",
".conda_info.txt.md5",
".conda_list.txt",
".conda_list.txt.md5",
),
)

def _yield_result_files_matched(self, tpl, **kwargs):
"""Build output paths from path template and extension list.
Expand Down Expand Up @@ -208,12 +279,17 @@ def _yield_result_files_matched(self, tpl, **kwargs):
def check_config(self):
"""Check that the path to the NGS mapping is present"""
self.ensure_w_config(
("step_config", "tumor_mutational_burden", "path_somatic_variant_calling"),
"Path to variant calling not configured but required for tmb calculation",
("step_config", "tumor_mutational_burden", "path_somatic_variant"),
"Path to variant (directory of vcf files) not configured but required for tmb calculation",
)

self.ensure_w_config(
("step_config", "tumor_mutational_burden", "target_regions"),
"Path to target_regions file (bed format)"
"not configured but required for tmb calculation",
)

self.ensure_w_config(
("step_config", "tumor_mutational_burden", "has_annotation"),
"TMB needs to know wether the vcf is annotated or not",
)
62 changes: 49 additions & 13 deletions snappy_wrappers/wrappers/bcftools/TMB/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
__author__ = "Pham Gia Cuong"
__email__ = "[email protected]"

missense_re = (
snakemake.params.args["missense_re"]
if "args" in snakemake.params.keys() and "missense_re" in snakemake.params.args.keys()
else ""
)

shell(
r"""
# -----------------------------------------------------------------------------
Expand All @@ -33,23 +39,53 @@
number_indels=$(bcftools view -R $bed_file -v indels --threads 2 -H {snakemake.input.vcf}| wc -l)
number_variants=$(bcftools view -R $bed_file --threads 2 -H {snakemake.input.vcf}| wc -l)
if [[ -n "{missense_re}" ]]
then
number_missense_variants=$(bcftools view -R $bed_file --threads 2 -H {snakemake.input.vcf}| grep -E '{missense_re}' | wc -l)
fi
TMB=`echo "1000000*($number_variants/$total_exom_length)" | bc -l `
cat << EOF > {snakemake.output.json}
{
"Library_name": {snakemake.wildcards.tumor_library},
"VCF_file": $name_vcf,
"VCF_md5": $vcf_md5,
"BED_file": $bed_file_name,
"BED_md5": $bed_md5,
"TMB": $TMB,
"Number_variants": $number_variants,
"Number_snvs": $number_snvs,
"Number_indels": $number_indels,
"Total_regions_length": $total_exom_length
missense_TMB=`echo "1000000*($number_missense_variants/$total_exom_length)" | bc -l `
if [[ {snakemake.config[step_config][tumor_mutational_burden][has_annotation]} == "TRUE" ]]
then
out_file=$(cat << EOF
{{
"Library_name": {snakemake.wildcards.tumor_library},
"VCF_file": $name_vcf,
"VCF_md5": $vcf_md5,
"BED_file": $bed_file_name,
"BED_md5": $bed_md5,
"TMB": $TMB,
"missense_TMB": $missense_TMB,
"Number_variants": $number_variants,
"Number_snvs": $number_snvs,
"Number_indels": $number_indels,
"Total_regions_length": $total_exom_length
}}
EOF
)
echo $out_file > {snakemake.output.json}
else
out_file=$(cat << EOF
{{
"Library_name": {snakemake.wildcards.tumor_library},
"VCF_file": $name_vcf,
"VCF_md5": $vcf_md5,
"BED_file": $bed_file_name,
"BED_md5": $bed_md5,
"TMB": $TMB,
"Number_variants": $number_variants,
"Number_snvs": $number_snvs,
"Number_indels": $number_indels,
"Total_regions_length": $total_exom_length
}}
EOF
)
echo $out_file > {snakemake.output.json}
fi
pushd $(dirname {snakemake.output.json})
md5sum $(basename {snakemake.output.json}) > $(basename {snakemake.output.json_md5})
}
"""
)

Expand Down
3 changes: 0 additions & 3 deletions snappy_wrappers/wrappers/vep/run/environment.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
channels:
- conda-forge
- bioconda
dependencies:
- python
- ensembl-vep=102
- htslib

Loading

0 comments on commit 4fd0c73

Please sign in to comment.