Skip to content

Commit

Permalink
feat: Initial implementation of Molecular bar codes handling using AGeNT
Browse files Browse the repository at this point in the history
  • Loading branch information
ericblanc20 committed Oct 31, 2023
1 parent 4874074 commit a3f171d
Show file tree
Hide file tree
Showing 6 changed files with 512 additions and 7 deletions.
22 changes: 22 additions & 0 deletions snappy_pipeline/workflows/ngs_mapping/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,28 @@ rule ngs_mapping_bwa_mem2_run:
wf.wrapper_path("bwa_mem2")


# Run Molecular Barcodes meta-tool --------------------------------------------


# Rule for the "mbcs" step part, action "run": every directive below is
# resolved through the workflow object ``wf`` using the ("mbcs", "run") pair.
rule ngs_mapping_mbcs_run:
input:
wf.get_input_files("mbcs", "run"),
output:
# The output files are unpacked into named outputs.
**wf.get_output_files("mbcs", "run"),
threads: wf.get_resource("mbcs", "run", "threads")
resources:
# Resource values are looked up per key for the "mbcs"/"run" action.
time=wf.get_resource("mbcs", "run", "time"),
memory=wf.get_resource("mbcs", "run", "memory"),
partition=wf.get_resource("mbcs", "run", "partition"),
tmpdir=wf.get_resource("mbcs", "run", "tmpdir"),
params:
# Arguments handed to the wrapper, obtained by dispatching to the
# step part's ``get_args`` for the "run" action.
args=wf.substep_dispatch("mbcs", "get_args", "run"),
log:
# The log files are unpacked into named log entries.
**wf.get_log_file("mbcs", "run"),
wrapper:
# Wrapper location is resolved by name ("mbcs") through the workflow.
wf.wrapper_path("mbcs")


# Run STAR --------------------------------------------------------------------


Expand Down
85 changes: 84 additions & 1 deletion snappy_pipeline/workflows/ngs_mapping/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@
EXT_NAMES = ("bam", "bai", "bam_md5", "bai_md5")

#: Available read mappers for (short/Illumina) DNA-seq data
READ_MAPPERS_DNA = ("bwa", "bwa_mem2")
READ_MAPPERS_DNA = ("bwa", "bwa_mem2", "mbcs")

#: Available read mappers for (short/Illumina) RNA-seq data
READ_MAPPERS_RNA = ("star",)
Expand Down Expand Up @@ -389,6 +389,21 @@
trim_adapters: false
mask_duplicates: true
split_as_secondary: true # -M flag
# Configuration for mbcs (meta sub-step to deal with Molecular Barcodes & base quality recalibration)
mbcs:
mapping_tool: REQUIRED # Either bwa or bwa_mem2. The indices & other parameters are taken from mapper config
mbc_tool: agent # Only agent currently implemented
agent:
prepare:
path: REQUIRED
lib_prep_type: REQUIRED # One of "halo" (HaloPlex), "hs" (HaloPlexHS), "xt" (SureSelect XT, XT2, XT HS), "v2" (SureSelect XT HS2) & "qxt" (SureSelect QXT)
extra_args: [] # Consider "-polyG 8" for NovaSeq data & "-minFractionRead 50" for 100 cycles data
mark_duplicates:
path: REQUIRED
consensus_mode: REQUIRED # One of "SINGLE", "HYBRID", "DUPLEX"
input_filter_args: []
consensus_filter_args: []
extra_args: []
# Configuration for STAR
star:
path_index: REQUIRED # Required if listed in ngs_mapping.tools.rna; otherwise, can be removed.
Expand Down Expand Up @@ -760,6 +775,73 @@ def check_config(self):
)


class MBCsStepPart(ReadMappingStepPart):
    """Support for performing NGS alignment on MBC (molecular barcode) data.

    The actual read mapper is taken from the ``mbcs.mapping_tool``
    configuration entry; this step part validates that mapper's configuration
    in addition to its own (AGeNT) settings.
    """

    name = "mbcs"
    tool_category = "dna"

    #: Accepted values for ``mbcs.agent.prepare.lib_prep_type``.
    LIB_PREP_TYPES = ("halo", "hs", "xt", "v2", "qxt")
    #: Accepted values for ``mbcs.agent.mark_duplicates.consensus_mode``.
    CONSENSUS_MODES = ("SINGLE", "HYBRID", "DUPLEX")

    def get_resource_usage(self, action):
        """Get Resource Usage

        :param action: Action (i.e., step) in the workflow, example: 'run'.
        :type action: str
        :return: Returns ResourceUsage for step.
        :raises UnsupportedActionException: if action not in class defined list of valid actions.
        """
        self._validate_action(action)
        return ResourceUsage(
            threads=1,
            time="24:00:00",
            memory="4G",
        )

    def check_config(self):
        """Check parameters in configuration.

        Method checks that the parameters required to run the ``mbcs`` sub-step
        are valid: the configured mapping tool must be a DNA read mapper other
        than ``mbcs`` itself (its own configuration is checked as well), the
        configured AGeNT ``prepare`` and ``mark_duplicates`` paths must exist,
        and the library preparation type and consensus mode must be among the
        supported values.

        :raises InvalidConfiguration: if any of the checks above fails.
        """
        # Check if tool is at all included in workflow
        if self.__class__.name not in self.config["tools"]["dna"]:
            return  # mbcs not run, don't check configuration  # pragma: no cover

        # Check mapper. Raise explicitly instead of using ``assert``: assertions
        # are removed when Python runs with -O, which would silently skip the
        # validation of user-provided configuration.
        mapper = self.config["mbcs"]["mapping_tool"]
        if mapper == "mbcs" or mapper not in READ_MAPPERS_DNA:
            raise InvalidConfiguration(f'Unknown mapper "{mapper}"')
        # Delegate to the selected mapper's own configuration check.
        self.parent.sub_steps[mapper].check_config()

        agent_config = self.config["mbcs"]["agent"]

        # Check trimmer & creak paths
        path = agent_config["prepare"]["path"]
        if not os.path.exists(path):
            raise InvalidConfiguration(
                f"Expected agent's trimmer input path {path} does not exist!"
            )
        path = agent_config["mark_duplicates"]["path"]
        if not os.path.exists(path):
            raise InvalidConfiguration(f"Expected agent's creak input path {path} does not exist!")

        # Check mandatory options
        option = agent_config["prepare"]["lib_prep_type"]
        if option not in self.__class__.LIB_PREP_TYPES:
            options = '", "'.join(self.__class__.LIB_PREP_TYPES)
            raise InvalidConfiguration(
                f'Unknown library preparation type "{option}", valid options are "{options}"'
            )
        option = agent_config["mark_duplicates"]["consensus_mode"]
        if option not in self.__class__.CONSENSUS_MODES:
            options = '", "'.join(self.__class__.CONSENSUS_MODES)
            raise InvalidConfiguration(
                f'Unknown consensus mode "{option}", valid options are "{options}"'
            )


class StarStepPart(ReadMappingStepPart):
"""Support for performing NGS alignment using STAR"""

Expand Down Expand Up @@ -1362,6 +1444,7 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir)
(
BwaStepPart,
BwaMem2StepPart,
MBCsStepPart,
ExternalStepPart,
LinkInStep,
Minimap2StepPart,
Expand Down
13 changes: 13 additions & 0 deletions snappy_wrappers/wrappers/mbcs/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
channels:
- conda-forge
- bioconda
dependencies:
- cffi
- pandas
- numpy
- snakemake-minimal
- openjdk =17
- seqtk
- samtools
- bwa-mem2 ==2.2.1
- gatk4
Loading

0 comments on commit a3f171d

Please sign in to comment.