Skip to content

Commit

Permalink
extra tools (#21)
Browse files Browse the repository at this point in the history
* gatk4 versions, add latest version

* new gatk4 tool - mergebamalignment

* syntax fixes

* naming error

* new tool: GATK4 FastqToSam

* new tool: GATK4 FastqToSam

* fix merge error

* new tool: bamsormadup

* new tool: io_lib Scramble

* import and formatting fixes

* fix version imports

* fix bamsormadup tool args

* bamsormadup: fix docker url

* fix scramble tool args

* scramble: fix output type

* remove bool type from ToolArgument

* bump gatk4 version

* update fastaFai type, usually fai and dict are required together in workflows

* Revert "update fastaFai type, usually fai and dict are required together in workflows"

This reverts commit be8d880.
  • Loading branch information
matthdsm authored Mar 6, 2020
1 parent cbd1ca8 commit 2844e5a
Show file tree
Hide file tree
Showing 19 changed files with 1,266 additions and 17 deletions.
2 changes: 2 additions & 0 deletions janis_bioinformatics/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
)
from janis_bioinformatics.tools import (
babrahambioinformatics,
biobambam,
bcftools,
bwa,
common,
Expand All @@ -14,6 +15,7 @@
gatk4,
htslib,
illumina,
io_lib,
multiqc,
papenfuss,
pmac,
Expand Down
1 change: 1 addition & 0 deletions janis_bioinformatics/tools/biobambam/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .bamsormadup.versions import *
Empty file.
175 changes: 175 additions & 0 deletions janis_bioinformatics/tools/biobambam/bamsormadup/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
from abc import ABC
from typing import Any, Dict

from janis_core import (
ToolInput,
ToolArgument,
WildcardSelector,
Int,
Float,
Boolean,
String,
ToolOutput,
Filename,
InputSelector,
CaptureType,
CpuSelector,
Stdout,
get_value_for_hints_and_ordered_resource_tuple,
ToolMetadata,
)

from janis_bioinformatics.data_types import Sam, FastaWithDict, FastqGzPair, Bam, File
from janis_bioinformatics.tools.bioinformaticstoolbase import BioinformaticsTool

# Memory value per capture-type hint. BamSorMaDupBase.memory() looks these up
# through get_value_for_hints_and_ordered_resource_tuple.
# NOTE(review): the unit is not stated in this file (presumably GB) - confirm.
_BAMSORMADUP_MEM_BY_CAPTURE_TYPE = {
    CaptureType.TARGETED: 8,
    CaptureType.EXOME: 12,
    CaptureType.CHROMOSOME: 12,
    CaptureType.THIRTYX: 16,
    CaptureType.NINETYX: 20,
    CaptureType.THREEHUNDREDX: 24,
}

# Ordered list of (hint key, {hint value: resource}) pairs.
BAMSORMADUP_MEM_TUPLE = [(CaptureType.key(), _BAMSORMADUP_MEM_BY_CAPTURE_TYPE)]

# CPU count per capture-type hint. BamSorMaDupBase.cpus() looks these up
# through get_value_for_hints_and_ordered_resource_tuple.
_BAMSORMADUP_CORES_BY_CAPTURE_TYPE = {
    CaptureType.TARGETED: 16,
    CaptureType.EXOME: 20,
    CaptureType.CHROMOSOME: 24,
    CaptureType.THIRTYX: 30,
    CaptureType.NINETYX: 32,
    CaptureType.THREEHUNDREDX: 32,
}

# Ordered list of (hint key, {hint value: resource}) pairs.
BAMSORMADUP_CORES_TUPLE = [
    (CaptureType.key(), _BAMSORMADUP_CORES_BY_CAPTURE_TYPE)
]


class BamSorMaDupBase(BioinformaticsTool, ABC):
    """Version-independent Janis definition of biobambam's ``bamsormadup``.

    The tool is invoked as ``bamsormadup`` with ``key=value`` style options.
    The resulting BAM is captured from standard output under the name given
    by the ``outputFilename`` input, and the duplicate-marking metrics are
    written to ``metrics.txt`` and collected as a second output.

    Concrete versions combine this class with a version class that supplies
    ``container()`` and ``version()``.
    """

    def tool(self):
        """Return the tool identifier."""
        return "bamsormadup"

    def friendly_name(self):
        """Return the human-readable tool name."""
        return "BamSorMaDup"

    def tool_provider(self):
        """Return the name of the suite this tool belongs to."""
        return "BioBamBam"

    def base_command(self):
        """Return the executable (as a list of command tokens)."""
        return ["bamsormadup"]

    def inputs(self):
        """Return the required inputs followed by the shared optional ones."""
        return [
            # NOTE(review): the BAM is passed as a positional argument here;
            # confirm bamsormadup reads it this way rather than from stdin.
            ToolInput("alignedReads", Bam(), position=200),
            ToolInput("outputFilename", Filename(extension=".bam")),
            *BamSorMaDupBase.additional_inputs,
        ]

    def arguments(self):
        """Return the fixed ``key=value`` arguments added to every call."""
        return [
            ToolArgument(
                "metrics.txt",
                prefix="M=",
                separate_value_from_prefix=False,
                doc="file containing metrics from duplicate removal",
            ),
            ToolArgument(
                "bam",
                prefix="inputformat=",
                separate_value_from_prefix=False,
                doc="input data format",
            ),
            ToolArgument(
                "bam",
                # The documented option key is lower-case "outputformat";
                # the former "outputFormat=" spelling did not match it.
                prefix="outputformat=",
                separate_value_from_prefix=False,
                doc="output data format",
            ),
        ]

    def outputs(self):
        """Return the sorted/marked BAM (from stdout) and the metrics file."""
        return [
            ToolOutput(
                "out", Stdout(Bam(), stdoutname=InputSelector("outputFilename"))
            ),
            # Must match the file name passed through "M=" in arguments().
            ToolOutput("metrics", File(), glob=WildcardSelector("metrics.txt")),
        ]

    def memory(self, hints: Dict[str, Any]):
        """Return the memory request for ``hints``, defaulting to 16.

        NOTE(review): the unit is not stated here (presumably GB) - confirm.
        """
        val = get_value_for_hints_and_ordered_resource_tuple(
            hints, BAMSORMADUP_MEM_TUPLE
        )
        # Any falsy lookup result (no matching hint) falls back to the default.
        return val or 16

    def cpus(self, hints: Dict[str, Any]):
        """Return the CPU request for ``hints``, defaulting to 4."""
        val = get_value_for_hints_and_ordered_resource_tuple(
            hints, BAMSORMADUP_CORES_TUPLE
        )
        # Any falsy lookup result (no matching hint) falls back to the default.
        return val or 4

    def bind_metadata(self):
        """Return the descriptive metadata for this tool."""
        from datetime import date

        return ToolMetadata(
            # Handle corrected to match the commit author (matthdsm).
            contributors=["Matthias De Smet (@matthdsm)"],
            dateCreated=date(2020, 2, 26),
            dateUpdated=date(2020, 2, 26),
            # NOTE(review): this is the string "None", not the None object.
            institution="None",
            doi=None,
            keywords=["duplicates", "sort"],
            documentationUrl="https://gitlab.com/german.tischler/biobambam2",
            documentation="bamsormadup: parallel sorting and duplicate marking",
        )

    # Optional key=value options appended to the required inputs by inputs().
    additional_inputs = [
        ToolInput(
            "level",
            Int(optional=True),
            prefix="level=",
            separate_value_from_prefix=False,
            default=0,
            doc="compression settings for output bam file (-1=zlib default,0=uncompressed,1=fast,9=best)",
        ),
        ToolInput(
            "tempLevel",
            Int(optional=True),
            prefix="templevel=",
            separate_value_from_prefix=False,
            default=0,
            doc="compression settings for temporary bam files (-1=zlib default,0=uncompressed,1=fast,9=best)",
        ),
        ToolInput(
            "threads",
            Int(optional=True),
            # The doc text quotes a default of 1, but this wrapper defaults
            # to CpuSelector().
            default=CpuSelector(),
            prefix="threads=",
            separate_value_from_prefix=False,
            doc="Number of threads. (default = 1)",
        ),
        ToolInput(
            "sortOrder",
            String(optional=True),
            prefix="SO=",
            separate_value_from_prefix=False,
            default="coordinate",
            doc="output sort order(coordinate by default)",
        ),
        ToolInput(
            "optMinPixelDif",
            Int(optional=True),
            prefix="optminpixeldif=",
            separate_value_from_prefix=False,
            default=2500,
            doc="pixel difference threshold for optical duplicates (patterned flowcell: 12000, unpatterned flowcell: 2500)",
        ),
    ]
13 changes: 13 additions & 0 deletions janis_bioinformatics/tools/biobambam/bamsormadup/versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from ..versions import BioBamBam_2_0_87
from .base import BamSorMaDupBase


class BamSorMaDup_2_0_87(BioBamBam_2_0_87, BamSorMaDupBase):
    """bamsormadup tool definition bound to the BioBamBam 2.0.87 version class."""


# Alias for the most recent bamsormadup version defined in this module.
BamSorMaDupLatest = BamSorMaDup_2_0_87


if __name__ == "__main__":
    # Running the module directly prints the tool's help text.
    latest_tool = BamSorMaDupLatest()
    print(latest_tool.help())
12 changes: 12 additions & 0 deletions janis_bioinformatics/tools/biobambam/versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from abc import ABC


class BioBamBam_2_0_87(ABC):
    """Version class pinning BioBamBam tools to release 2.0.87."""

    def container(self):
        """Return the container image reference for this release."""
        image = "quay.io/biocontainers/biobambam:2.0.87--1"
        return image

    def version(self):
        """Return the version string for this release."""
        release = "2.0.87"
        return release


# Alias for the most recent BioBamBam version defined in this module.
BioBamBamLatest = BioBamBam_2_0_87
32 changes: 16 additions & 16 deletions janis_bioinformatics/tools/gatk4/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,26 @@
Gatk4CalculateContamination_4_1_4,
Gatk4CalculateContaminationLatest,
)
from .createsequencedictionary.versions import (
Gatk4CreateSequenceDictionary_4_1_2,
Gatk4CreateSequenceDictionary_4_1_3,
Gatk4CreateSequenceDictionary_4_1_4,
Gatk4CreateSequenceDictionaryLatest,
)
from .fastqtosam.versions import *
from .filtermutectcalls.versions import (
Gatk4FilterMutectCalls_4_1_2,
Gatk4FilterMutectCalls_4_1_3,
Gatk4FilterMutectCalls_4_1_4,
Gatk4FilterMutectCallsLatest,
)
from .gathervcfs.versions import (
Gatk4GatherVcfs_4_0,
Gatk4GatherVcfs_4_1_2,
Gatk4GatherVcfs_4_1_3,
Gatk4GatherVcfs_4_1_4,
Gatk4GatherVcfsLatest,
)
from .genotypeconcordance.versions import (
Gatk4GenotypeConcordance_4_0,
Gatk4GenotypeConcordance_4_1_2,
Expand Down Expand Up @@ -57,6 +71,7 @@
Gatk4MarkDuplicates_4_1_4,
Gatk4MarkDuplicatesLatest,
)
from .mergebamalignment.versions import *
from .mergemutectstats.versions import (
Gatk4MergeMutectStats_4_1_2,
Gatk4MergeMutectStats_4_1_3,
Expand Down Expand Up @@ -91,24 +106,9 @@
Gatk4SortSam_4_1_4,
Gatk4SortSamLatest,
)
from .gathervcfs.versions import (
Gatk4GatherVcfs_4_0,
Gatk4GatherVcfs_4_1_2,
Gatk4GatherVcfs_4_1_3,
Gatk4GatherVcfs_4_1_4,
Gatk4GatherVcfsLatest,
)

from .splitreads.versions import (
Gatk4SortSamLatest,
Gatk4SplitReads_4_1_2,
Gatk4SplitReads_4_1_3,
Gatk4SplitReads_4_1_4,
Gatk4SortSamLatest,
)

from .createsequencedictionary.versions import (
Gatk4CreateSequenceDictionary_4_1_2,
Gatk4CreateSequenceDictionary_4_1_3,
Gatk4CreateSequenceDictionary_4_1_4,
Gatk4CreateSequenceDictionaryLatest,
)
Empty file.
Loading

0 comments on commit 2844e5a

Please sign in to comment.