Skip to content

Commit

Permalink
Adding SV callers SVABA & GRIDSS
Browse files Browse the repository at this point in the history
  • Loading branch information
tanubrata committed Oct 10, 2023
1 parent fce5d68 commit 770715d
Show file tree
Hide file tree
Showing 12 changed files with 385 additions and 21 deletions.
10 changes: 10 additions & 0 deletions conf/igenomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ params {
vep_cache_version = 110
vep_genome = 'GRCh37'
vep_species = 'homo_sapiens'
indel_mask = "${projectDir}/data/snowman_blacklist.bed"
germ_sv_db = "${projectDir}/data/snowman_germline_mini_160413.bed"
simple_seq_db = "${projectDir}/data/repeat_masker_hg19_Simple.bed"
blacklist_gridss = "${projectDir}/data/ENCFF001TDO_hg19_nochr.bed"
pon_gridss = "${projectDir}/data/GRIDSS/pon/hg19/"
}
'GATK.GRCh38' {
fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta"
Expand Down Expand Up @@ -69,6 +74,11 @@ params {
vep_cache_version = 110
vep_genome = 'GRCh38'
vep_species = 'homo_sapiens'
indel_mask = "${projectDir}/data/snowman_blacklist.hg38.bed"
germ_sv_db = "${projectDir}/data/snowman_germline_mini_hg38.bed"
simple_seq_db = "${projectDir}/data/repeat_masker_hg38_simple.bed"
blacklist_gridss = "${projectDir}/data/ENCFF356LFX_hg38.bed"
pon_gridss = "${projectDir}/data/GRIDSS/pon/hg38/"
}

'GRCh37' {
Expand Down
19 changes: 18 additions & 1 deletion conf/modules/structural_variants.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,25 @@ process {
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/SV_calling/SVABA/${meta.id}/" },
pattern: "*{vcf.gz,txt.gz,vcf,bam}"
pattern: "*{vcf.gz,txt.gz,vcf*,bam}"
]
}

withName: 'NFCORE_HEISENBIO:HEISENBIO:BAM_SVCALLING_GRIDSS:GRIDSS_GRIDSS' {
ext.prefix = { "${meta.id}.gridss" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/SV_calling/GRIDSS/${meta.id}/" },
pattern: "*{vcf.gz,txt.gz,vcf*,bam}"
]
}

withName: 'NFCORE_HEISENBIO:HEISENBIO:BAM_SVCALLING_GRIDSS_SOMATIC:GRIDSS_SOMATIC' {
ext.prefix = { "${meta.id}.high_confidence_somatic" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/SV_calling/GRIDSS/${meta.id}/" },
pattern: "*{vcf.bgz, vcf.bgz.tbi}"
]
}
}
6 changes: 5 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ params.known_indels_vqsr = WorkflowMain.getGenomeAttribute(params, 'known_in
//params.vep_cache_version = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version')
//params.vep_genome = WorkflowMain.getGenomeAttribute(params, 'vep_genome')
//params.vep_species = WorkflowMain.getGenomeAttribute(params, 'vep_species')

params.indel_mask = WorkflowMain.getGenomeAttribute(params, 'indel_mask')
params.germ_sv_db = WorkflowMain.getGenomeAttribute(params, 'germ_sv_db')
params.simple_seq_db = WorkflowMain.getGenomeAttribute(params, 'simple_seq_db')
params.blacklist_gridss = WorkflowMain.getGenomeAttribute(params, 'blacklist_gridss')
params.pon_gridss = WorkflowMain.getGenomeAttribute(params, 'pon_gridss')

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
83 changes: 83 additions & 0 deletions modules/local/gridss/gridss/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@

process GRIDSS_GRIDSS {
tag "$meta.id"
label 'process_medium'

conda "bioconda::gridss=2.13.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gridss:2.13.2--h270b39a_0':
'biocontainers/gridss:2.13.2--h270b39a_0' }"


input:
tuple val(meta), path(normalbam), path(normalbai), path(tumorbam), path(tumorbai) // required: [meta, normal_cram, normal_crai, tumor_cram, tumor_crai]
path(fasta) // required: reference fasta
path(fasta_fai)
path(bwa_index) // required: bwa index folder
path(blacklist_gridss) // optional: gridss blacklist bed file based on genome


output:
tuple val(meta), path("*.vcf.gz") , emit: vcf, optional:true
tuple val(meta), path("*.vcf.gz.tbi") , emit: vcf_index, optional:true
tuple val(meta), path("*.assembly.bam") , emit: assembly, optional:true
path "versions.yml" , emit: versions


when:
task.ext.when == null || task.ext.when


script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '2.13.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
def assembly_bam = "--assembly ${meta.id}.assembly.bam"
def bwa = bwa_index ? "cp -s ${bwa_index}/* ." : ""
def blacklist = blacklist_gridss ? "--blacklist ${blacklist_gridss}" : ""

"""
${bwa}
gridss \\
--output ${prefix}.vcf.gz \\
--reference ${fasta} \\
--threads ${task.cpus} \\
$assembly_bam \\
$blacklist \\
--picardoptions VALIDATION_STRINGENCY=LENIENT \\
--jvmheap ${task.memory.toGiga() - 1}g \\
--otherjvmheap ${task.memory.toGiga() - 1}g \\
${normalbam} \\
${tumorbam}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gridss: ${VERSION}
END_VERSIONS
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '2.13.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.

def steps = args.contains("-s ") ? args.split('-s ')[-1].split(" ")[0] :
args.contains("--steps ") ? args.split('--steps ')[-1].split(" ")[0] :
"all"
def vcf = steps.contains("call") || steps.contains("all") ? "touch ${prefix}.vcf*" : ""
def assembly_bam = steps.contains("assembly") || steps.contains("all") ? "touch ${meta.id}.assembly.bam" : ""
"""
${vcf}
${assembly_bam}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gridss: ${VERSION}
END_VERSIONS
"""
}




62 changes: 62 additions & 0 deletions modules/local/gridss/somaticFilter/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
process GRIDSS_SOMATIC {
tag "$meta.id"
label 'process_medium'

conda "bioconda::gridss=2.13.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gridss:2.13.2--h270b39a_0':
'biocontainers/gridss:2.13.2--h270b39a_0' }"


input:
tuple val(meta), path(gridss_output)
path(pondir_gridss)

output:
tuple val(meta), path("*high_confidence_somatic.vcf.gz") , emit: somatic_high_vcf, optional:true
tuple val(meta), path("*high_and_low_confidence_somatic.vcf.gz") , emit: somatic_all_vcf, optional:true
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '2.13.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
def output1 = "${meta.id}.high_confidence_somatic.vcf.gz"
def output2 = "${meta.id}.high_and_low_confidence_somatic.vcf.gz"
//def pon = pondir_gridss ? "cp -s ${pondir_gridss}/* ." : ""
//def scriptDir = "dirname \$(which gridss_somatic_filter)".execute().text.trim()
"""
gridss_somatic_filter \\
--pondir ${pondir_gridss} \\
--input ${gridss_output} \\
--output $output1 \\
--fulloutput $output2 \\
-n 1 \\
-t 2
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gridss: ${VERSION}
END_VERSIONS
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '2.13.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
"""
touch ${prefix}.high_confidence_somatic.vcf.bgz
touch ${prefix}.high_and_low_confidence_somatic.vcf.bgz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gridss: ${VERSION}
END_VERSIONS
"""
}

7 changes: 6 additions & 1 deletion modules/local/svaba/main.nf
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

process SVABA {
tag "$meta.id"
label 'process_medium'
Expand All @@ -16,9 +17,11 @@ process SVABA {
path dbsnp
path dbsnp_tbi
path indel_mask
path germ_sv_db
path simple_seq_db
val error_rate

output:
output:
tuple val(meta), path("*.svaba.sv.vcf.gz") , emit: sv, optional: true
tuple val(meta), path("*.svaba.indel.vcf.gz") , emit: indel, optional: true
tuple val(meta), path("*.svaba.germline.indel.vcf.gz") , emit: germ_indel, optional: true
Expand Down Expand Up @@ -47,6 +50,7 @@ process SVABA {
def dbsnp = dbsnp ? "--dbsnp-vcf ${dbsnp}" : ""
def bwa = bwa_index ? "cp -s ${bwa_index}/* ." : ""
def indel_mask = indel_mask ? "--blacklist ${indel_mask}" : ""
def flags = germ_sv_db ? "--germline-sv-database ${germ_sv_db} --simple-seq-database ${simple_seq_db}" : ""
def error_rate = error_rate ? "--error-rate ${error_rate}" : ""

"""
Expand All @@ -59,6 +63,7 @@ process SVABA {
$dbsnp \\
$indel_mask \\
$error_rate \\
$flags \\
--id-string $meta.id \\
--reference-genome $fasta \\
--g-zip \\
Expand Down
4 changes: 2 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ params {

// Structural Variant Calling
error_rate = 0.01 // Default error_rate for Svaba
indel_mask = null // Must provide blacklist bed file for indels based on genome to run Svaba
//indel_mask = null // Must provide blacklist bed file for indels based on genome to run Svaba


// Variant Calling
Expand Down Expand Up @@ -134,7 +134,7 @@ params {
validationLenientMode = false
validationSchemaIgnoreParams = 'genomes, cf_ploidy'
validationShowHiddenParams = false
validate_params = true
validate_params = false

}

Expand Down
33 changes: 30 additions & 3 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
"fa_icon": "fas fa-toolbox",
"description": "Tools to use for duplicate marking, variant calling and/or for annotation.",
"help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
"pattern": "^((ascat|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|snpeff|strelka|tiddit|vep|svaba)?,?)*(?<!,)$"
"pattern": "^((ascat|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|manta|merge|mpileup|msisensorpro|mutect2|snpeff|strelka|tiddit|vep|svaba|gridss)?,?)*(?<!,)$"
},
"skip_tools": {
"type": "string",
Expand Down Expand Up @@ -229,7 +229,7 @@
}
},
"sv_calling": {
"title": "SV Calling",
"title": "Structural Variant Calling",
"type": "object",
"description": "Configure SV calling tools",
"default": "",
Expand All @@ -249,8 +249,35 @@
"description": "Provide Indel Mask for SVABA based on hg19/hg38. Should be .bed file",
"hidden": true,
"help_text": "Provide the indel mask path that you want to use for SVABA"
},
"germ_sv_db": {
"type": "string",
"fa_icon": "fas fa-forward",
"description": "Provide Germline SV file for reference for SVABA based on hg19/hg38. Should be .bed file",
"hidden": true,
"help_text": "Provide the Germline SV file path that you want to use for SVABA"
},
"simple_seq_db": {
"type": "string",
"fa_icon": "fas fa-forward",
"description": "Provide file containing sites of simple DNA for SVABA based on hg19/hg38. Should be .bed file",
"hidden": true,
"help_text": "Provide the simple DNA file path that you want to use for SVABA"
},
"blacklist_gridss": {
"type": "string",
"fa_icon": "fas fa-forward",
"description": "Provide file Blacklist for GRIDSS based on hg19/hg38. Should be .bed file",
"hidden": true,
"help_text": "Provide the Blacklist file path that you want to use for GRIDSS"
},
"pon_gridss": {
"type": "string",
"fa_icon": "fas fa-forward",
"description": "Provide the PON directory for GRIDSS Somatic filter",
"hidden": true,
"help_text": "Provide the PON directory for GRIDSS. Must contain both .bed and .bedpe file."
}

}
},
"variant_calling": {
Expand Down
80 changes: 80 additions & 0 deletions subworkflows/local/bam_svcalling_gridss/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//
// GRIDSS SV CALLING
//
//
//

include { GRIDSS_GRIDSS } from '../../../modules/local/gridss/gridss/main.nf'
include { GRIDSS_SOMATIC } from '../../../modules/local/gridss/somaticFilter/main.nf'

workflow BAM_SVCALLING_GRIDSS {
take:
cram // channel: [mandatory] [ meta, normalcram, normalcrai, tumorcram, tumorcrai ]
fasta // channel: [mandatory] reference fasta
fasta_fai // channel: [mandatory] reference fasta index
bwa_index // channel: [mandatory] bwa index path
blacklist_gridss // optional: blacklist bed file for gridss


main:
versions = Channel.empty()
vcf = Channel.empty()
vcf_index = Channel.empty()
assembly_bam = Channel.empty()

GRIDSS_GRIDSS(cram, fasta, fasta_fai, bwa_index, blacklist_gridss)

vcf = GRIDSS_GRIDSS.out.vcf
vcf_index = GRIDSS_GRIDSS.out.vcf_index
assembly_bam = GRIDSS_GRIDSS.out.assembly


versions = versions.mix(GRIDSS_GRIDSS.out.versions)

emit:
vcf
vcf_index
assembly_bam


versions

}


workflow BAM_SVCALLING_GRIDSS_SOMATIC {
take:
vcf
pondir_gridss

main:
versions = Channel.empty()
somatic_all = Channel.empty()
somatic_high_confidence = Channel.empty()

GRIDSS_SOMATIC(vcf, pondir_gridss)

somatic_all = GRIDSS_SOMATIC.out.somatic_all_vcf
somatic_high_confidence = GRIDSS_SOMATIC.out.somatic_high_vcf

versions = GRIDSS_SOMATIC.out.versions

all_vcf = Channel.empty().mix(somatic_all, somatic_high_confidence)

emit:
somatic_all
somatic_high_confidence
all_vcf

versions

}









Loading

0 comments on commit 770715d

Please sign in to comment.