Skip to content

Commit

Permalink
fix: minor issues with sigpromatrix and snv multiplicity
Browse files Browse the repository at this point in the history
  • Loading branch information
shihabdider committed Jul 7, 2024
1 parent 793835b commit dc3dc6f
Show file tree
Hide file tree
Showing 12 changed files with 730 additions and 293 deletions.
Binary file added bin/SnpSift.jar
Binary file not shown.
Binary file added bin/downsamplevcf.jar
Binary file not shown.
1 change: 1 addition & 0 deletions bin/sigprofilerassignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def main(args):
output=output_indel,
input_type="vcf",
context_type="ID",
collapse_to_SBS96=False,
cosmic_version=cosmic_version,
exome=False,
genome_build=genome,
Expand Down
90 changes: 81 additions & 9 deletions bin/snv_multiplicity3.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
make_option(c("-g", "--germline_snv"), type = "character", help = "Path to germline snv file"),
make_option(c("-j", "--jabba"), type = "character", help = "Path to jabba file"),
make_option(c("-f", "--fasta"), type = "character", help = "Path to ref fasta file"),
make_option(c("--downsample_jar"), type = "character", help = "Path to downsample jar"),
make_option(c("--snpsift_jar"), type = "character", help = "Path to snpsift jar"),
make_option(c("--vcfeff_perl"), type = "character", help = "Path to vcfeff perl"),
make_option(c("-k", "--cores"), type = "integer", help = "Number of cores"),
make_option(c("-o", "--outdir"), type = "character", default = './', help = "output directory"),
make_option(c("-o", "--outdir"), type = "character", default = './', help = "output directory")
)

parseobj = OptionParser(option_list=option_list)
Expand Down Expand Up @@ -40,10 +43,80 @@
library(rtracklayer)
library(khtools)
library(skidb)
devtools::load_all("~/git/zitools")
library(zitools)
})
})

#' @name gg2jab
#' @title gg2jab
#'
#' @description
#'
#' Hacky way to make a JaBbA-like output from a gGraph
#'
#' @param gg (gGraph)
#' @param purity (numeric) overwrite automated purity (default 1)
#' @param ploidy (numeric) overwrite automated purity ploidy calculation
#' @return list with names:
#' - segstats (GRanges, signed)
#' - adj (adjacency matrix)
#' - ab.edges (array)
#' - purity (numeric)
#' - ploidy (numeric)
#' - junctions (GRangesList)
gg2jab = function(gg, purity = NA, ploidy = NA) {

if (!inherits(gg, 'gGraph')) {
stop("Must supply gGraph")
}

## create ab.edges object
ab.edges = array(NA, dim = c(length(gg$junctions[type == "ALT"]), 3, 2),
dimnames = list(NULL, c('from', 'to', 'edge.ix'), c('+', '-')))
ab.edges[, 1, 1] = gg$sedgesdt[sedge.id > 0][match(gg$junctions[type == "ALT"]$dt$edge.id, edge.id), from]
ab.edges[, 2, 1] = gg$sedgesdt[sedge.id > 0][match(gg$junctions[type == "ALT"]$dt$edge.id, edge.id), to]
ab.edges[, 3, 1] = gg$junctions[type == "ALT"]$dt$edge.id
ab.edges[, 1, 2] = gg$sedgesdt[sedge.id < 0][match(gg$junctions[type == "ALT"]$dt$edge.id, edge.id), from]
ab.edges[, 2, 2] = gg$sedgesdt[sedge.id < 0][match(gg$junctions[type == "ALT"]$dt$edge.id, edge.id), to]
ab.edges[, 3, 2] = gg$junctions[type == "ALT"]$dt$edge.id

## prepare djacency matrix - a sparse matrix where the nonzero entries
## are the CNs if that field exists and 1 otherwise
adj.dims = dim(gg$adj)
if (!is.null(gg$sedgesdt$cn)) {
adj = Matrix::sparseMatrix(i = gg$sedgesdt[, from],
j = gg$sedgesdt[, to],
x = gg$sedgesdt[, cn],
dims = adj.dims)
} else {
adj = Matrix::sparseMatrix(i = gg$sedgesdt[, from],
j = gg$sedgesdt[, to],
x = 1,
dims = adj.dims)
}

## initalize output which is a list
res = list(segstats = gg$gr,
adj = adj,
junctions = gg$junctions[type == "ALT"]$grl,
ab.edges = ab.edges)

## calculate purity/ploidy if not supplied
if (!is.na(ploidy)) {
res$ploidy = ploidy
} else {
res$ploidy = weighted.mean(gg$nodes$dt[, cn], gg$nodes$dt[, width], na.rm = TRUE)
}

if (!is.na(purity)) {
res$purity = purity
} else {
res$purity = 1
}

return(res)
}

# debug.R contents
est_snv_cn_stub =
function (vcf, jab, tumbam = NULL, germ_subsample = 2e+05, somatic = FALSE,
Expand All @@ -66,10 +139,10 @@
system2("bcftools", c("view -i 'FILTER==\"PASS\"'", vcf),
stdout = tmpvcf)
system2("java",
sprintf("-jar ~/software/jvarkit/dist/downsamplevcf.jar -N 10 -n %s %s",
germ_subsample, tmpvcf),
sprintf("-jar %s -N 10 -n %s %s",
opt$downsample_jar, germ_subsample, tmpvcf),
stdout = tmpvcf2,
env = "module unload java; module load java/1.8;")
)
gvcf = parsesnpeff(
tmpvcf, coding_alt_only = TRUE, keepfile = FALSE,
altpipe = TRUE)
Expand Down Expand Up @@ -280,9 +353,9 @@
on.exit(unlink(tmp.path))
try2({
catcmd = if (grepl("(.gz)$", vcf)) "zcat" else "cat"
onepline = "/gpfs/commons/groups/imielinski_lab/git/mskilab/flows/modules/SnpEff/source/snpEff/scripts/vcfEffOnePerLine.pl"
onepline = opt$vcfeff_perl
if (coding_alt_only) {
filt = "java -Xmx20m -Xms20m -XX:ParallelGCThreads=1 -jar /gpfs/commons/groups/imielinski_lab/git/mskilab/flows/modules/SnpEff/source/snpEff/SnpSift.jar filter \"( ANN =~ 'chromosome_number_variation|exon_loss_variant|rare_amino_acid|stop_lost|transcript_ablation|coding_sequence|regulatory_region_ablation|TFBS|exon_loss|truncation|start_lost|missense|splice|stop_gained|frame' )\""
filt = sprintf("java -Xmx20m -Xms20m -XX:ParallelGCThreads=1 -jar %s filter \"( ANN =~ 'chromosome_number_variation|exon_loss_variant|rare_amino_acid|stop_lost|transcript_ablation|coding_sequence|regulatory_region_ablation|TFBS|exon_loss|truncation|start_lost|missense|splice|stop_gained|frame' )\"", opt$snpsift_jar)
if (filterpass)
cmd = sprintf(paste(catcmd, "%s | %s | %s | bcftools view -i 'FILTER==\"PASS\"' | bgzip -c > %s"),
vcf, onepline, filt, tmp.path)
Expand Down Expand Up @@ -316,8 +389,7 @@
"protein_pos", "distance")
data.table::setnames(ann, fn)
if ("AD" %in% names(geno(vcf))) {
adep = setnames(as.data.table(geno(vcf)$AD[, , 1:2]),
c("ref", "alt"))
adep = setnames(as.data.table(geno(vcf)$AD[, 2, 1:2]), c("ref", "alt"))
gt = geno(vcf)$GT
}
else if (all(c("AU", "GU", "CU", "TU", "TAR", "TIR") %in%
Expand Down
60 changes: 60 additions & 0 deletions bin/vcfEffOnePerLine.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/perl

#-------------------------------------------------------------------------------
#
# Read a VCF file (via STDIN), split EFF fields from INFO column into many lines
# leaving one line per effect.
#
# Note: In lines having multiple effects, all other information will be
# repeated. Only the 'EFF' field will change.
#
# Pablo Cingolani 2012
#-------------------------------------------------------------------------------

$INFO_FIELD_NUM = 7;

while( $l = <STDIN> ) {
# Show header lines
if( $l =~ /^#/ ) { print $l; }
else {
chomp $l;

@t = @infos = @effs = (); # Clear arrays

# Non-header lines: Parse fields
@t = split /\t/, $l;

# Get INFO column
$info = $t[ $INFO_FIELD_NUM ];

# Parse INFO column
@infos = split /;/, $info;

# Find EFF field
$infStr = "";
foreach $inf ( @infos ) {
# Is this the EFF field? => Find it and split it
if( $inf =~/^EFF=(.*)/ ) {
@effs = split /,/, $1;
$fieldName = "EFF";
} elsif( $inf =~/^ANN=(.*)/ ) {
@effs = split /,/, $1;
$fieldName = "ANN";
} else {
$infStr .= ( $infStr eq '' ? '' : ';' ) . $inf;
}
}

# Print VCF line
if( $#effs <= 0 ) { print "$l\n"; } # No EFF found, just show line
else {
$pre = "";
for( $i=0 ; $i < $INFO_FIELD_NUM ; $i++ ) { $pre .= ( $i > 0 ? "\t" : "" ) . "$t[$i]"; }

$post = "";
for( $i=$INFO_FIELD_NUM+1 ; $i <= $#t ; $i++ ) { $post .= "\t$t[$i]"; }

foreach $eff ( @effs ) { print $pre . "\t" . $infStr . ( $infStr eq '' ? '' : ';' ) . "$fieldName=$eff" . $post . "\n" ; }
}
}
}
2 changes: 1 addition & 1 deletion conf/modules/sigprofilerassignment.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ process {
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/signatures/sigprofilerassignment/somatic/${meta.id}/" },
pattern: "*/**/*.txt"
pattern: "*/**/*{.txt,.all}"
]
}
}
8 changes: 8 additions & 0 deletions conf/modules/variant_annotation.config
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ process {
pattern: "*{.vcf*,.txt*,.html*,.csv*}"
]
}
withName: '.*VCF_SNV_MULTIPLICITY' {
ext.when = { params.tools && params.tools.split(',').contains('sage') }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/snv_multiplicity/${meta.id}/" },
pattern: "*{.rds*}"
]
}
withName: '.*VCF_SNPEFF_GERMLINE:SNPEFF_VCF_TO_BCF' {
ext.when = { params.tools && params.tools.split(',').contains('snpeff') }
publishDir = [
Expand Down
3 changes: 1 addition & 2 deletions modules/local/sigprofilerassignment/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ process SIGPROFILERASSIGNMENT {
output:
tuple val(meta), path("sbs_results/Assignment_Solution/**/*.txt") , emit: sbs_sigs, optional: true
tuple val(meta), path("indel_results/Assignment_Solution/**/*.txt") , emit: indel_sigs, optional: true
tuple val(meta), path("sigmat_results/Assignment_Solution/**/*.txt") , emit: sig_matrix, optional: true
tuple val(meta), path("sig_inputs/output/**/*.all") , emit: sig_matrix, optional: true
path "versions.yml" , emit: versions

when:
Expand All @@ -33,7 +33,6 @@ process SIGPROFILERASSIGNMENT {
--input-vcf ${vcf} \\
--genome ${genome} \\
--cosmic-version ${cosmic_version} \\
--output-directory ./ \\
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
59 changes: 59 additions & 0 deletions modules/local/snv_multiplicity/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
process SNV_MULTIPLICITY {

tag "$meta.id"
label 'process_medium'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'docker://mskilab/snv_multiplicity:0.0.3':
'mskilab/snv_multiplicity:0.0.3' }"

input:
// tuple val(meta), path(somatic_snv), path(somatic_snv_tbi), path(germline_snv), path(germline_snv_tbi), path(jabba_rds)
tuple val(meta), path(somatic_snv, stageAs: "somatic_snv.vcf"), path(germline_snv, stageAs: "germline_snv.vcf"), path(jabba_rds)
path(ref)
path(ref_fai)

output:
tuple val(meta), path('*est_snv_cn_somatic.rds'), emit: snv_multiplicity_rds
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
def DOWNSAMPLE_JAR = "${baseDir}/bin/downsamplevcf.jar"
def SNPSIFT_JAR = "${baseDir}/bin/SnpSift.jar"
def VCFEFF_PERL = "${baseDir}/bin/vcfEffOnePerLine.pl"

"""
export RSCRIPT_PATH=\$(echo "${baseDir}/bin/snv_multiplicity3.R")
Rscript \$RSCRIPT_PATH \\
--somatic_snv ${somatic_snv} \\
--germline_snv ${germline_snv} \\
--fasta ${ref} \\
--jabba ${jabba_rds} \\
--downsample_jar ${DOWNSAMPLE_JAR} \\
--snpsift_jar ${SNPSIFT_JAR} \\
--vcfeff_perl ${VCFEFF_PERL} \\
--cores ${task.cpus} \\
cat <<-END_VERSIONS > versions.yml
"${task.process}":
snv_multiplicity: ${VERSION}
END_VERSIONS
"""

stub:
"""
touch est_snv_cn_somatic.rds
cat <<-END_VERSIONS > versions.yml
"${task.process}":
snv_multiplicity: ${VERSION}
END_VERSIONS
"""
}
32 changes: 32 additions & 0 deletions subworkflows/local/vcf_snv_multiplicity/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
//
// VCF SNV_MULTIPLICITY
//

include { SNV_MULTIPLICITY } from '../../../modules/local/snv_multiplicity/main.nf'

workflow VCF_SNV_MULTIPLICITY {
// defining inputs
take:
input // required: [meta, somatic_vcf, somatic_tbi, germline_vcf, germline_tbi, jabba_rds]
ref
ref_fai

//Creating empty channels for output
main:
versions = Channel.empty()
snv_multiplicity_rds = Channel.empty()

SNV_MULTIPLICITY(
input,
ref,
ref_fai
)

// initializing outputs from snv_multiplicity
snv_multiplicity_rds = SNV_MULTIPLICITY.out.snv_multiplicity_rds
versions = SNV_MULTIPLICITY.out.versions

emit:
snv_multiplicity_rds
versions
}
Loading

0 comments on commit dc3dc6f

Please sign in to comment.