Skip to content

Commit

Permalink
Merge pull request #62 from mskcc/feature/agfusion_module
Browse files Browse the repository at this point in the history
Feature/agfusion module
  • Loading branch information
anoronh4 authored Aug 3, 2023
2 parents 4ceb357 + 7a03e7b commit ef7ee4c
Show file tree
Hide file tree
Showing 6 changed files with 223 additions and 0 deletions.
12 changes: 12 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,21 @@ process {
pattern: '*.tsv'
]
}

withName: AGFUSION_BATCH {
    // Options forwarded to `agfusion batch` through task.ext.args.
    ext.args = "-a cff_ext --noncanonical --middlestar"
    // Publishing is switched off for now (enabled: false). The pattern matches the
    // "<prefix>.fusion_transcripts.csv" file that AGFUSION_BATCH declares as output;
    // the bare name 'fusion_transcripts.csv' could never match a declared output.
    publishDir = [
        path: { "${params.outdir}/analysis/${meta.id}/agfusion" },
        mode: params.publish_dir_mode,
        pattern: '*.fusion_transcripts.csv',
        enabled: false
    ]
}

withName: MERGE_CFF {
    // Constant option string exposed to the module as task.ext.args
    // (the trailing space is part of the value and is kept as-is).
    ext.args = "-H "
}

withName: ONCOKB_FUSIONANNOTATOR {
ext.when = params.run_oncokb_fusionannotator
secret = ["ONCOKB_TOKEN"]
Expand Down
43 changes: 43 additions & 0 deletions modules/local/agfusion/batch/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Annotate a file of fusion calls with `agfusion batch` and collect the per-fusion
// transcript tables into one file per sample.
//
// Inputs : [meta, fusions] tuple, the AGFusion database file, the pyensembl cache directory.
// Outputs: the annotation directory "<prefix>/", the merged "<prefix>.fusion_transcripts.csv",
//          and versions.yml.
process AGFUSION_BATCH {
    tag "$meta.id"
    label 'process_low'

    // NOTE(review): versions.yml tags the tool as "(fork)"; the bioconda package named here
    // may not match what the container ships - confirm before relying on the conda profile.
    conda 'bioconda::agfusion=1.252'
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'cmopipeline/agfusion:0.0.3' :
        'cmopipeline/agfusion:0.0.3' }"

    input:
    tuple val(meta), path(fusions)
    path(agfusion_db)
    path(pyensembl_cache)

    output:
    tuple val(meta), path("${prefix}")                          , emit: fusions_annotated
    tuple val(meta), path("${prefix}.fusion_transcripts.csv"), emit: fusion_transcripts
    path "versions.yml"                                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Deliberately declared without `def`: the output block interpolates ${prefix},
    // and a `def` variable would be local to the script and invisible there.
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    export PYENSEMBL_CACHE_DIR=\$PWD/${pyensembl_cache}

    agfusion batch \\
        -f ${fusions} \\
        -db ${agfusion_db} \\
        -o ${prefix} \\
        ${args}

    # Merge the per-fusion tables and keep only the first header line.
    # awk reads the files itself because FNR restarts for every input file; on a single
    # piped stream FNR always equals NR and the repeated headers would never be skipped.
    # Lines are copied verbatim, so the merged file stays comma-separated.
    awk 'FNR == 1 && NR != 1 {next} {print}' ${prefix}/*/*.fusion_transcripts.csv > ${prefix}.fusion_transcripts.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        agfusion: \$(agfusion -v) (fork)
    END_VERSIONS
    """
}
49 changes: 49 additions & 0 deletions modules/local/agfusion/batch/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Module description for AGFUSION_BATCH; the output names below mirror the
# `emit:` names declared in main.nf.
name: agfusion_batch
description: AGFusion batch annotation
keywords:
  - agfusion
  - fusion
  - batch
  - frame
tools:
  - agfusion:
      description: Annotate a file containing fusions
      homepage: https://github.com/anoronh4/AGFusion
      documentation: https://github.com/anoronh4/AGFusion/blob/master/README.md

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fusions:
      type: path
      description: Fusions in one of many formats (may include arriba outputs or cff)
      pattern: "*"
  - agfusion_db:
      type: path
      description: File containing AGFusion reference information.
      pattern: "*.db"
  - pyensembl_cache:
      type: path
      description: Folder containing pyensembl cache.
      pattern: "*"

output:
  - versions:
      type: path
      description: File containing software versions
      pattern: "versions.yml"
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fusions_annotated:
      type: directory
      description: Folder containing annotations
      pattern: "${prefix}/"
  - fusion_transcripts:
      type: file
      description: Per-fusion fusion_transcripts.csv tables concatenated into a single file
      pattern: "${prefix}.fusion_transcripts.csv"

authors:
  - "@anoronh4"
26 changes: 26 additions & 0 deletions modules/local/agfusion/container/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Image providing AGFusion (installed from a git branch) together with pyensembl.
FROM ubuntu:bionic-20230530

LABEL maintainer="Anne Marie Noronha ([email protected])" \
      version.image="0.0.3"

# INSTALL DEPENDENCIES

ENV DEBIAN_FRONTEND=noninteractive

# `apt-get update` and `apt-get install` share one layer so a cached, outdated package
# index can never be combined with a changed package list. The index is removed at the
# end of the layer to keep the image small.
# default-libmysqlclient-dev is the additional library needed by mysqlclient, which the
# AGFusion build command requires.
RUN apt-get update -y && \
    apt-get install -y \
        build-essential \
        python3 \
        python3-pip \
        python3-matplotlib \
        python3-pandas \
        python3-future \
        python3-biopython \
        curl \
        less \
        vim \
        libnss-sss \
        git \
        zip \
        default-libmysqlclient-dev && \
    rm -rf /var/lib/apt/lists/*
RUN pip3 install --upgrade pip
RUN pip3 install pyensembl

# Additional Python library needed for AGFusion build command
RUN pip3 install mysqlclient

# INSTALL AGFUSION & DATABASE FILES
WORKDIR /usr/local/bin
RUN git clone https://github.com/anoronh4/AGFusion.git --branch feature/metafusion_parser
WORKDIR /usr/local/bin/AGFusion
RUN pip3 install .

# Force gtfparse to 1.2.1 after everything else is installed (this changes gtfparse,
# not pyensembl itself).
# NOTE(review): presumably newer gtfparse releases are incompatible with pyensembl - confirm.
RUN pip3 install gtfparse==1.2.1 --upgrade
58 changes: 58 additions & 0 deletions modules/local/agfusion/download/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Prepare the reference data used by AGFusion: a pyensembl cache for the requested
// Ensembl release and an AGFusion database file.
//
// Inputs : ensembl_release (number or numeric string), genome (one of the aliases below).
// Outputs: "agfusion.*.db", the "pyensembl_cache" directory, and versions.yml.
// Fails early with an explicit message when the genome is not a supported alias.
process AGFUSION_DOWNLOAD {
    label 'process_low'

    // NOTE(review): versions.yml tags the tool as "(fork)"; the bioconda package named here
    // may not match what the container ships - confirm before relying on the conda profile.
    conda 'bioconda::agfusion=1.252'
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'cmopipeline/agfusion:0.0.3' :
        'cmopipeline/agfusion:0.0.3' }"

    input:
    val(ensembl_release)
    val(genome)

    output:
    path "agfusion.*.db"  , emit: agfusion_db
    path "pyensembl_cache", emit: pyensembl_cache
    path "versions.yml"   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // task.ext.args is not consumed by this module.
    // Accepted genome aliases, keyed by the genome name passed to agfusion.
    def genome_aliases = [
        hg19: ['GRCh37','smallGRCh37','hg19'],
        hg38: ['GRCh38','hg38'],
        mm10: ['GRCm38','mm10']
    ]
    def agfusion_genome = genome_aliases.find { name, aliases -> aliases.contains(genome?.toString()) }?.key
    if (!agfusion_genome) {
        // The original ternary chain had no fallback branch; stop with a clear message
        // instead of running the tools with an empty genome name.
        error("AGFUSION_DOWNLOAD: unsupported genome '${genome}'. Supported values: ${genome_aliases.values().flatten().join(', ')}")
    }
    def pyensembl_species = agfusion_genome == 'mm10' ? 'mus_musculus' : 'homo_sapiens'
    // Compare numerically even when the release is supplied as a string.
    def release_number = ensembl_release as Integer
    if (release_number < 93) {
        // Releases below 93: fetch the database with `agfusion download`.
        """
        export PYENSEMBL_CACHE_DIR=\$PWD/pyensembl_cache

        pyensembl install --species ${pyensembl_species} --release ${ensembl_release}

        agfusion download -g ${agfusion_genome}

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            agfusion: \$(agfusion -v) (fork)
        END_VERSIONS
        """
    } else {
        // Release 93 and later: build the database locally from the Pfam table.
        """
        export PYENSEMBL_CACHE_DIR=\$PWD/pyensembl_cache

        pyensembl install --species ${pyensembl_species} --release ${ensembl_release}

        # --fail makes curl exit non-zero on an HTTP error instead of saving the error page
        curl --fail --location http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/database_files/pfamA.txt.gz > pfamA.txt.gz
        gunzip pfamA.txt.gz
        agfusion build --dir . --species ${agfusion_genome} --release ${ensembl_release} --pfam pfamA.txt
        rm pfamA.txt

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            agfusion: \$(agfusion -v) (fork)
        END_VERSIONS
        """
    }
}
35 changes: 35 additions & 0 deletions modules/local/agfusion/download/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Module description for AGFUSION_DOWNLOAD; input and output names mirror main.nf.
name: agfusion_download
description: Download or build the AGFusion database and install the matching pyensembl cache
keywords:
  - agfusion
  - fusion
  - database
  - download
tools:
  - agfusion:
      description: Download or build the reference database used by AGFusion
      homepage: https://github.com/anoronh4/AGFusion
      documentation: https://github.com/anoronh4/AGFusion/blob/master/README.md

input:
  - ensembl_release:
      type: value
      description: |
        Ensembl release number. Releases below 93 use `agfusion download`;
        release 93 and later build the database with `agfusion build`.
      pattern: "*"
  - genome:
      type: value
      description: |
        Genome build name. Accepted values are GRCh37, smallGRCh37, hg19,
        GRCh38, hg38, GRCm38 and mm10.
      pattern: "*"

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - agfusion_db:
      type: file
      description: AGFusion database file
      pattern: "agfusion.*.db"
  - pyensembl_cache:
      type: directory
      description: Directory used as PYENSEMBL_CACHE_DIR during the pyensembl installation
      pattern: "pyensembl_cache"

authors:
  - "@praveenraj2018"
  - "@anoronh4"

0 comments on commit ef7ee4c

Please sign in to comment.