-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #62 from mskcc/feature/agfusion_module
Feature/agfusion module
- Loading branch information
Showing
6 changed files
with
223 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// Annotate a batch of fusion calls with AGFusion.
//
// Inputs:
//   meta            - Groovy map with sample information (meta.id is used as tag and default prefix)
//   fusions         - fusion calls file passed to `agfusion batch -f`
//   agfusion_db     - AGFusion database file passed to `agfusion batch -db`
//   pyensembl_cache - staged pyensembl cache directory, exported as PYENSEMBL_CACHE_DIR
//
// Outputs:
//   fusions_annotated  - the `${prefix}` output directory written by `agfusion batch -o`
//   fusion_transcripts - `${prefix}.fusion_transcripts.csv`, the per-fusion CSVs merged under one header
//   versions           - versions.yml
process AGFUSION_BATCH {
    tag "$meta.id"
    label 'process_low'

    conda 'bioconda::agfusion=1.252'
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'cmopipeline/agfusion:0.0.3' :
        'cmopipeline/agfusion:0.0.3' }"

    input:
    tuple val(meta), path(fusions)
    path(agfusion_db)
    path(pyensembl_cache)

    output:
    tuple val(meta), path("${prefix}")                       , emit: fusions_annotated
    tuple val(meta), path("${prefix}.fusion_transcripts.csv"), emit: fusion_transcripts
    path "versions.yml"                                      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` is intentionally NOT declared with `def`: the output block above
    // references it, and a `def`-scoped variable is not visible there.
    prefix = task.ext.prefix ?: "${meta.id}"
    // The per-fusion CSVs are handed to awk as separate files (not piped through
    // cat) so that FNR restarts for each file and every header after the first
    // is dropped. Lines are printed unmodified, so no field separators are set.
    """
    export PYENSEMBL_CACHE_DIR=\$PWD/${pyensembl_cache}

    agfusion batch \\
        -f ${fusions} \\
        -db ${agfusion_db} \\
        -o ${prefix} \\
        ${args}

    awk 'NR != 1 && FNR == 1 {next} {print}' ${prefix}/*/*.fusion_transcripts.csv > ${prefix}.fusion_transcripts.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        agfusion: \$(agfusion -v) (fork)
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Module metadata for AGFUSION_BATCH. Output names mirror the process `emit:` labels.
name: agfusion_batch
description: AGFusion batch annotation
keywords:
  - agfusion
  - fusion
  - batch
  - frame
tools:
  - agfusion:
      description: Annotate a file containing fusions
      homepage: https://github.com/anoronh4/AGFusion
      documentation: https://github.com/anoronh4/AGFusion/blob/master/README.md

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fusions:
      type: file
      description: Fusions in one of many formats (may include arriba outputs or cff)
      pattern: "*"
  - agfusion_db:
      type: file
      description: File containing AGFusion reference information.
      pattern: "*.db"
  - pyensembl_cache:
      type: directory
      description: Folder containing pyensembl cache.
      pattern: "*"

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fusions_annotated:
      type: directory
      description: Folder containing annotations
      pattern: "${prefix}/"
  - fusion_transcripts:
      type: file
      description: Fusion transcript tables from all annotated fusions, concatenated into one CSV
      pattern: "*.fusion_transcripts.csv"

authors:
  - "@anoronh4"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Image providing the AGFusion fork (feature/metafusion_parser branch) together
# with pyensembl and the MySQL client libraries needed by `agfusion build`.
FROM ubuntu:bionic-20230530

LABEL maintainer="Anne Marie Noronha ([email protected])" \
      version.image="0.0.3"

# Build-time only: keeps apt from prompting without leaking into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive

# INSTALL DEPENDENCIES
# `update` and `install` share one layer so the package index can never be stale,
# and the index is removed in the same layer so it does not bloat the image.
# default-libmysqlclient-dev is required to build the mysqlclient wheel
# (used by the AGFusion build command).
# NOTE(review): recommended packages are still installed on purpose; the pip
# builds below may rely on headers pulled in that way - confirm before adding
# --no-install-recommends.
RUN apt-get update -y \
    && apt-get install -y \
        build-essential \
        curl \
        default-libmysqlclient-dev \
        git \
        less \
        libnss-sss \
        python3 \
        python3-biopython \
        python3-future \
        python3-matplotlib \
        python3-pandas \
        python3-pip \
        vim \
        zip \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip in its own step so later steps pick up the new pip3 executable.
RUN pip3 install --no-cache-dir --upgrade pip
RUN pip3 install --no-cache-dir pyensembl mysqlclient

# INSTALL AGFUSION & DATABASE FILES
# NOTE(review): the clone tracks a moving branch, so rebuilds are not
# reproducible - consider pinning a commit or tag.
WORKDIR /usr/local/bin
RUN git clone https://github.com/anoronh4/AGFusion.git --branch feature/metafusion_parser
WORKDIR /usr/local/bin/AGFusion
RUN pip3 install --no-cache-dir .

# Pin gtfparse to 1.2.1 for compatibility (presumably with pyensembl - confirm).
RUN pip3 install --no-cache-dir gtfparse==1.2.1 --upgrade
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
// Prepare the AGFusion database and the pyensembl cache for a genome build.
//
// Inputs:
//   ensembl_release - Ensembl release number; releases below 93 use
//                     `agfusion download`, releases 93 and above use `agfusion build`
//   genome          - genome build name; one of GRCh37, smallGRCh37, hg19,
//                     GRCh38, hg38, GRCm38, mm10
//
// Outputs:
//   agfusion_db     - agfusion.*.db in the task directory
//   pyensembl_cache - the pyensembl_cache directory populated by `pyensembl install`
//   versions        - versions.yml
process AGFUSION_DOWNLOAD {
    label 'process_low'

    conda 'bioconda::agfusion=1.252'
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'cmopipeline/agfusion:0.0.3' :
        'cmopipeline/agfusion:0.0.3' }"

    input:
    val(ensembl_release)
    val(genome)

    output:
    path "agfusion.*.db"  , emit: agfusion_db
    path "pyensembl_cache", emit: pyensembl_cache
    path "versions.yml"   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Map the pipeline genome name onto the genome label passed to agfusion.
    def agfusion_genome = ['GRCh37','smallGRCh37','hg19'].contains(genome) ? 'hg19' :
        ['GRCh38','hg38'].contains(genome) ? 'hg38' :
        ['GRCm38','mm10'].contains(genome) ? 'mm10' : null
    if (!agfusion_genome) {
        error "AGFUSION_DOWNLOAD: unsupported genome '${genome}' (expected one of GRCh37, smallGRCh37, hg19, GRCh38, hg38, GRCm38, mm10)"
    }
    def pyensembl_species = ['GRCm38','mm10'].contains(genome) ? 'mus_musculus' : 'homo_sapiens'
    // The release may arrive as a string (e.g. from params); compare numerically.
    def release_number = ensembl_release.toString().toInteger()
    if (release_number < 93) {
        """
        export PYENSEMBL_CACHE_DIR=\$PWD/pyensembl_cache
        pyensembl install --species ${pyensembl_species} --release ${ensembl_release}

        agfusion download -g ${agfusion_genome}

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            agfusion: \$(agfusion -v) (fork)
        END_VERSIONS
        """
    } else {
        // curl -f makes an HTTP error fail the task instead of saving an error page.
        """
        export PYENSEMBL_CACHE_DIR=\$PWD/pyensembl_cache
        pyensembl install --species ${pyensembl_species} --release ${ensembl_release}

        curl -fsSL http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/database_files/pfamA.txt.gz > pfamA.txt.gz
        gunzip pfamA.txt.gz
        agfusion build --dir . --species ${agfusion_genome} --release ${ensembl_release} --pfam pfamA.txt
        rm pfamA.txt

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            agfusion: \$(agfusion -v) (fork)
        END_VERSIONS
        """
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Module metadata for AGFUSION_DOWNLOAD. Input and output names mirror the process definition.
name: agfusion_download
description: Download or build the AGFusion database and install the pyensembl cache
keywords:
  - agfusion
  - fusion
  - download
  - database
tools:
  - agfusion:
      description: Download or build the AGFusion annotation database
      homepage: https://github.com/anoronh4/AGFusion
      documentation: https://github.com/anoronh4/AGFusion/blob/master/README.md

input:
  - ensembl_release:
      type: integer
      description: |
        Ensembl release to install with pyensembl. Releases below 93 download a
        database with `agfusion download`; releases 93 and above build one
        with `agfusion build`.
  - genome:
      type: string
      description: |
        Genome build. One of GRCh37, smallGRCh37, hg19, GRCh38, hg38, GRCm38, mm10.

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - agfusion_db:
      type: file
      description: File containing AGFusion reference information.
      pattern: "agfusion.*.db"
  - pyensembl_cache:
      type: directory
      description: Folder containing pyensembl cache.
      pattern: "pyensembl_cache"

# NOTE(review): author entry kept from the previous contents of this file - confirm.
authors:
  - "@praveenraj2018"