diff --git a/CHANGELOG.md b/CHANGELOG.md index a7e358ea..e61cc28a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed bug in nf-core `RRNATRANSCRIPTS` module [#563](https://github.com/nf-core/rnafusion/issues/563) - Fixed bug in `GFFREAD` that caused output `gffread_fasta` not being produced [#565](https://github.com/nf-core/rnafusion/issues/565) - Fixed bug in `FUSIONCATCHER_DOWNLOAD` that caused an error when running with singularity profile [#573](https://github.com/nf-core/rnafusion/issues/573) +- Fixed missing script `gtf2bed` which caused local module `GET_RRNA_TRANSCRIPTS` to fail [#602](https://github.com/nf-core/rnafusion/issues/602) ### Removed
diff --git a/bin/gtf2bed b/bin/gtf2bed
new file mode 100755
index 00000000..cfa91cf7
--- /dev/null
+++ b/bin/gtf2bed
@@ -0,0 +1,162 @@
+#!/usr/bin/env perl
+
+# Copyright (c) 2011 Erik Aronesty (erik@q32.com)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ALSO, IT WOULD BE NICE IF YOU LET ME KNOW YOU USED IT.
+
+# gtf2bed: convert a GTF or GFF (version 2/3) annotation to BED12 on STDOUT.
+#
+# Usage: gtf2bed [-x] <annotation[.gz|.zip]>      ("-" reads STDIN)
+#   -x  append a 13th column with the gene_name (else gene_id) attribute
+#
+# One record is printed per transcript id, assembled from its "exon" (or
+# "miRNA") features. start_codon/stop_codon features, when present, set the
+# thick region; otherwise the thick region spans the whole transcript.
+
+use strict;
+use warnings;
+
+use Getopt::Long;
+
+my $usage = "Usage: $0 [-x] <annotation.gtf[.gz|.zip]>\n";
+
+my $extended;
+GetOptions("x" => \$extended) or die $usage;
+
+my $in = shift @ARGV;
+die "Can't open input: no file name given\n$usage" unless defined $in && length $in;
+
+# Open the input, dying on failure. Compressed files are streamed through
+# gunzip/unzip with the list form of pipe-open, so the file name is never
+# interpreted by a shell.
+my $fh;
+if ($in =~ /\.gz$/) {
+    open($fh, '-|', 'gunzip', '-c', $in) or die "Can't open $in: $!\n";
+} elsif ($in =~ /\.zip$/) {
+    open($fh, '-|', 'unzip', '-p', $in) or die "Can't open $in: $!\n";
+} elsif ($in eq '-') {
+    $fh = \*STDIN;
+} else {
+    open($fh, '<', $in) or die "Can't open $in: $!\n";
+}
+
+my $gff = 0;    # 0 = GTF (no "##gff-version" header seen), else 2 or 3
+my %exons;      # transcript id => list of exon/miRNA rows (array refs)
+my %trans;      # transcript id => row with the lowest start seen so far
+my %sc;         # transcript id => [start_codon begin, stop_codon end]
+
+# Record one exon-like row and keep track of the transcript's lowest start,
+# which is what the output is sorted by.
+sub add_feature {
+    my ($id, $row) = @_;
+    push @{ $exons{$id} }, $row;
+    $trans{$id} = $row if !$trans{$id} || $row->[3] < $trans{$id}[3];
+    return;
+}
+
+while (my $line = <$fh>) {
+    $gff = 2 if $line =~ /^##gff-version 2/;
+    $gff = 3 if $line =~ /^##gff-version 3/;
+    next if $line =~ /^#/ && $gff;
+
+    $line =~ s/\s+$//;
+    # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr
+    my @f = split /\t/, $line;
+    next unless defined $f[8];    # skip anything that is not a 9-column row
+
+    my $id;
+    if ($gff) {
+        # most ver 2's stick gene names in the id field
+        ($id) = $f[8] =~ /\bID="([^"]+)"/;
+        # most ver 3's stick unquoted names in the name field
+        ($id) = $f[8] =~ /\bName=([^";]+)/ if !$id && $gff == 3;
+    } else {
+        ($id) = $f[8] =~ /transcript_id "([^"]+)"/;
+    }
+
+    next unless $id && $f[0];
+
+    if ($f[2] eq 'exon') {
+        die "no position at exon on line $." if !$f[3];
+        # gff3 puts :\d in exons sometimes
+        $id =~ s/:\d+$// if $gff == 3;
+        add_feature($id, \@f);
+    } elsif ($f[2] eq 'start_codon') {
+        # optional, output codon start/stop as "thick" region in bed
+        $sc{$id}->[0] = $f[3];
+    } elsif ($f[2] eq 'stop_codon') {
+        $sc{$id}->[1] = $f[4];
+    } elsif ($f[2] eq 'miRNA') {
+        add_feature($id, \@f);
+    }
+}
+
+# close() on a gunzip/unzip pipe also reports a failed decompressor, so check
+# it before any output is produced.
+if ($in ne '-') {
+    close($fh)
+        or die "Error reading $in: " . ($! ? $! : "exit status " . ($? >> 8)) . "\n";
+}
+
+for my $id (
+    # sort by chr then pos
+    sort {
+        $trans{$a}[0] cmp $trans{$b}[0]
+            || $trans{$a}[3] <=> $trans{$b}[3]
+    } keys %trans
+) {
+    my ($chr, $dir, $attr) = @{ $trans{$id} }[0, 6, 8];
+    my ($cds, $cde);
+    ($cds, $cde) = @{ $sc{$id} } if $sc{$id};
+
+    # sort by pos
+    my @ex = sort { $a->[3] <=> $b->[3] } @{ $exons{$id} };
+
+    my $beg = $ex[0][3];
+    my $end = $ex[-1][4];
+
+    if ($dir eq '-') {
+        # swap: the stop codon supplies the thick start on the minus strand;
+        # the +/-2 moves each coordinate to the other end of its codon
+        # (assumes unsplit 3-base codons)
+        ($cds, $cde) = ($cde, $cds);
+        $cds -= 2 if $cds;
+        $cde += 2 if $cde;
+    }
+
+    # not specified, just use exons
+    $cds = $beg if !$cds;
+    $cde = $end if !$cde;
+
+    # adjust start for bed
+    --$beg; --$cds;
+
+    my $exn  = @ex;                                          # exon count
+    my $exst = join ",", map { $_->[3] - $beg - 1 } @ex;      # exon start
+    my $exsz = join ",", map { $_->[4] - $_->[3] + 1 } @ex;   # exon size
+
+    my $extend = "";
+    if ($extended) {
+        my ($gene_id) = $attr =~ /gene_name "([^"]+)"/;
+        ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id;
+        $extend = "\t" . (defined $gene_id ? $gene_id : "");
+    }
+    # added an extra comma to make it look exactly like ucsc's beds
+    print "$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n";
+}
diff --git a/modules.json b/modules.json index b41d5fa5..0c6c5517 100644 --- a/modules.json +++ b/modules.json @@ -20,11 +20,6 @@ "git_sha": "467c202a876d26af544fa8c4b22a050a535462a7", "installed_by": ["modules"] }, - "bedops/convert2bed": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] - }, "cat/cat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/local/get_rrna_transcript/main.nf b/modules/local/get_rrna_transcript/main.nf index 5331f534..901c17b4 100644 --- a/modules/local/get_rrna_transcript/main.nf +++ b/modules/local/get_rrna_transcript/main.nf @@ -20,7 +20,7 @@ process GET_RRNA_TRANSCRIPTS { script: """ - $baseDir/bin/get_rrna_transcripts.py --gtf ${gtf} --output rrna.gtf +
$baseDir/bin/get_rrna_transcripts.py ${gtf} rrna.gtf $baseDir/bin/gtf2bed rrna.gtf > rrna.bed diff --git a/modules/nf-core/bedops/convert2bed/environment.yml b/modules/nf-core/bedops/convert2bed/environment.yml deleted file mode 100644 index 3c13066f..00000000 --- a/modules/nf-core/bedops/convert2bed/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -channels: - - conda-forge - - bioconda -dependencies: - - "bioconda::bedops=2.4.41" diff --git a/modules/nf-core/bedops/convert2bed/main.nf b/modules/nf-core/bedops/convert2bed/main.nf deleted file mode 100644 index a23efbd5..00000000 --- a/modules/nf-core/bedops/convert2bed/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process BEDOPS_CONVERT2BED { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedops:2.4.41--h4ac6f70_2': - 'biocontainers/bedops:2.4.41--h4ac6f70_2' }" - - input: - tuple val(meta), path(in_file) - - output: - tuple val(meta), path("*.bed"), emit: bed - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def format = in_file.getExtension() - """ - convert2bed \\ - $args \\ - -i $format \\ - < $in_file \\ - > ${prefix}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedops/convert2bed: \$(convert2bed --version | grep vers | sed 's/^.*.version: //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedops/convert2bed: \$(convert2bed --version | grep vers | sed 's/^.*.version: //') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bedops/convert2bed/meta.yml b/modules/nf-core/bedops/convert2bed/meta.yml deleted file mode 100644 index 
6d84c031..00000000 --- a/modules/nf-core/bedops/convert2bed/meta.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: "bedops_convert2bed" -description: Convert BAM/GFF/GTF/GVF/PSL files to bed -keywords: - - convert - - bed - - genomics -tools: - - "bedops": - description: "High-performance genomic feature operations." - homepage: "https://bedops.readthedocs.io/en/latest/content/reference/file-management/conversion/convert2bed.html#convert2bed" - documentation: "https://bedops.readthedocs.io/en/latest/" - tool_dev_url: "https://github.com/bedops" - doi: "10.1093/bioinformatics/bts277" - licence: ["GNU v2"] - identifier: biotools:bedops - -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - in_file: - type: file - description: Input file -output: - - bed: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - "*.bed": - type: file - description: Sorted BED file - pattern: "*.{bed}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@rannick" -maintainers: - - "@rannick" diff --git a/modules/nf-core/bedops/convert2bed/tests/main.nf.test b/modules/nf-core/bedops/convert2bed/tests/main.nf.test deleted file mode 100644 index 3f2b193a..00000000 --- a/modules/nf-core/bedops/convert2bed/tests/main.nf.test +++ /dev/null @@ -1,59 +0,0 @@ -// nf-core modules test bedops/convert2bed -nextflow_process { - - name "Test Process BEDOPS_CONVERT2BED" - script "../main.nf" - process "BEDOPS_CONVERT2BED" - - tag "modules" - tag "modules_nfcore" - tag "bedops" - tag "bedops/convert2bed" - - test("sarscov2 - gtf") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) - ] - """ - } - } - - then { - assertAll( - { assert 
process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("sarscov2 - gtf - stub") { - - options "-stub" - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/nf-core/bedops/convert2bed/tests/main.nf.test.snap b/modules/nf-core/bedops/convert2bed/tests/main.nf.test.snap deleted file mode 100644 index e2482924..00000000 --- a/modules/nf-core/bedops/convert2bed/tests/main.nf.test.snap +++ /dev/null @@ -1,68 +0,0 @@ -{ - "sarscov2 - gtf - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,d3d58dde1d9baba3700ff9fb5a45ce5b" - ], - "bed": [ - [ - { - "id": "test" - }, - "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,d3d58dde1d9baba3700ff9fb5a45ce5b" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-04-11T12:06:14.755423333" - }, - "sarscov2 - gtf": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.bed:md5,adfdd36e848a62f4b0ea8a694abe9659" - ] - ], - "1": [ - "versions.yml:md5,d3d58dde1d9baba3700ff9fb5a45ce5b" - ], - "bed": [ - [ - { - "id": "test" - }, - "test.bed:md5,adfdd36e848a62f4b0ea8a694abe9659" - ] - ], - "versions": [ - "versions.yml:md5,d3d58dde1d9baba3700ff9fb5a45ce5b" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-04-11T12:06:08.876003152" - } -} \ No newline at end of file diff --git a/modules/nf-core/bedops/convert2bed/tests/tags.yml b/modules/nf-core/bedops/convert2bed/tests/tags.yml deleted file mode 100644 index fd2c2993..00000000 --- a/modules/nf-core/bedops/convert2bed/tests/tags.yml +++ /dev/null 
@@ -1,2 +0,0 @@ -bedops/convert2bed: - - "modules/nf-core/bedops/convert2bed/**"