Skip to content

Commit

Permalink
Update metamdbg version; use gzipped bin files
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jim Downie committed Dec 18, 2024
1 parent 4cac07b commit 9950440
Show file tree
Hide file tree
Showing 28 changed files with 681 additions and 94 deletions.
3 changes: 3 additions & 0 deletions bin/bin_summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ read_taxonomy <- function(file) {
ncbi_classification = `Majority vote NCBI classification`,
taxid)
}
# GTDB-Tk does not drop the file extension from bin names, so strip the final extension here
df <- df |>
mutate(bin = str_extract(bin, "(.*)\\.[^\\.]+$", group = 1))

return(df)
}
Expand Down
4 changes: 2 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ process {
}

withName: 'CHECKM2_PREDICT' {
ext.args = { "--extension fa" }
ext.args = { "--extension gz" }
ext.prefix = { "${meta.id}_${meta.assembler}_${meta.binner}" }
tag = { "${meta.id}_${meta.assembler}_${meta.binner}" }
publishDir = [
Expand Down Expand Up @@ -134,7 +134,7 @@ process {
}

withName: 'GTDBTK_CLASSIFYWF' {
ext.args = "--extension fa"
ext.args = "--extension gz"
ext.prefix = { "${meta.id}_${meta.assembler}_${meta.binner}" }
tag = { "${meta.id}_${meta.assembler}_${meta.binner}" }
publishDir = [
Expand Down
15 changes: 12 additions & 3 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
"https://github.com/nf-core/modules.git": {
"modules": {
"nf-core": {
"barrnap": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"bwamem2/index": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
Expand Down Expand Up @@ -57,8 +62,7 @@
"maxbin2": {
"branch": "master",
"git_sha": "283613159e079152f1336cef0db1c836086206e0",
"installed_by": ["modules"],
"patch": "modules/nf-core/maxbin2/maxbin2.diff"
"installed_by": ["modules"]
},
"metabat2/metabat2": {
"branch": "master",
Expand All @@ -68,7 +72,7 @@
},
"metamdbg/asm": {
"branch": "master",
"git_sha": "7c08494acb5aba0763c5c6db87f82b249de87ea8",
"git_sha": "88fbcaa0e08c4f5ab925bebe2234b2b68953e8cd",
"installed_by": ["modules"]
},
"minimap2/align": {
Expand All @@ -82,6 +86,11 @@
"git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d",
"installed_by": ["modules"]
},
"prokka": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"pyrodigal": {
"branch": "master",
"git_sha": "938e803109104e30773f76a7142442722498fef1",
Expand Down
7 changes: 4 additions & 3 deletions modules/local/bin3c/cluster/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ process BIN3C_CLUSTER {
tuple val(meta), path(contigs), path(map)

output:
tuple val(meta), path("*.fa") , emit: fasta, optional: true
tuple val(meta), path("*.fa.gz") , emit: fasta, optional: true
tuple val(meta), path("*.[!fna,log]*"), emit: clustering
tuple val(meta), path("*.log") , emit: log
path("versions.yml") , emit: versions
Expand All @@ -27,11 +27,12 @@ process BIN3C_CLUSTER {
# bin3c renames contigs, we don't want that
for bin in bin3c/fasta/*.fna; do
basename=`basename \$bin`
awk -F" " '{if(\$1~">"){print ">" substr(\$2,8)}else{print \$0}}' \$bin > ${prefix}.\${basename%.fna}.fa
bn=`basename \$bin .fna`
awk -F" " '{if(\$1~">"){print ">" substr(\$2,8)}else{print \$0}}' \$bin > ${prefix}.\${bn}.fa
done
find bin3c -maxdepth 1 -type f -exec sh -c 'name=`basename {}`; mv {} ${prefix}.\$name' \\;
gzip *.fa
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
6 changes: 3 additions & 3 deletions modules/local/contig2bintofasta/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ process CONTIG2BINTOFASTA {
tuple val(meta), path(contigs), path(contig2bin)

output:
tuple val(meta), path("*.fa*"), emit: bins
path("versions.yml") , emit: versions
tuple val(meta), path("*.fa.gz"), emit: bins
path("versions.yml") , emit: versions

script:
def args = task.ext.args ?: ''
Expand All @@ -21,7 +21,7 @@ process CONTIG2BINTOFASTA {
awk '{print \$2}' ${contig2bin} | sort -u | while read bin
do
grep -w \${bin} ${contig2bin} | awk '{ print \$1 }' > \${bin}.ctglst
seqkit grep -f \${bin}.ctglst ${contigs} > \${bin}.fa
seqkit grep -f \${bin}.ctglst ${contigs} | gzip > \${bin}.fa.gz
done
cat <<-END_VERSIONS > versions.yml
Expand Down
21 changes: 15 additions & 6 deletions modules/local/fastatocontig2bin/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,28 @@ process FASTATOCONTIG2BIN {
path("versions.yml") , emit: versions

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def compressed_bins = bins.findAll { it.getExtension() == "gz" }
def decompress_bins = compressed_bins.size() > 0 ? "gunzip ${compressed_bins.join(" ")}" : ""
def clean_bins = bins.collect { it.toString() - ~/\.gz$/ }
def remove_compressed = compressed_bins.size() > 0 ? "rm ${compressed_bins.collect { it.toString() - ~/\.gz$/ }.join(" ")}" : ""
"""
${decompress_bins}
awk \\
'BEGIN { OFS = "\t" }
BEGINFILE {
cmd=sprintf("basename %s .%s", FILENAME, "${extension}")
cmd | getline bin
bin = FILENAME
sub(".*/", "", bin)
sub(/\\.[^\\.]+\$/, "", bin)
}
/^>/ {
sub(/>/, "", \$1)
print \$1,bin
}' ${bins} > ${prefix}.tsv
print \$1, bin
}' ${clean_bins.join(" ")} > ${prefix}.tsv
${remove_compressed}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
10 changes: 6 additions & 4 deletions modules/local/metator/pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ process METATOR_PIPELINE {
val hic_enzymes

output:
tuple val(meta), path("bin_summary.txt") , emit: bin_summary
tuple val(meta), path("binning.txt") , emit: contig2bin
tuple val(meta), path("bins/*.fa"), emit: bins
path "versions.yml" , emit: versions
tuple val(meta), path("bin_summary.txt") , emit: bin_summary
tuple val(meta), path("binning.txt") , emit: contig2bin
tuple val(meta), path("bins/*.fa.gz") , emit: bins
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down Expand Up @@ -46,6 +46,8 @@ process METATOR_PIPELINE {
awk -F" " '{if(\$1~">"){ print \$1 } else { print \$0 } }' \$bin > bins/\${binname}
done
gzip bins/*.fa
rm -r final_bin_unscaffold
cat <<-END_VERSIONS > versions.yml
Expand Down
5 changes: 5 additions & 0 deletions modules/nf-core/barrnap/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 56 additions & 0 deletions modules/nf-core/barrnap/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 49 additions & 0 deletions modules/nf-core/barrnap/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

57 changes: 57 additions & 0 deletions modules/nf-core/barrnap/tests/main.nf.test

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 9950440

Please sign in to comment.