diff --git a/.github/workflows/workflow_modules.yml b/.github/workflows/workflow_modules.yml index f3491eca..068c472d 100644 --- a/.github/workflows/workflow_modules.yml +++ b/.github/workflows/workflow_modules.yml @@ -54,6 +54,7 @@ jobs: - name: Test EMGB import tools run: | ./bin/emgb.sh --output=output/test1 --runid=1 --binsdir=$(find output/test1/ -name "metabat") \ + --blastdb=uniref90 \ --db=${EMGB_KEGG_DB} \ --workdir="${WORK_DIR}_wFullPipeline" --name=test1 diff --git a/bin/emgb.sh b/bin/emgb.sh index f78806cc..71c196a6 100755 --- a/bin/emgb.sh +++ b/bin/emgb.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -VERSION=0.3.1 +VERSION=0.4.0 while [ $# -gt 0 ]; do case "$1" in @@ -19,6 +19,8 @@ while [ $# -gt 0 ]; do ;; --type=*) TYPE="${1#*=}" ;; + --blastdb=*) BLAST_DB="${1#*=}" + ;; --version) VERSION_CHECK=1 ;; --debug) DEBUG_CHECK=1 @@ -38,7 +40,7 @@ done function getGenes { - nr=$(find $OUTPUT_PATH/$RUN_ID/annotation/ -name "*.ncbi_nr.blast.tsv" -exec readlink -f {} \; | sed 's/^/ -nr-blast-tab /g') + nr=$(find $OUTPUT_PATH/$RUN_ID/annotation/ -name "*.${BLAST_DB}.blast.tsv" -exec readlink -f {} \; | sed 's/^/ -nr-blast-tab /g') tax=$(find $OUTPUT_PATH/$RUN_ID/annotation/ -name "*.taxonomy.tsv" -exec readlink -f {} \; | sed 's/^/ -mmseqs-lineage /g') ffn=$(find $OUTPUT_PATH/$RUN_ID/annotation -name "*.ffn.gz" -exec readlink -f {} \; | sed 's/^/ -ffn /g') gff=$(find $OUTPUT_PATH/$RUN_ID/annotation -name "*.gff.gz" -exec readlink -f {} \; | sed 's/^/ -gff /g') @@ -103,6 +105,9 @@ help() echo " -- (e.g. X in the following example path fullPipelineOutput/SAMPLE/X/binning/) " echo " --binsdir -- directory of bins. If bin refinement was executed then the bin refinement output should be used." echo " -- (e.g. --binsdir=fullPipelineOutput/DRR066656/1/binning/0.4.0/metabat)" + echo " --blastdb -- Blast output that should be exported to emgb" + echo " -- (e.g. the folder name of BLAST_DB: output/test1/1/annotation/0.3.0/mmseqs2/BLAST_DB)" + echo " -- (Examples: bacmet20_predicted, ncbi_nr)" echo " --db -- emgb specific kegg database" echo " --name -- sample name, e.g. 
the SAMPLE in the paths above" echo " --type -- if other then Illumina: ONT/Hybrid" diff --git a/default/fullPipeline_illumina_nanpore.yml b/default/fullPipeline_illumina_nanpore.yml index d6d93253..d1bd4027 100644 --- a/default/fullPipeline_illumina_nanpore.yml +++ b/default/fullPipeline_illumina_nanpore.yml @@ -229,8 +229,11 @@ steps: defaultKingdom: false additionalParams: " --mincontiglen 500 " mmseqs2: + chunkSize: 20000 kegg: - params: ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalParams: + search : ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalColumns: "" database: download: source: s3://databases_internal/kegg-mirror-2021-01_mmseqs.tar.zst @@ -238,29 +241,37 @@ steps: s5cmd: params: '--retry-count 30 --no-verify-ssl --endpoint-url https://openstack.cebitec.uni-bielefeld.de:8080' vfdb: - params: ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalParams: + search : ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalColumns: "" database: download: source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/vfdb_full_2022_07_29.tar.zst md5sum: 7e32aaed112d6e056fb8764b637bf49e bacmet20_experimental: - params: ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalParams: + search : ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalColumns: "" database: download: source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_experimental.tar.zst md5sum: 57a6d328486f0acd63f7e984f739e8fe bacmet20_predicted: - params: ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalParams: + search : ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalColumns: "" database: download: source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_predicted.tar.zst md5sum: 55902401a765fc460c09994d839d9b64 - ncbi_nr: - params: ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + uniref90: + additionalParams: + search : ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalColumns: "" database: download: - source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/nr_2023-04-29_mmseqs_taxonomy.tar.zst - md5sum: 79b9fb6b3dada41e602d70e12e7351c2 + source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/uniref90_20231108_mmseqs.tar.zst + md5sum: 313f2c031361091af2d5f3c6f6f46013 rgi: # --include_loose includes matches of more distant homologs of AMR genes which may also report spurious partial matches # --include_nudge Partial ORFs may do not pass curated bitscore 
cut-offs or novel samples may contain divergent alleles, so nudging diff --git a/default/fullPipeline_illumina_nanpore_without_aggregate.yml b/default/fullPipeline_illumina_nanpore_without_aggregate.yml index b9b58525..1d645380 100644 --- a/default/fullPipeline_illumina_nanpore_without_aggregate.yml +++ b/default/fullPipeline_illumina_nanpore_without_aggregate.yml @@ -140,7 +140,9 @@ steps: additionalParams: " --mincontiglen 500 " mmseqs2: kegg: - params: ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalParams: + search : ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalColumns: "" database: download: source: s3://databases_internal/kegg-mirror-2021-01_mmseqs.tar.zst @@ -148,29 +150,37 @@ steps: s5cmd: params: '--retry-count 30 --no-verify-ssl --endpoint-url https://openstack.cebitec.uni-bielefeld.de:8080' vfdb: - params: ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalParams: + search : ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalColumns: "" database: download: source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/vfdb_full_2022_07_29.tar.zst md5sum: 7e32aaed112d6e056fb8764b637bf49e bacmet20_experimental: - params: ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalParams: + search : ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalColumns: "" database: download: source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_experimental.tar.zst md5sum: 57a6d328486f0acd63f7e984f739e8fe bacmet20_predicted: - params: ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalParams: + search : ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalColumns: "" database: download: source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_predicted.tar.zst md5sum: 55902401a765fc460c09994d839d9b64 - ncbi_nr: - params: ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + uniref90: + additionalParams: + search : ' --max-seqs 300 --max-accept 50 -c 0.8 --cov-mode 0 --start-sens 4 --sens-steps 1 -s 6 --num-iterations 2 -e 0.001 --e-profile 0.01 --db-load-mode 3 ' + additionalColumns: "" database: download: - source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/nr_2023-04-29_mmseqs_taxonomy.tar.zst - md5sum: 79b9fb6b3dada41e602d70e12e7351c2 + source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/uniref90_20231108_mmseqs.tar.zst + md5sum: 313f2c031361091af2d5f3c6f6f46013 mmseqs2_taxonomy: # Run taxonomy classification on MAGs and unbinable contigs or just the later runOnMAGs: true diff --git a/docker/toolkit-mmseqs2/Dockerfile 
b/docker/toolkit-mmseqs2/Dockerfile new file mode 100644 index 00000000..680d49e5 --- /dev/null +++ b/docker/toolkit-mmseqs2/Dockerfile @@ -0,0 +1,2 @@ +FROM ghcr.io/soedinglab/mmseqs2:15-6f452 +RUN apt update && apt install -y procps diff --git a/docker/toolkit-mmseqs2/VERSION b/docker/toolkit-mmseqs2/VERSION new file mode 100644 index 00000000..9de1d7d0 --- /dev/null +++ b/docker/toolkit-mmseqs2/VERSION @@ -0,0 +1 @@ +15-6f452-0 diff --git a/example_params/annotation.yml b/example_params/annotation.yml index 49dbbe9a..19992a81 100644 --- a/example_params/annotation.yml +++ b/example_params/annotation.yml @@ -11,8 +11,11 @@ steps: annotation: input: "test_data/annotation/input_small.tsv" mmseqs2: + chunkSize: 20000 kegg: - params: ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' + additionalParams: + search : ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' + additionalColumns: "" database: extractedDBPath: '/vol/spool/toolkit/kegg-mirror-2021-01_mmseqs/sequenceDB' # bacmet20_experimental: @@ -22,11 +25,14 @@ steps: # source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_experimental.tar.zst # md5sum: 57a6d328486f0acd63f7e984f739e8fe bacmet20_predicted: - params: ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' database: download: source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_predicted.tar.zst md5sum: 55902401a765fc460c09994d839d9b64 + additionalParams: + search : ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' + additionalColumns: "" + # vfdb: # params: ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' # database: diff --git a/example_params/fullPipeline.yml b/example_params/fullPipeline.yml index fb81c108..0b398a1f 100644 --- a/example_params/fullPipeline.yml +++ b/example_params/fullPipeline.yml @@ -166,8 +166,11 @@ steps: defaultKingdom: false additionalParams: " --mincontiglen 200 " mmseqs2: + chunkSize: 20000 kegg: - params: ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' + additionalParams: + search : ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' + additionalColumns: "" database: extractedDBPath: '/vol/spool/toolkit/kegg-mirror-2021-01_mmseqs/sequenceDB' @@ -183,16 +186,23 @@ steps: # download: # source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_experimental.tar.zst # md5sum: 57a6d328486f0acd63f7e984f739e8fe - ncbi_nr: - params: ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' + uniref90: + additionalParams: + search : ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' + additionalColumns: "" + + database: download: - source: s3://databases/nr_2023-04-29_mmseqs_taxonomy/* - md5sum: 79b9fb6b3dada41e602d70e12e7351c2 + source: s3://databases/uniref90_20231108_mmseqs/* + md5sum: 313f2c031361091af2d5f3c6f6f46013 s5cmd: params: '--retry-count 30 --no-verify-ssl --no-sign-request --endpoint-url https://openstack.cebitec.uni-bielefeld.de:8080' + bacmet20_predicted: - params: ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' + additionalParams: + search : ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 
--exact-kmer-matching 1 --db-load-mode 3' + additionalColumns: "" database: download: source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_predicted.tar.zst diff --git a/example_params/fullPipelineIlluminaOrONT.yml b/example_params/fullPipelineIlluminaOrONT.yml index 5e44a4b8..85355b83 100644 --- a/example_params/fullPipelineIlluminaOrONT.yml +++ b/example_params/fullPipelineIlluminaOrONT.yml @@ -149,8 +149,11 @@ steps: defaultKingdom: false additionalParams: " --mincontiglen 200 " mmseqs2: + chunkSize: 20000 kegg: - params: ' -s 2 --exact-kmer-matching 1 ' + additionalParams: + search : ' -s 2 --exact-kmer-matching 1 ' + additionalColumns: "" database: extractedDBPath: '/vol/spool/toolkit/kegg-mirror-2021-01_mmseqs/sequenceDB' # vfdb: @@ -166,7 +169,9 @@ steps: # source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_experimental.tar.zst # md5sum: 57a6d328486f0acd63f7e984f739e8fe bacmet20_predicted: - params: ' -s 2 --exact-kmer-matching 1 ' + additionalParams: + search : ' -s 2 --exact-kmer-matching 1 ' + additionalColumns: "" database: download: source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_predicted.tar.zst diff --git a/example_params/fullPipelineONT.yml b/example_params/fullPipelineONT.yml index 6d48d014..dbf6658c 100644 --- a/example_params/fullPipelineONT.yml +++ b/example_params/fullPipelineONT.yml @@ -149,6 +149,7 @@ steps: defaultKingdom: false additionalParams: " --mincontiglen 200 " mmseqs2: + chunkSize: 20000 # vfdb: # params: ' -s 2 --exact-kmer-matching 1 ' # database: @@ -162,7 +163,9 @@ steps: # source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_experimental.tar.zst # md5sum: 57a6d328486f0acd63f7e984f739e8fe bacmet20_predicted: - params: ' -s 2 --exact-kmer-matching 1 ' + additionalParams: + search : ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' + additionalColumns: "" database: download: source: https://openstack.cebitec.uni-bielefeld.de:8080/databases/bacmet20_predicted.tar.zst diff --git a/example_params/fullPipeline_fraction/fullPipeline_fraction_binning_metabinner.yml b/example_params/fullPipeline_fraction/fullPipeline_fraction_binning_metabinner.yml index 1fcdb12f..df779af3 100644 --- a/example_params/fullPipeline_fraction/fullPipeline_fraction_binning_metabinner.yml +++ b/example_params/fullPipeline_fraction/fullPipeline_fraction_binning_metabinner.yml @@ -40,8 +40,11 @@ steps: kmerSize: 4 annotation: mmseqs2: + chunkSize: 7000 kegg: - params: ' -s 2 --exact-kmer-matching 1 ' + additionalParams: + search : ' -s 1 --max-seqs 100 --max-accept 50 --alignment-mode 1 --exact-kmer-matching 1 --db-load-mode 3' + additionalColumns: "" database: extractedDBPath: '/vol/spool/toolkit/kegg-mirror-2021-01_mmseqs/sequenceDB' resources: diff --git a/lib/Utils.groovy b/lib/Utils.groovy index fa23c986..37c6bed1 100644 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -96,6 +96,7 @@ class Utils { } } } + static Object[] flattenTuple(tupl){ def chunkList = []; def SAMPLE_IDX = 0; @@ -106,6 +107,31 @@ class Utils { return chunkList; } + /* + * This method takes the number of entries of an input file (e.g. fasta entries in multi-fasta file), + * the maximum number of allowed entries per chunk and the actual input (e.g. file). + * It creates a list of indices of chunks of the input file based on the input parameters. 
+ */ + static List splitFilesIndex(seqCount, chunkSize, sample){ + int chunk=seqCount.intdiv(chunkSize) + if(seqCount.mod(chunkSize) != 0){ + chunk = chunk + 1 + } + def chunks = [] + for(def n : 1..chunk){ + int start = (n-1) * chunkSize + 1 + + int end = n * chunkSize + + if(end > seqCount){ + end=seqCount + } + chunks.add(sample + [start, end, chunk]) + } + return chunks + } + + static getMappingIdentityParam(medianQuality) { if(medianQuality > 17){ return 97 diff --git a/modules/annotation/module.nf b/modules/annotation/module.nf index 29b4425a..01b7993e 100644 --- a/modules/annotation/module.nf +++ b/modules/annotation/module.nf @@ -4,6 +4,7 @@ include { pDumpLogs } from '../utils/processes' import java.nio.file.Files import java.nio.file.Paths +import java.util.stream.Collectors mode = 'mode_not_set' @@ -23,7 +24,7 @@ def getOutput(SAMPLE, RUNID, TOOL, filename){ * See “/lib/Utils.groovy” for more information. **/ def constructParametersObject(String tool){ - return params?.steps?.annotation?."$tool".findAll({ it.key != "runOnMAGs" }).collect{ Utils.getDockerMount(it.value?.database, params, 'true')}.join(" ") + return params?.steps?.annotation?."$tool".findAll({ it.key != "runOnMAGs" && it.key != "chunkSize" }).collect{ Utils.getDockerMount(it.value?.database, params, 'true')}.join(" ") } @@ -58,25 +59,23 @@ process pMMseqs2 { // Another mount flag is used to get a key file (aws format) into the docker-container. // This file is then used by s5cmd. The necessary mount points are generated by “constructParametersObject()”. - containerOptions constructParametersObject("mmseqs2") - + containerOptions constructParametersObject("mmseqs2") + " --entrypoint='' " + tag "Sample: $sample, Database: $dbType" - label 'highmemLarge' + label 'large' secret { "${S3_DB_ACCESS}"!="" ? ["S3_${dbType}_ACCESS", "S3_${dbType}_SECRET"] : [] } - publishDir params.output, mode: "${params.publishDirMode}", saveAs: { filename -> getOutput("${sample}", params.runid, "mmseqs2/${dbType}", filename) }, \ - pattern: "{**.blast.tsv}" - when params?.steps.containsKey("annotation") && params?.steps.annotation.containsKey("mmseqs2") input: val(binType) - tuple val(sample), file(fasta), file(contig2GeneMapping), val(dbType), val(parameters), val(EXTRACTED_DB), val(DOWNLOAD_LINK), val(MD5SUM), val(S5CMD_PARAMS) - + tuple val(sample), file(fasta), file(contig2GeneMapping), val(start), val(stop), val(numberOfChunks), val(dbType), val(parameters), val(columns), val(EXTRACTED_DB), val(DOWNLOAD_LINK), val(MD5SUM), val(S5CMD_PARAMS) + output: - tuple val("${dbType}"), val("${sample}"), val("${binType}"), path("${sample}_${binType}.${dbType}.blast.tsv"), optional:true, emit: blast + tuple val("${dbType}"), val("${sample}"), val("${binType}"), val("${start}"), val("${stop}"), val("${numberOfChunks}"),\ + path("${sample}_${binType}.${start}.${stop}.${dbType}.blast.tsv"), optional:true, emit: blast tuple val("${sample}_${binType}"), val("${output}"), val(params.LOG_LEVELS.INFO), file(".command.sh"), \ file(".command.out"), file(".command.err"), file(".command.log"), emit: logs @@ -84,78 +83,8 @@ process pMMseqs2 { output = getOutput("${sample}", params.runid, "mmseqs2/${dbType}", "") S3_DB_ACCESS=params.steps?.annotation?.mmseqs2?."${dbType}"?.database?.download?.s5cmd && S5CMD_PARAMS.indexOf("--no-sign-request") == -1 ? "\$S3_${dbType}_ACCESS" : "" S3_DB_SECRET=params.steps?.annotation?.mmseqs2?."${dbType}"?.database?.download?.s5cmd && S5CMD_PARAMS.indexOf("--no-sign-request") == -1 ? 
"\$S3_${dbType}_SECRET" : "" - ''' - mkdir -p !{params.polished.databases} - # if no local database is referenced, start download part - if [ -z "!{EXTRACTED_DB}" ] - then - # polished params end with a / so no additional one is needed at the end - DATABASE=!{params.polished.databases}mmseqs2 - mkdir -p ${DATABASE}/!{dbType} - LOCK_FILE=${DATABASE}/!{dbType}/lock.txt - - # Check if access and secret keys are necessary for s5cmd - if [ ! -z "!{S3_DB_ACCESS}" ] - then - export AWS_ACCESS_KEY_ID=!{S3_DB_ACCESS} - export AWS_SECRET_ACCESS_KEY=!{S3_DB_SECRET} - fi - - # Create and try to lock the given “LOCK_FILE”. If the file is locked no other process will download the same database simultaneously - # and wait until the database is downloaded. Depending on your database path (starts with s3:// | https:// | /../../.. ) - # one of the flock input commands is executed to download and decompress the database. - # If a database is present at the given path, checksums are compared, if they are identical the download will be omitted. - flock ${LOCK_FILE} concurrentDownload.sh --output=${DATABASE}/!{dbType} \ - --link=!{DOWNLOAD_LINK} \ - --httpsCommand="wget -qO- !{DOWNLOAD_LINK} | zstd -T!{task.cpus} -d -c | tar -xv " \ - --s3FileCommand="s5cmd !{S5CMD_PARAMS} cat --concurrency !{task.cpus} !{DOWNLOAD_LINK} | zstd -T!{task.cpus} -d -c | tar -xv " \ - --s3DirectoryCommand="s5cmd !{S5CMD_PARAMS} cp --concurrency !{task.cpus} !{DOWNLOAD_LINK} . " \ - --s5cmdAdditionalParams="!{S5CMD_PARAMS}" \ - --localCommand="zstd -T!{task.cpus} -c -d !{DOWNLOAD_LINK} | tar -xv " \ - --expectedMD5SUM=!{MD5SUM} - - # Path of the newly downloaded database. A mmseqs2 database consists out of multiple files, - # the unique lookup file is searched to determine the basename of each database. - FILEPATH=$(find ${DATABASE}/!{dbType} -name *.lookup -type f) - # remove extension to get the files basename. - MMSEQS2_DATABASE_DIR=${FILEPATH%.*} - else - # If an extracted database is present use that path. - MMSEQS2_DATABASE_DIR="!{EXTRACTED_DB}" - fi - MMSEQS_HEADER="query,target,pident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,qlen,tlen,qcov,tcov" - OUTPUT_TMP_TSV="!{sample}_!{binType}.!{dbType}.blast.tmp.tsv" - OUTPUT_TSV="!{sample}_!{binType}.!{dbType}.blast.tsv" - - # According to the MMSeqs docs the --split-memory-limit parameter defines the RAM usage for *about* 80 percent of the total RAM consumption. - # We set the ram limit parameter to 75 percent of the total available RAM to make sure to not run into out of memory errors. - RAM_LIMIT="$(awk -v RATIO=75 -v RAM=$(echo !{task.memory} | cut -f 1 -d ' ') 'BEGIN { print int(RAM / 100 * RATIO) }')G" - - mkdir tmp - # Only mmseqs2 databases can be used for every kind of search. Inputs have to be converted first. - mmseqs createdb !{fasta} queryDB - # Load all indices into memory to increase searching speed - mmseqs touchdb --threads !{task.cpus} queryDB - mmseqs touchdb --threads !{task.cpus} ${MMSEQS2_DATABASE_DIR} - mmseqs search queryDB ${MMSEQS2_DATABASE_DIR} !{sample}_!{binType}.!{dbType}.results.database tmp !{parameters} \ - --threads !{task.cpus} --split-memory-limit ${RAM_LIMIT} - # mmseqs2 searches produce output databases. These have to be converted to a more useful format. The blast -outfmt 6 in this case. 
- mmseqs convertalis queryDB ${MMSEQS2_DATABASE_DIR} !{sample}_!{binType}.!{dbType}.results.database ${OUTPUT_TMP_TSV} \ - --threads !{task.cpus} --format-output ${MMSEQS_HEADER} - - # Try only if the output file is not empty, csvtk will fail otherwise - if [ -s ${OUTPUT_TMP_TSV} ]; then - # Add header - sed -i "1i $(echo ${MMSEQS_HEADER} | tr ',' '\t')" ${OUTPUT_TMP_TSV} - - # Add BIN_ID and SAMPLE column - csvtk -t -T join -f "locus_tag;query" \ - <(csvtk -t -T concat !{contig2GeneMapping} | csvtk -t -T cut -f SAMPLE,BIN_ID,CONTIG,locus_tag) ${OUTPUT_TMP_TSV} > ${OUTPUT_TSV} - - # use "query" column name instead of "locus_tag" - sed -i -e "1s/locus_tag/query/" ${OUTPUT_TSV} - fi - ''' + ADDITIONAL_COLUMNS = "${columns}".trim() == "" ? "" : ",${columns}" + template("mmseqs2.sh") } /** @@ -175,7 +104,7 @@ process pMMseqs2_taxonomy { // Therefore this place has to be mounted to the docker container to be accessible during run time. // Another mount flag is used to get a key file (aws format) into the docker-container. // This file is then used by s5cmd. The necessary mount points are generated by “constructParametersObject()”. - containerOptions constructParametersObject("mmseqs2_taxonomy") + containerOptions constructParametersObject("mmseqs2_taxonomy") + " --entrypoint='' " tag "Sample: $sample, Database_taxonomy: $dbType" @@ -252,22 +181,36 @@ process pMMseqs2_taxonomy { MMSEQS2_DATABASE_DIR="!{EXTRACTED_DB}" fi + + # Check which mmseqs version is the right one + MMSEQS_VERSION="" + if grep -q "avx2" /proc/cpuinfo; then + echo "AVX2 supported, using mmseqs_avx2" + MMSEQS_VERSION="mmseqs_avx2" + elif grep -q "sse4_1" /proc/cpuinfo; then + echo "SSE4.1 supported, using mmseqs_sse41" + MMSEQS_VERSION="mmseqs_sse41" + else + echo "Using mmseqs_sse2" + MMSEQS_VERSION="mmseqs_sse2" + fi + mkdir tmp # Only mmseqs2 databases can be used for every kind of search. Inputs have to be converted first. - mmseqs createdb !{fasta} queryDB + $MMSEQS_VERSION createdb !{fasta} queryDB # If the ramMode is set to true, the whole database will be loaded into the RAM. Do not forget to set the MMseqs2 parameter accordingly, --db-load-mode 3. if !{ramMode} then # Load all indices into memory to increase searching speed - mmseqs touchdb --threads !{task.cpus} queryDB - mmseqs touchdb --threads !{task.cpus} ${MMSEQS2_DATABASE_DIR} + $MMSEQS_VERSION touchdb --threads !{task.cpus} queryDB + $MMSEQS_VERSION touchdb --threads !{task.cpus} ${MMSEQS2_DATABASE_DIR} fi # Define taxonomies - mmseqs taxonomy queryDB ${MMSEQS2_DATABASE_DIR} !{sample}_!{binType}.!{dbType}.taxresults.database tmp !{parameters} --start-sens 1 --sens-steps 1 -s !{sensitivity} --threads !{task.cpus} + $MMSEQS_VERSION taxonomy queryDB ${MMSEQS2_DATABASE_DIR} !{sample}_!{binType}.!{dbType}.taxresults.database tmp !{parameters} -s !{sensitivity} --threads !{task.cpus} # mmseqs2 searches produce output databases. These have to be converted to more useful formats. 
- mmseqs createtsv queryDB !{sample}_!{binType}.!{dbType}.taxresults.database !{sample}_!{binType}.!{dbType}.taxonomy.tsv --threads !{task.cpus} - mmseqs taxonomyreport ${MMSEQS2_DATABASE_DIR} !{sample}_!{binType}.!{dbType}.taxresults.database !{sample}_!{binType}.!{dbType}.krakenStyleTaxonomy.out - mmseqs taxonomyreport ${MMSEQS2_DATABASE_DIR} !{sample}_!{binType}.!{dbType}.taxresults.database !{sample}_!{binType}.!{dbType}.krona.html --report-mode 1 + $MMSEQS_VERSION createtsv queryDB !{sample}_!{binType}.!{dbType}.taxresults.database !{sample}_!{binType}.!{dbType}.taxonomy.tsv --threads !{task.cpus} + $MMSEQS_VERSION taxonomyreport ${MMSEQS2_DATABASE_DIR} !{sample}_!{binType}.!{dbType}.taxresults.database !{sample}_!{binType}.!{dbType}.krakenStyleTaxonomy.out + $MMSEQS_VERSION taxonomyreport ${MMSEQS2_DATABASE_DIR} !{sample}_!{binType}.!{dbType}.taxresults.database !{sample}_!{binType}.!{dbType}.krona.html --report-mode 1 ''' } @@ -507,10 +450,10 @@ process pKEGGFromBlast { secret { "${S3_KEGG_ACCESS}"!="" ? ["S3_kegg_ACCESS", "S3_kegg_SECRET"] : [] } input: - tuple val(sample), val(binType), file(blast_result) + tuple val(sample), val(binType), file(blastResult) output: - tuple val("${sample}"), path("${sample}_${binType}_keggPaths.tsv"), emit: kegg_paths + tuple val("${sample}"), path("*.keggPaths.tsv"), emit: keggPaths tuple val("${sample}_${binType}"), val("${output}"), val(params.LOG_LEVELS.INFO), file(".command.sh"), \ file(".command.out"), file(".command.err"), file(".command.log"), emit: logs @@ -553,7 +496,7 @@ process pKEGGFromBlast { else KEGG_DB="!{EXTRACTED_DB}" fi - blast2kegg.py !{blast_result} ${KEGG_DB} !{sample}_!{binType}_keggPaths.tsv + blast2kegg.py !{blastResult} ${KEGG_DB} !{blastResult.baseName}.keggPaths.tsv ''' } @@ -722,6 +665,98 @@ workflow wAnnotateList { proteins = _wAnnotation.out.prokka_faa } +/** +* +* The pCount process is designed to count the number of sequences in a given FASTA file. +* It uses the seqkit tool to calculate the sequence statistics of a FASTA file, specifically extracting the sequence count. +* +**/ + +process pCount { + + label 'tiny' + + tag "Sample: $sample, BinID: $binID" + + container "${params.ubuntu_image}" + + input: + tuple val(sample), val(binID), path(fasta), path(metadata) + + output: + tuple val("${sample}"), val("${binID}"), path(fasta), path(metadata), env(COUNT) + + shell: + ''' + COUNT=$(seqkit stats -T <(cat !{fasta}) | cut -d$'\t' -f 4 | tail -n 1) + ''' +} + +/** +* +* The pCollectFile process is designed to collect and concatenate BLAST output files. +* It uses the csvtk tool to concatenate all the .tsv files and then splits them based on the BIN_ID. 
+* +**/ +process pCollectFile { + + label 'small' + + tag "Sample: $sample, Database: $dbType" + + container "${params.ubuntu_image}" + + publishDir params.output, mode: "${params.publishDirMode}", saveAs: { filename -> getOutput("${sample}", params.runid, "mmseqs2/${dbType}", filename) }, \ + pattern: "{**.blast.tsv}" + + input: + tuple val(dbType), val(sample), val(type), path(blastOutputs) + + output: + tuple val("${sample}"), val("${type}"), val("${dbType}"), path("*.blast.tsv", arity: "1..*") + + shell: + ''' + mkdir tmp + csvtk -t -T concat *.tsv > tmp/concat.tsv + + # Column number of of BIN_ID column + COLUMN_NUMBER=$(sed 1q tmp/concat.tsv | tr -s '\t' '\n' | nl | grep -E 'BIN_ID' | cut -f 1 | tr -d ' ') + for bin in $(csvtk cut -t -T -f BIN_ID tmp/concat.tsv | tail -n +2 | sort | uniq); do + FILE_NAME="${bin}_!{sample}_!{type}.!{dbType}.blast.tsv" + head -n 1 tmp/concat.tsv > ${FILE_NAME} + awk -F$'\t' -v filter=${bin} -v column=${COLUMN_NUMBER} ' $column == filter ' tmp/concat.tsv >> ${FILE_NAME} + done + ''' +} + + +/* +* +* This method splits the input file in chunks. +* +*/ +workflow _wSplit { + take: + input + chunkSize + main: + SAMPLE_IDX = 0 + FILE_IDX = 1 + FILE_2_IDX = 2 + FILE_3_IDX = 3 + COUNT_IDX=4 + CHUNK_SIZE_IDX=5 + ID_PLACEHOLDER = "" + + input | map { dataset -> [dataset[SAMPLE_IDX].toString(), ID_PLACEHOLDER, dataset[FILE_IDX], dataset[FILE_2_IDX]] } \ + | pCount | combine(chunkSize) | flatMap { sample -> \ + Utils.splitFilesIndex(Integer.parseInt(sample[COUNT_IDX]), sample[CHUNK_SIZE_IDX], [sample[SAMPLE_IDX], sample[FILE_2_IDX], sample[FILE_3_IDX]]) } \ + | set { chunks } + + emit: + chunks +} /** * @@ -749,8 +784,9 @@ workflow _wAnnotation { pProkka(prodigalMode, _wCreateProkkaInput.out.prokkaInput | combine(contigCoverage, by: SAMPLE_IDX)) // Collect all databases - selectedDBs = params?.steps?.annotation?.mmseqs2.findAll().collect({ - [it.key, it.value?.params ?: "", \ + selectedDBs = params?.steps?.annotation?.mmseqs2.findAll({ it.key != "chunkSize" }).collect({ + [it.key, it.value?.additionalParams?.search ?: "", \ + it.value?.additionalParams?.additionalColumns ?: "", \ it.value?.database?.extractedDBPath ?: "", \ it.value.database?.download?.source ?: "", \ it.value.database?.download?.md5sum ?: "", \ @@ -766,7 +802,6 @@ workflow _wAnnotation { it.value.database?.download?.md5sum ?: "", \ it.value.database?.download?.s5cmd?.params ?: "" ] }) - PATH_IDX=2 // The following groupTuple operators need a counter for the expected number of bins or fasta files of a sample. 
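The `_wSplit` workflow above feeds the sequence count from `pCount` and the configured `chunkSize` into the new `Utils.splitFilesIndex` helper. As a minimal sketch of what that helper returns — the sample tuple and the counts below are purely illustrative, assuming `lib/Utils.groovy` is on the classpath:

    // Illustrative only: 25000 predicted proteins split into chunks of 10000.
    def chunks = Utils.splitFilesIndex(25000, 10000, ["SAMPLE_A", "genes.faa", "gene2contig.tsv"])
    // => [["SAMPLE_A", "genes.faa", "gene2contig.tsv", 1,     10000, 3],
    //     ["SAMPLE_A", "genes.faa", "gene2contig.tsv", 10001, 20000, 3],
    //     ["SAMPLE_A", "genes.faa", "gene2contig.tsv", 20001, 25000, 3]]
    // Each tuple carries the start index, the (clipped) end index and the total number of
    // chunks; that chunk count is what _wAnnotation later uses as the groupKey size when it
    // merges the per-chunk BLAST results back together.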
@@ -779,28 +814,52 @@ workflow _wAnnotation { // Run all amino acid outputs against all databases // Collect by sample name to bundle searches and avoid calls with small input files pProkka.out.faa | map { [it[SAMPLE_IDX], it[PATH_IDX]] } | combine(fastaCounter, by:SAMPLE_IDX) \ - | map { sample, path, size -> tuple( groupKey(sample, size), path ) } | groupTuple() | set { groupedProkkaFaa } + | map { sample, path, size -> tuple( groupKey(sample, size), path ) } | groupTuple() | set { groupedProkkaFaa } + + MMSEQS2_CHUNK_SIZE_DEFAULT=7000 - groupedProkkaFaa | combine(contig2GeneMapping, by: SAMPLE_IDX) | combine(Channel.from(selectedDBs)) | set { combinedMMseqs } + // Split fasta files and run blast on each part in parallel + _wSplit(groupedProkkaFaa | combine(contig2GeneMapping, by: SAMPLE_IDX), \ + Channel.from(params.steps?.annotation?.mmseqs2?.chunkSize?:MMSEQS2_CHUNK_SIZE_DEFAULT)) \ + | combine(Channel.from(selectedDBs)) | set { combinedMMseqs } pMMseqs2(sourceChannel, combinedMMseqs) + + FIRST_ELEM_IDX = 0 + FIRST_ELEM_DB_IDX = 0 + FIRST_ELEM_SAMPLE_IDX = 1 + FIRST_ELEM_TYPE_IDX = 2 + + ELEM_PATH_IDX = 3 + + pMMseqs2.out.blast \ + // An artificial group key is created which consists of the db, sample name and the type of the data (e.g. binned) + | map { db, sample, type, start, stop, chunks, out -> [db + "_-_" + sample + "_-_" + type, db, sample, type, start, stop, chunks, out] } \ + | map { key, db, sample, type, start, stop, chunks, out -> tuple(groupKey(key, chunks.toInteger()), [db, sample, type, out]) } \ + // Based on the artificial group key and the expected number of chunks, the groupTuple operator is executed + | groupTuple() \ + // All values of each entry are then reordered and prepared for the pCollectFile process input + | map { key, dataset -> [dataset[FIRST_ELEM_IDX][FIRST_ELEM_DB_IDX], dataset[FIRST_ELEM_IDX][FIRST_ELEM_SAMPLE_IDX], \ + dataset[FIRST_ELEM_IDX][FIRST_ELEM_TYPE_IDX], dataset.stream().map{ elem -> elem[ELEM_PATH_IDX] }.collect()] } \ + | pCollectFile \ + // The kegg database result is selected and reordered for the pKEGGFromBlast process input + | filter({ sample, type, dbType, blastFiles -> dbType == "kegg" }) \ + | flatMap({ sample, type, dbType, blastFiles -> blastFiles.stream().map({ fi -> [sample, type, fi] }).collect() }) \ + | pKEGGFromBlast + combinedMMseqsTax = groupedProkkaFaa | combine(Channel.from(selectedTaxDBs)) pMMseqs2_taxonomy(sourceChannel, combinedMMseqsTax) DB_TYPE_IDX = 0 - pMMseqs2.out.blast | filter({ result -> result[DB_TYPE_IDX] == "kegg" }) \ - | map({ result -> result.remove(0); result }) \ - | set { mmseqs2Results } // Run Resistance Gene Identifier with amino acid outputs pProkka.out.faa | pResistanceGeneIdentifier - pKEGGFromBlast(mmseqs2Results) pProkka.out.logs | mix(pMMseqs2.out.logs) \ | mix(pMMseqs2_taxonomy.out.logs) \ | mix(pResistanceGeneIdentifier.out.logs) \ | mix(pKEGGFromBlast.out.logs) | pDumpLogs emit: - keggAnnotation = pKEGGFromBlast.out.kegg_paths + keggAnnotation = pKEGGFromBlast.out.keggPaths mmseqs2_kronaHtml = pMMseqs2_taxonomy.out.kronaHtml mmseqs2_krakenTaxonomy = pMMseqs2_taxonomy.out.krakenStyleTaxonomy mmseqs2_taxonomy = pMMseqs2_taxonomy.out.taxonomy diff --git a/modules/plasmids/module.nf b/modules/plasmids/module.nf index 71dc8da5..1739d61f 100644 --- a/modules/plasmids/module.nf +++ b/modules/plasmids/module.nf @@ -138,31 +138,6 @@ process pCount { ''' } -/* -* This method takes number of entries in a input file (e.g. 
fata entries in multi fasta file), -* the maximum number of allowed entries per chunk and the actual input (e.g. file). -* It creates a list of indices of chunks of the input file based on the input parameters. -*/ -def splitFilesIndex(seqCount, chunkSize, sample){ - int chunk=seqCount.intdiv(chunkSize) - if(seqCount.mod(chunkSize) != 0){ - chunk = chunk + 1 - } - def chunks = [] - for(def n : 1..chunk){ - int start = (n-1) * chunkSize + 1 - - int end = n * chunkSize - - if(end > seqCount){ - end=seqCount - } - chunks.add(sample + [start, end]) - } - - return chunks -} - /* * @@ -180,8 +155,8 @@ workflow _wSplit { COUNT_IDX=3 CHUNK_SIZE_IDX=4 input | pCount | combine(chunkSize) | flatMap { sample -> \ - splitFilesIndex(Integer.parseInt(sample[COUNT_IDX]), sample[CHUNK_SIZE_IDX], [sample[SAMPLE_IDX], sample[BIN_ID_IDX], sample[FILE_IDX]]) } \ - | set { chunks } + Utils.splitFilesIndex(Integer.parseInt(sample[COUNT_IDX]), sample[CHUNK_SIZE_IDX], [sample[SAMPLE_IDX], sample[BIN_ID_IDX], sample[FILE_IDX]]) } \ + | map({ sample, binID, binFile, start, end, chunkSize -> [sample, binID, binFile, start, end] }) | set { chunks } emit: chunks } diff --git a/nextflow.config b/nextflow.config index f5bc96db..49dbfc91 100644 --- a/nextflow.config +++ b/nextflow.config @@ -302,7 +302,7 @@ params { PlasClass_image = "quay.io/biocontainers/plasclass:0.1.1--pyhdfd78af_0" rgi_image = "quay.io/biocontainers/rgi:6.0.1--pyha8f3691_1" sans_image = "quay.io/metagenomics/toolkit-sans:0.1.0" - mmseqs2_image = "quay.io/microbiome-informatics/mmseqs:2.13" + mmseqs2_image = "quay.io/metagenomics/toolkit-mmseqs2:15-6f452-0" magscot_image = "quay.io/metagenomics/toolkit-magscot:1.0.0" kmc_image = "quay.io/biocontainers/kmc:3.2.1--hf1761c0_2" diff --git a/templates/mmseqs2.sh b/templates/mmseqs2.sh new file mode 100644 index 00000000..a0cc82c1 --- /dev/null +++ b/templates/mmseqs2.sh @@ -0,0 +1,88 @@ +mkdir -p !{params.polished.databases} +# if no local database is referenced, start the download part +if [ -z "!{EXTRACTED_DB}" ] +then +# polished params end with a / so no additional one is needed at the end + DATABASE=!{params.polished.databases}mmseqs2 + mkdir -p ${DATABASE}/!{dbType} + LOCK_FILE=${DATABASE}/!{dbType}/lock.txt + + # Check if access and secret keys are necessary for s5cmd + if [ ! -z "!{S3_DB_ACCESS}" ] + then + export AWS_ACCESS_KEY_ID=!{S3_DB_ACCESS} + export AWS_SECRET_ACCESS_KEY=!{S3_DB_SECRET} + fi + + # Create and try to lock the given “LOCK_FILE”. If the file is locked no other process will download the same database simultaneously + # and wait until the database is downloaded. Depending on your database path (starts with s3:// | https:// | /../../.. ) + # one of the flock input commands is executed to download and decompress the database. + # If a database is present at the given path, checksums are compared. If they are identical, the download will be omitted. + flock ${LOCK_FILE} concurrentDownload.sh --output=${DATABASE}/!{dbType} \ + --link=!{DOWNLOAD_LINK} \ + --httpsCommand="wget -qO- !{DOWNLOAD_LINK} | zstd -T!{task.cpus} -d -c | tar -xv " \ + --s3FileCommand="s5cmd !{S5CMD_PARAMS} cat --concurrency !{task.cpus} !{DOWNLOAD_LINK} | zstd -T!{task.cpus} -d -c | tar -xv " \ + --s3DirectoryCommand="s5cmd !{S5CMD_PARAMS} cp --concurrency !{task.cpus} !{DOWNLOAD_LINK} . " \ + --s5cmdAdditionalParams="!{S5CMD_PARAMS}" \ + --localCommand="zstd -T!{task.cpus} -c -d !{DOWNLOAD_LINK} | tar -xv " \ + --expectedMD5SUM=!{MD5SUM} + + # Path of the newly downloaded database. 
A mmseqs2 database consists of multiple files, + # the unique lookup file is searched to determine the basename of each database. + FILEPATH=$(find ${DATABASE}/!{dbType} -name *.lookup -type f) + # remove the extension to get the files basename. + MMSEQS2_DATABASE_DIR=${FILEPATH%.*} +else + # If an extracted database is present use that path. + MMSEQS2_DATABASE_DIR="!{EXTRACTED_DB}" +fi + +# Check which mmseqs version is the right one +MMSEQS_VERSION="" +if grep -q "avx2" /proc/cpuinfo; then + echo "AVX2 supported, using mmseqs_avx2" + MMSEQS_VERSION="mmseqs_avx2" +elif grep -q "sse4_1" /proc/cpuinfo; then + echo "SSE4.1 supported, using mmseqs_sse41" + MMSEQS_VERSION="mmseqs_sse41" +else + echo "Using mmseqs_sse2" + MMSEQS_VERSION="mmseqs_sse2" +fi + +MMSEQS_HEADER="query,target,pident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,qlen,tlen,qcov,tcov!{ADDITIONAL_COLUMNS}" +OUTPUT_TMP_TSV="!{sample}_!{binType}.!{start}.!{stop}.!{dbType}.blast.tmp.tsv" +OUTPUT_TSV="!{sample}_!{binType}.!{start}.!{stop}.!{dbType}.blast.tsv" + +# According to the MMSeqs docs the --split-memory-limit parameter defines the RAM usage for *about* 80 percent of the total RAM consumption. +RAM_LIMIT="$(awk -v RATIO=80 -v RAM=$(echo !{task.memory} | cut -f 1 -d ' ') 'BEGIN { print int(RAM / 100 * RATIO) }')G" + +mkdir tmp + +# Create new input file +cat !{fasta} | seqkit range -r !{start}:!{stop} > input.fa + +# Only mmseqs2 databases can be used for every kind of search. Inputs have to be converted first. +$MMSEQS_VERSION createdb input.fa queryDB +# Load all indices into memory to increase searching speed +$MMSEQS_VERSION touchdb --threads !{task.cpus} queryDB +$MMSEQS_VERSION touchdb --threads !{task.cpus} ${MMSEQS2_DATABASE_DIR} +$MMSEQS_VERSION search queryDB ${MMSEQS2_DATABASE_DIR} !{sample}_!{binType}.!{dbType}.results.database tmp !{parameters} \ + --threads !{task.cpus} --split-memory-limit ${RAM_LIMIT} +# mmseqs2 searches produce output databases. These have to be converted to a more useful format. The blast -outfmt 6 in this case. +$MMSEQS_VERSION convertalis queryDB ${MMSEQS2_DATABASE_DIR} !{sample}_!{binType}.!{dbType}.results.database ${OUTPUT_TMP_TSV} \ + --threads !{task.cpus} --format-output ${MMSEQS_HEADER} + +# Try only if the output file is not empty, csvtk will fail otherwise +if [ -s ${OUTPUT_TMP_TSV} ]; then + # Add header + sed -i "1i $(echo ${MMSEQS_HEADER} | tr ',' '\t')" ${OUTPUT_TMP_TSV} + + # Add BIN_ID and SAMPLE column + csvtk -t -T join -f "locus_tag;query" \ + <(csvtk -t -T concat !{contig2GeneMapping} | csvtk -t -T cut -f SAMPLE,BIN_ID,CONTIG,locus_tag) ${OUTPUT_TMP_TSV} > ${OUTPUT_TSV} + + # use "query" column name instead of "locus_tag" + sed -i -e "1s/locus_tag/query/" ${OUTPUT_TSV} +fi +
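A note on how the per-chunk `pMMseqs2` results are merged again: every chunk emits the total number of chunks of its sample, and `_wAnnotation` uses that number as the size of a `groupKey`, so `groupTuple` can close a database/sample/type group as soon as all of its chunks have arrived instead of blocking until the channel is exhausted. A reduced, self-contained sketch of the same pattern — the tuple values and file names are hypothetical, not actual pipeline output:

    workflow {
        // Artificial key per db/sample/type, sized with the expected number of chunks;
        // this mirrors the pMMseqs2.out.blast handling in _wAnnotation.
        Channel.of(
                ["uniref90", "SAMPLE_A", "binned", 1,     10000, 3, file("chunk1.blast.tsv")],
                ["uniref90", "SAMPLE_A", "binned", 10001, 20000, 3, file("chunk2.blast.tsv")],
                ["uniref90", "SAMPLE_A", "binned", 20001, 25000, 3, file("chunk3.blast.tsv")]) \
          | map { db, sample, type, start, stop, chunks, out -> \
              tuple(groupKey(db + "_-_" + sample + "_-_" + type, chunks.toInteger()), [db, sample, type, out]) } \
          | groupTuple() \
          | view()   // released once all 3 chunks have been seen for this key
    }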
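The new `additionalColumns` option simply extends the fixed BLAST-style column list: `pMMseqs2` turns it into `ADDITIONAL_COLUMNS` (prefixed with a comma when non-empty) and `templates/mmseqs2.sh` appends it to `MMSEQS_HEADER` before `convertalis` is called. A small Groovy sketch of that guard; the extra column names are only examples:

    // Mirrors the ADDITIONAL_COLUMNS handling in pMMseqs2 (qheader/theader are example columns).
    def baseHeader = "query,target,pident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,qlen,tlen,qcov,tcov"
    def columns    = "qheader,theader"                          // value of additionalParams.additionalColumns
    def additional = columns.trim() == "" ? "" : ",${columns}"  // an empty setting leaves the default header untouched
    assert (baseHeader + additional).endsWith("tcov,qheader,theader")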