diff --git a/.codecov.yml b/.codecov.yml
new file mode 100644
index 0000000..9cf5009
--- /dev/null
+++ b/.codecov.yml
@@ -0,0 +1,7 @@
+codecov:
+  ci:
+    - "travis.org"
+
+ignore:
+  - ".git/"
+  - "tests/"
diff --git a/.simplecov b/.simplecov
new file mode 100644
index 0000000..7b849f5
--- /dev/null
+++ b/.simplecov
@@ -0,0 +1,4 @@
+require 'codecov'
+require 'simplecov'
+
+SimpleCov.formatter = Codecov::SimpleCov::Formatter
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
index 6773e89..ef6cc59 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,10 +1,17 @@
 language: bash
+dist: focal
 
 before_install:
+  - gem install bashcov codecov
   - sudo apt-get install parallel
 
 script:
-    - tests/libs/bats/bin/bats tests/integration_offline.bats
+  - bashcov tests/libs/bats/bin/bats tests/integration_offline.bats
+
+after_success:
+  - curl -Os https://uploader.codecov.io/latest/linux/codecov
+  - chmod +x codecov
+  - ./codecov -f coverage/codecov-result.json -Z
 
 notifications:
   email: false
diff --git a/README.md b/README.md
index e18c571..0be195a 100755
--- a/README.md
+++ b/README.md
@@ -1,24 +1,50 @@
-# genome_updater
+# genome_updater [![Build Status](https://travis-ci.com/pirovc/genome_updater.svg?branch=master)](https://travis-ci.com/pirovc/genome_updater) [![codecov](https://codecov.io/gh/pirovc/genome_updater/branch/master/graph/badge.svg)](https://codecov.io/gh/pirovc/genome_updater) [![Anaconda-Server Badge](https://anaconda.org/bioconda/genome_updater/badges/downloads.svg)](https://anaconda.org/bioconda/genome_updater)
 
-Bash script to download and update snapshots of the NCBI genomes repository (refseq/genbank) [1] with several filters, detailed logs, reports, file integrity check (MD5) and parallel [2] download support.
+Bash script to download ***and update*** snapshots of the NCBI genomes repository (refseq/genbank) [1] with filters, detailed log, reports, file integrity check (MD5) and parallel [2] download support.
 
-With genome_updater you can download and keep several snapshots of a certain sub-set of the genomes repository, without redundancy and with incremental track of changes.
+## Quick usage guide
+
+### Get genome_updater
+
+	wget --quiet --show-progress https://raw.githubusercontent.com/pirovc/genome_updater/master/genome_updater.sh
+	chmod +x genome_updater.sh
+
+### Download
+
+Download Archaeal complete genome sequences from the refseq repository (`-t` number of parallel downloads):
+
+	./genome_updater.sh -o "arc_refseq_cg" -d "refseq" -g "archaea" -l "complete genome" -f "genomic.fna.gz" -t 12
+
+### Update
+
+Some days later, update the repository:
+
+	./genome_updater.sh -o "arc_refseq_cg"
+
+ - Add `-k` to perform a dry-run, showing how many files will be downloaded/updated without any changes.
+
+ - Newly added sequences will be downloaded and a new version (`-b`, timestamp by default) will be created. Removed or old sequences will be kept but not carried to the new version.
+
+ - Arguments can be added or changed in the update. For example `./genome_updater.sh -o "arc_refseq_cg" -t 2` to use a different number of threads or `./genome_updater.sh -o "arc_refseq_cg" -l ""` to remove the "complete genome" filter.
+
+ - `history.tsv` will be created in the output folder (`-o`), tracking versions and arguments used (obs: boolean flags/arguments are not tracked - e.g. `-m`).
 
 ## Details
 
-- genome_updater runs on a working directory (defined with `-o`) and creates a snapshot (`-b`) of refseq and/or genbank (`-d`) genome repositories based on selected organism groups (`-g`) and/or taxonomic ids (`-S`/`-T`) with the desired files type(s) (`-f`)
-- filters can be applied to refine the selection: RefSeq category (`-c`), assembly level (`-l`), dates (`-D`/`-E`), custom filters (`-F`), top assemblies (`-P`/`-A`), GTDB [3] compatible sequences (`-z`).
-- the repository can updated (e.g. after some days) with only incremental changes. genome_updater will identify previous files and update the working directory with the most recent versions, keeping track of all changes and just downloading/removing what is necessary
+genome_updater downloads and keeps several snapshots of a certain sub-set of the genomes repository, without redundancy and with incremental track of changes.
 
-## Installation
+- it runs on a working directory (defined with `-o`) and creates a snapshot (optionally named with `-b`, timestamp by default) of refseq and/or genbank (`-d`) genome repositories based on selected organism groups (`-g`) and/or taxonomic ids (`-T`) with the desired files type(s) (`-f`)
+- filters can be applied to refine the selection: refseq category (`-c`), assembly level (`-l`), dates (`-D`/`-E`), custom filters (`-F`), [top assemblies](#Top-assemblies) (`-A`)
+- `-M gtdb` enables GTDB [3] compatibility. Only assemblies from the latest GTDB release will be kept and taxonomic filters will work based on GTDB nodes (e.g. `-T "c__Hydrothermarchaeia"` or `-A genus:3`)
+- the repository can be updated or changed with incremental changes. outdated files are kept in their respective version and repeated files are linked to the new version. genome_updater keeps track of all changes and just downloads what is necessary
 
-[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/genome_updater/README.html)
+## Installation
 
 With conda:
 
 	conda install -c bioconda genome_updater 
 
-or simply download the raw file and give execution permissions:
+or direct file download:
 
 	wget https://raw.githubusercontent.com/pirovc/genome_updater/master/genome_updater.sh
 	chmod +x genome_updater.sh
@@ -31,72 +57,62 @@ To test if all genome_updater functions are running properly on your system:
 	cd genome_updater
 	tests/test.sh
 
-## Usage
-
-Downloads complete genome sequences from Archaea in the RefSeq repository (`-t` number parallel downloads, `-m` checks download completeness):
-
-	./genome_updater.sh -g "archaea" -d "refseq" -l "complete genome" -f "genomic.fna.gz" -o "arc_refseq_cg" -t 12 -m
-
- - Add `-k` to perform a dry-run before the actual run. genome_updater will show how many files will be downloaded or updated and exit without changes
- - The *same command* executed again (e.g. some days later), will update the snapshot of the requested dataset to its latest state, accounting for new, updated and removed sequences.
- - `history.tsv` will be created in the output folder, tracking versions and arguments used
-
-## Options
-
-Data selection:
-- `-d`: database selection (genbank and/or refseq)
-- `-g`: organism groups (`-g "archaea,bacteria"`)
-- `-S`: species taxids (`-S "562,623"`)
-- `-T`: any taxids including all children nodes (`-T "620,1643685"`)
-- `-f`: files to be downloaded [genomic.fna.gz,assembly_report.txt, ... - check ftp://ftp.ncbi.nlm.nih.gov/genomes/all/README.txt for all file formats]
-- `-l`: filter by Assembly level [complete genome, chromosome, scaffold, contig]
-- `-c`: filter by RefSeq Category [reference genome, representative genome, na]
-- `-P`: select [top assemblies](#top-assemblies) for species entries. `-P 3` downloads the top 3 assemblies for each species
-- `-A`: select [top assemblies](#top-assemblies) for taxids entries. `-A 3` downloads the top 3 assemblies for each taxid selected
-- `-D`: filter entries published on or after this date
-- `-E`: filter entries published on or before this date
-- `-z`: select only assemblies included in the latest GTDB release
-
-Utilities:
-- `-i`: fixes current snapshot in case of network or any other failure during download
-- `-k`: dry-run - do not perform any action but shows number of files to be downloaded or updated
-- `-t`: downloads in parallel
-- `-m`: checks for file integrity (MD5)
-- `-e`: re-downloads entries from any "assembly_summary.txt" obtained from external sources. Easy way to share snapshots of exact database version used.
-- `-a`: downloads the current version of the NCBI taxonomy database (taxdump.tar.gz)
-
-Reports:
-- `-u`: Added/Removed assembly accessions
-- `-r`: Added/Removed sequence accessions 
-- `-p`: Output list of URLs for downloaded and failed files
-
-Version control:
-- `-b`: name a version under a label (timestamp by default)
-- `-B`: when updating, use a different label as a base version. Useful for rolling back updates or to branch out of a base version.
-
 ## Examples
 
-### Downloading genomic sequences (.fna files) for the Complete Genome sequences from RefSeq for Bacteria and Archaea and keep them updated
+### Archaea, Bacteria, Fungi and Viral complete genome sequences from refseq
 
-	# Download (checking md5, 12 threads, with extended assembly accession report)
-	./genome_updater.sh -d "refseq" -g "archaea,bacteria" -l "Complete Genome" -f "genomic.fna.gz" -o "arc_bac_refseq_cg" -t 12 -u -m
+	# Download (-m to check integrity of downloaded files)
+	./genome_updater.sh -d "refseq" -g "archaea,bacteria,fungi,viral" -f "genomic.fna.gz" -o "arc_bac_fun_vir_refseq_cg" -t 12 -m
 	
-	# Downloading additional .gbff files for the current snapshot (adding genomic.gbff.gz to -f and adding -i command)
-	./genome_updater.sh -d "refseq" -g "archaea,bacteria" -l "Complete Genome" -f "genomic.fna.gz,genomic.gbff.gz" -o "arc_bac_refseq_cg" -t 12 -u -m -i
+	# Update (e.g. some days later)
+	./genome_updater.sh -o "arc_bac_fun_vir_refseq_cg" -m
 	
-	# Some days later, just check for updates but do not update
-	./genome_updater.sh -d "refseq" -g "archaea,bacteria" -l "Complete Genome" -f "genomic.fna.gz,genomic.gbff.gz" -o "arc_bac_refseq_cg" -k
+### All RNA Viruses (under the taxon Riboviria) on refseq
 
-	# Perform update
-	./genome_updater.sh -d "refseq" -g "archaea,bacteria" -l "Complete Genome" -f "genomic.fna.gz,genomic.gbff.gz" -o "arc_bac_refseq_cg" -t 12 -u -m
+	./genome_updater.sh -d "refseq" -T "2559587" -f "genomic.fna.gz" -o "all_rna_virus" -t 12 -m
+	
+### One genome assembly for each bacterial taxonomic node (leaves) in genbank
+    
+    ./genome_updater.sh -d "genbank" -g "bacteria" -f "genomic.fna.gz" -o "top1_bacteria_genbank" -A 1 -t 12 -m 
+    
+### One genome assembly for each bacterial species in genbank
+    
+    ./genome_updater.sh -d "genbank" -g "bacteria" -f "genomic.fna.gz" -o "top1species_bacteria_genbank" -A "species:1" -t 12 -m 
+    
+### All genome sequences used in the latest GTDB release
+
+	./genome_updater.sh -d "refseq,genbank" -g "archaea,bacteria" -f "genomic.fna.gz" -o "GTDB_complete" -M "gtdb" -t 12 -m
+	
+### Two genome assemblies for every genus in GTDB
+    
+    ./genome_updater.sh -d "refseq,genbank" -g "archaea,bacteria" -f "genomic.fna.gz" -o "GTDB_top2genus" -M "gtdb" -A "genus:2" -t 12 -m
 
-### Download all RNA Viruses (under the taxon Riboviria) on RefSeq
+### All assemblies from a specific family in GTDB
+    
+    ./genome_updater.sh -d "refseq,genbank" -g "archaea,bacteria" -f "genomic.fna.gz" -o "GTDB_family_Gastranaerophilaceae" -M "gtdb" -T "f__Gastranaerophilaceae" -t 12 -m
 
-	./genome_updater.sh -d "refseq" -T "2559587" -f "genomic.fna.gz" -o "all_rna_virus" -t 12
+### Recovering fasta files from a previously obtained assembly_summary.txt
+
+	./genome_updater.sh -e /my/path/assembly_summary.txt -f "genomic.fna.gz" -o "recovered_sequences"
+
+## Advanced examples
 
-### Download all genome sequences used in the latests GTDB release
+### Downloading genomic sequences (.fna files) for the Complete Genome sequences from RefSeq for Bacteria and Archaea and keep them updated
+
+	# Dry-run to check files available
+	./genome_updater.sh -d "refseq" -g "archaea,bacteria" -l "complete genome" -f "genomic.fna.gz" -k
+	
+	# Download (-o output folder, -t threads, -m checking md5, -u extended assembly accession report)
+	./genome_updater.sh -d "refseq" -g "archaea,bacteria" -l "complete genome" -f "genomic.fna.gz" -o "arc_bac_refseq_cg" -t 12 -u -m
+	
+	# Downloading additional .gbff files for the current snapshot (adding genomic.gbff.gz to -f, and -i to just add files without updating)
+	./genome_updater.sh -f "genomic.fna.gz,genomic.gbff.gz" -o "arc_bac_refseq_cg" -i
+	
+	# Some days later, just check for updates but do not update
+	./genome_updater.sh -o "arc_bac_refseq_cg" -k
 
-	./genome_updater.sh -d "refseq,genbank" -f "genomic.fna.gz" -o "GTDB" -z -t 12
+	# Perform update
+	./genome_updater.sh -o "arc_bac_refseq_cg" -u -m
 
 ### Branching base version for specific filters
 
@@ -104,62 +120,22 @@ Version control:
 	./genome_updater.sh -d "refseq" -g "bacteria" -f "genomic.fna.gz" -o "bac_refseq" -t 12 -m -b "all"
 
 	# Branch the main files into two sub-versions (no new files will be downloaded or copied)
-	./genome_updater.sh -d "refseq" -g "bacteria" -f "genomic.fna.gz" -o "bac_refseq" -t 12 -m -B "all" -b "complete" -l "complete genome"
-	./genome_updater.sh -d "refseq" -g "bacteria" -f "genomic.fna.gz" -o "bac_refseq" -t 12 -m -B "all" -b "representative" -c "representative genome"
-
-### Download one genome assembly for each bacterial species in genbank
-
-	./genome_updater.sh -d "genbank" -g "bacteria" -f "genomic.fna.gz" -o "top1_bacteria_genbank" -t 12 -P 1
-
-### Download all E. Coli assemblies available on GenBank and RefSeq under a label (v1)
-
-	./genome_updater.sh -d "genbank,refseq" -S "562" -f "genomic.fna.gz" -o "all_ecoli" -t 12 -b v1
-
-### Check amount of reference entries available for the set of Viral genomes on genbank
-
-	./genome_updater.sh -d "genbank" -g "viral" -k
+	./genome_updater.sh -o "bac_refseq" -B "all" -b "complete" -l "complete genome"
+	./genome_updater.sh -o "bac_refseq" -B "all" -b "represen" -c "representative genome"
 
 ### Download Fungi RefSeq assembly information and generate sequence reports and URLs
 
-	./genome_updater.sh -d "refseq" -g "fungi" -f "assembly_report.txt" -o "fungi" -t 12 -r -p
+	./genome_updater.sh -d "refseq" -g "fungi" -f "assembly_report.txt" -o "fungi" -t 12 -rpu
 
-### Recovering fasta files from a previously obtained assembly_summary.txt
+### Use curl (default wget), change timeout and retries for download, increase retries
 
-	./genome_updater.sh -e /my/path/assembly_summary.txt -f "genomic.fna.gz" -o "recovered_sequences" -b "january_2018"
+	retries=10 timeout=600 ./genome_updater.sh -g "fungi" -o fungi -t 12 -f "genomic.fna.gz,assembly_report.txt" -L curl -R 6
 
-### Use curl, change timeout and retries for download (default wget)
-
-	retries=10 timeout=600 use_curl=1 ./genome_updater.sh -g "fungi" -o fungi -t 12 -f "genomic.fna.gz,assembly_report.txt"
-
-## Top assemblies
-
-The top assemblies (`-P`/`-A`) will be selected based on the species/taxid entries in the assembly_summary.txt and not for the taxids provided with  (`-S`/`-T`). They are selected sorted by categories in the following order of importance:
-	
-	A) RefSeq Category: 
-		1) reference genome
-		2) representative genome
-		3) na
-	B) Assembly level:
-		1) Complete genome
-		2) Chromosome
-		3) Scaffold
-		4) Contig
-	C) Relation to type material:
-		1) assembly from type material
-		2) assembly from synonym type material
-		3) assembly from pathotype material
-		4) assembly designated as neotype
-		5) assembly designated as reftype
-		6) ICTV species exemplar
-		7) ICTV additional isolate
-	D) Date:
-		1) Most recent first
-
-## Extended reports
+## Reports
 
 ### assembly accessions
 
-The parameter `-u` activates the output of a list of updated assembly accessions for the entries with all files (`-f`) successfully downloaded. The file `updated_assembly_accession.txt` has the following fields (tab separated):
+The parameter `-u` activates the output of a list of updated assembly accessions for the entries with all files (`-f`) successfully downloaded. The file `{timestamp}_assembly_accession.txt` has the following fields (tab separated):
 
 	Added [A] or Removed [R], assembly accession, url
 
@@ -171,7 +147,7 @@ Example:
 
 ### sequence accessions
 
-The parameter `-r` activates the output of a list of updated sequence accessions for the entries with all files (`-f`) successfully downloaded. It is only available when `assembly_report.txt` is one of the file types. The file `updated_sequence_accession.txt` has the following fields (tab separated):
+The parameter `-r` activates the output of a list of updated sequence accessions for the entries with all files (`-f`) successfully downloaded. It is only available when `assembly_report.txt` is one of the file types. The file `{timestamp}_sequence_accession.txt` has the following fields (tab separated):
 
 	Added [A] or Removed [R], assembly accession, genbank accession, refseq accession, sequence length, taxonomic id
 
@@ -180,7 +156,7 @@ Example:
 	A	GCA_000243255.1	CM001436.1	NZ_CM001436.1	3200946	937775
 	R	GCA_000275865.1	CM001555.1	NZ_CM001555.1	2475100	28892
 
-* genome_updater fixes the current version of the database before updating (or just fix with `-i`). In this step if some entry is fixed and the reports are active, all lines are going to be reported as Added.
+Obs: if genome_updater breaks or does not finish completely, some files may be missing from the assembly and sequence accession reports
 
 ### URLs (and files)
 
@@ -194,78 +170,124 @@ or
 
 	find output_folder/version/files/ -type f
 
+## Top assemblies
+
+`-A` will select the "best" assemblies for each taxonomic node (leaves or specific rank) according to 4 categories (A-D), in the following order of importance:
+
+	A) refseq Category: 
+		1) reference genome
+		2) representative genome
+		3) na
+	B) Assembly level:
+		1) Complete genome
+		2) Chromosome
+		3) Scaffold
+		4) Contig
+	C) Relation to type material:
+		1) assembly from type material
+		2) assembly from synonym type material
+		3) assembly from pathotype material
+		4) assembly designated as neotype
+		5) assembly designated as reftype
+		6) ICTV species exemplar
+		7) ICTV additional isolate
+	D) Date:
+		1) Most recent first
+
+
 ## Parameters
 
 	┌─┐┌─┐┌┐┌┌─┐┌┬┐┌─┐    ┬ ┬┌─┐┌┬┐┌─┐┌┬┐┌─┐┬─┐
 	│ ┬├┤ ││││ ││││├┤     │ │├─┘ ││├─┤ │ ├┤ ├┬┘
 	└─┘└─┘┘└┘└─┘┴ ┴└─┘────└─┘┴  ─┴┘┴ ┴ ┴ └─┘┴└─
-	                                     v0.4.1 
+	                                     v0.5.0 
 
 	Database options:
-	 -d Database (comma-separated entries) [genbank, refseq]
+	 -d Database (comma-separated entries)
+		[genbank, refseq]
 
 	Organism options:
-	 -g Organism group (comma-separated entries) [archaea, bacteria, fungi, human, invertebrate, metagenomes, other, plant, protozoa, vertebrate_mammalian, vertebrate_other, viral]. Example: archaea,bacteria.
+	 -g Organism group(s) (comma-separated entries, empty for all)
+		[archaea, bacteria, fungi, human, invertebrate, metagenomes, 
+		other, plant, protozoa, vertebrate_mammalian, vertebrate_other, viral]
 		Default: ""
-	 -S Species level taxonomic ids (comma-separated entries). Example: 622,562
-		Default: ""
-	 -T Any taxonomic ids - children lineage will be generated (comma-separated entries). Example: 620,649776
+	 -T Taxonomic identifier(s) (comma-separated entries, empty for all).
+		Example: "562" (for -M ncbi) or "s__Escherichia coli" (for -M gtdb)
 		Default: ""
 
 	File options:
-	 -f files to download [genomic.fna.gz,assembly_report.txt, ...] check ftp://ftp.ncbi.nlm.nih.gov/genomes/all/README.txt for all file formats
+	 -f file type(s) (comma-separated entries)
+		[genomic.fna.gz, assembly_report.txt, protein.faa.gz, genomic.gbff.gz]
+		More formats at https://ftp.ncbi.nlm.nih.gov/genomes/all/README.txt
 		Default: assembly_report.txt
 
 	Filter options:
-	 -c refseq category (comma-separated entries, empty for all) [reference genome, representative genome, na]
+	 -c refseq category (comma-separated entries, empty for all)
+		[reference genome, representative genome, na]
 		Default: ""
-	 -l assembly level (comma-separated entries, empty for all) [complete genome, chromosome, scaffold, contig]
+	 -l assembly level (comma-separated entries, empty for all)
+		[complete genome, chromosome, scaffold, contig]
 		Default: ""
-	 -P Number of top references for each species nodes to download. 0 for all. Selection order: RefSeq Category, Assembly level, Relation to type material, Date (most recent first)
-		Default: 0
-	 -A Number of top references for each taxids (leaf nodes) to download. 0 for all. Selection order: RefSeq Category, Assembly level, Relation to type material, Date (most recent first)
-		Default: 0
-	 -F custom filter for the assembly summary in the format colA:val1|colB:valX,valY (case insensitive). Example: -F "2:PRJNA12377,PRJNA670754|14:Partial" for column infos check ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt
+	 -D Start date (>=), based on the sequence release date. Format YYYYMMDD.
 		Default: ""
-	 -D Start date to keep sequences (>=), based on the sequence release date. Format YYYYMMDD. Example: 20201030
+	 -E End date (<=), based on the sequence release date. Format YYYYMMDD.
 		Default: ""
-	 -E End date to keep sequences (<=), based on the sequence release date. Format YYYYMMDD. Example: 20201231
+	 -F custom filter for the assembly summary in the format colA:val1|colB:valX,valY (case insensitive).
+		Example: -F "2:PRJNA12377,PRJNA670754|14:Partial" (AND between cols, OR between values)
+		Column info at https://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt
 		Default: ""
-	 -z Keep only assemblies present on the latest GTDB release
 
-	Report options:
-	 -u Report of updated assembly accessions (Added/Removed, assembly accession, url)
-	 -r Report of updated sequence accessions (Added/Removed, assembly accession, genbank accession, refseq accession, sequence length, taxid). Only available when file format assembly_report.txt is selected and successfully downloaded
-	 -p Output list of URLs for downloaded and failed files
+	Taxonomy options:
+	 -M Taxonomy. gtdb keeps only assemblies in GTDB (R207). ncbi keeps only latest assemblies (version_status). 
+		[ncbi, gtdb]
+		Default: "ncbi"
+	 -A Keep a limited number of assemblies for each selected taxa (leaf nodes). 0 for all. 
+		Selection by ranks are also supported with rank:number (e.g genus:3)
+		[species, genus, family, order, class, phylum, kingdom, superkingdom]
+		Selection order based on: RefSeq Category, Assembly level, Relation to type material, Date.
+		Default: 0
+	 -a Keep the current version of the taxonomy database in the output folder
 
 	Run options:
 	 -o Output/Working directory 
 		Default: ./tmp.XXXXXXXXXX
+	 -t Threads to parallelize download and some file operations
+		Default: 1
+	 -k Dry-run mode. No sequence data is downloaded or updated - just checks for available sequences and changes
+	 -i Fix only mode. Re-downloads incomplete or failed data from a previous run. Can also be used to change files (-f).
+	 -m Check MD5 of downloaded files
+
+	Report options:
+	 -u Updated assembly accessions report
+		(Added/Removed, assembly accession, url)
+	 -r Updated sequence accessions report
+		(Added/Removed, assembly accession, genbank accession, refseq accession, sequence length, taxid)
+		Only available when file format assembly_report.txt is selected and successfully downloaded
+	 -p Reports URLs successfuly downloaded and failed (url_failed.txt url_downloaded.txt)
+
+	Misc. options:
 	 -b Version label
 		Default: current timestamp (YYYY-MM-DD_HH-MM-SS)
 	 -e External "assembly_summary.txt" file to recover data from. Mutually exclusive with -d / -g 
 		Default: ""
+	 -B Alternative version label to use as the current version. Mutually exclusive with -i.
+		Can be used to rollback to an older version or to create multiple branches from a base version.
+		Default: ""
 	 -R Number of attempts to retry to download files in batches 
 		Default: 3
-	 -B Base label to use as the current version. Can be used to rollback to an older version or to create multiple branches from a base version. It only applies for updates. 
-		Default: ""
-	 -k Dry-run, no data is downloaded or updated - just checks for available sequences and changes
-	 -i Fix failed downloads or any incomplete data from a previous run, keep current version
-	 -m Check MD5 of downloaded files
-	 -t Threads to parallelize download and some file operations
-		Default: 1
-
-	Misc. options:
-	 -x Allow the deletion of regular extra files if any found in the files folder. Symbolic links that do not belong to the current version will always be deleted.
-	 -a Download the current version of the NCBI taxonomy database (taxdump.tar.gz)
-	 -s Silent output
-	 -w Silent output with download progress (%) and download version at the end
-	 -n Conditional exit status. Exit Code = 1 if more than N files failed to download (integer for file number, float for percentage, 0 -> off)
+	 -n Conditional exit status based on number of failures accepted, otherwise will Exit Code = 1.
+		Example: -n 10 will exit code 1 if 10 or more files failed to download
+		[integer for file number, float for percentage, 0 = off]
 		Default: 0
-	 -V Verbose log to report successful file downloads
+	 -L Downloader
+		[wget, curl]
+		Default: wget
+	 -x Allow the deletion of regular extra files (not symbolic links) found in the output folder
+	 -s Silent output
+	 -w Silent output with download progress only
+	 -V Verbose log
 	 -Z Print debug information and run in debug mode
 
-
 ## References:
 
 [1] ftp://ftp.ncbi.nlm.nih.gov/genomes/
diff --git a/genome_updater.sh b/genome_updater.sh
index 9df41b4..8f0dad8 100755
--- a/genome_updater.sh
+++ b/genome_updater.sh
@@ -25,9 +25,7 @@ IFS=$' '
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
-version="0.4.1"
-genome_updater_args=$( printf "%q " "$@" )
-export genome_updater_args
+version="0.5.0"
 
 # Define base_url or use local files (for testing)
 local_dir=${local_dir:-}
@@ -39,28 +37,19 @@ base_url=${base_url:-ftp://ftp.ncbi.nlm.nih.gov/} #Alternative ftp://ftp.ncbi.ni
 retries=${retries:-3}
 timeout=${timeout:-120}
 export retries timeout base_url local_dir
-use_curl=${use_curl:-0}
 
 # Export locale numeric to avoid errors on printf in different setups
 export LC_NUMERIC="en_US.UTF-8"
 
-gtdb_urls=( "https://data.gtdb.ecogenomic.org/releases/latest/ar53_taxonomy.tsv.gz" 
-            "https://data.gtdb.ecogenomic.org/releases/latest/bac120_taxonomy.tsv.gz" )
-
 #activate aliases in the script
 shopt -s expand_aliases
 alias sort="sort --field-separator=$'\t'"
-
-# Define downloader to use
-if [[ ! -z "${local_dir}" || "${use_curl}" -eq 1 ]]; then
-    alias downloader="curl --silent --retry ${retries} --connect-timeout ${timeout} --output "
-else
-    alias downloader="wget --quiet --continue --tries ${retries} --read-timeout ${timeout} --output-document "
-fi
+join_as_fields1="1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,1.10,1.11,1.12,1.13,1.14,1.15,1.16,1.17,1.18,1.19,1.20,1.21,1.22,1.23"
+join_as_fields2="2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9,2.10,2.11,2.12,2.13,2.14,2.15,2.16,2.17,2.18,2.19,2.20,2.21,2.22,2.23"
 
 download_url() # parameter: ${1} url, ${2} output file/directory (omit/empty to STDOUT)
 {
-    url=${1}
+    url="${1}"
     outfiledir="${2:-}"
     if [[ ! -z "${outfiledir}" ]]; then
         if [[ -d "${outfiledir}" ]]; then
@@ -71,15 +60,34 @@ download_url() # parameter: ${1} url, ${2} output file/directory (omit/empty to
     else
         outfile="-" # STDOUT
     fi
+
     # Replace base url with local directory if provided
-    if [[ ! -z "${local_dir}" ]]; then url=${url/${url%/genomes/*}/${local_dir}}; fi
+    if [[ ! -z "${local_dir}" ]]; then 
+        url="${local_dir}/${url#*://*/}";
+    fi
     downloader "${outfile}" "${url}"
 }
 export -f download_url  #export it to be accessible to the parallel call
 
-download_static() # parameter: ${1} url, ${2} output file
-{
-    downloader ${2} ${1}
+download_retry_md5(){ # parameter: ${1} url, ${2} output file, ${3} url of the MD5 list, ${4} attempts (default 1) - return 0 if the MD5 was verified, 1 otherwise
+    for (( att=1; att<=${4:-1}; att++ )); do
+        if [ "${att}" -gt 1 ]; then
+            echolog " - Failed to download ${1}. Trying again #${att}" "1" # report the requested url (${1}) instead of relying on the global "url" set inside download_url
+        fi
+        download_url "${1}" "${2}"
+        real_md5=$(download_url "${3}" | grep -F -- "${1##*/}" | cut -f1 -d' ') # expected md5: entry of the MD5 list containing the file name (fixed-string match, not regex)
+        if [ -z "${real_md5}" ]; then
+            continue; # did not find url file on md5 file (or empty), try again
+        else
+            file_md5=$(md5sum "${2}" | cut -f1 -d' ') # quoted so an output path with spaces is not split
+            if [ "${file_md5}" != "${real_md5}" ]; then
+                continue; # md5 didn't match, try again
+            else
+                return 0; # md5 matched, return success
+            fi
+        fi
+    done
+    return 1; # failed to check md5 after all attempts
 }
 
 unpack() # parameter: ${1} file, ${2} output folder[, ${3} files to unpack]
@@ -87,94 +95,185 @@ unpack() # parameter: ${1} file, ${2} output folder[, ${3} files to unpack]
     tar xf "${1}" -C "${2}" "${3}"
 }
 
-count_lines(){ # parameter: ${1} file - return number of lines
+count_lines() # parameter: ${1} text (a string, not a file path) - return number of non-blank lines
+{
-    echo ${1:-} | sed '/^\s*$/d' | wc -l | cut -f1 -d' '
+    echo "${1:-}" | sed '/^\s*$/d' | wc -l | cut -f1 -d' ' # quoted so line breaks in the text survive word splitting
 }
 
-count_lines_file(){ # parameter: ${1} file - return number of lines
+count_lines_file() # parameter: ${1} file - return number of non-blank lines (empty and whitespace-only lines are dropped before counting)
+{
     sed '/^\s*$/d' ${1:-} | wc -l | cut -f1 -d' '
 }
 
-parse_new_taxdump() # parameter: ${1} taxids - return all taxids on of provided taxids
+check_assembly_summary() # parameter: ${1} assembly_summary file (without header lines) - return 0 if consistent, 1 otherwise; prints nothing
 {
-    taxids=${1}
-    tmp_new_taxdump="${target_output_prefix}new_taxdump.tar.gz"
-    download_static "${base_url}/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" "${tmp_new_taxdump}"
-    unpack "${tmp_new_taxdump}" "${working_dir}" "taxidlineage.dmp"
-    tmp_taxidlineage="${working_dir}taxidlineage.dmp"
-    tmp_lineage=${working_dir}lineage.tmp
-    for tx in ${taxids//,/ }; do
-        txids_lin=$(grep "[^0-9]${tx}[^0-9]" "${tmp_taxidlineage}" | cut -f 1) #get only taxids in the lineage section
-        echolog " - $(count_lines "${txids_lin}") children taxids in the lineage of ${tx}" "0"
-        echo "${txids_lin}" >> "${tmp_lineage}" 
-    done
-    lineage_taxids=$(sort ${tmp_lineage} | uniq | tr '\n' ',')${taxids} # put lineage back into the taxids variable with the provided taxids
-    rm "${tmp_new_taxdump}" "${tmp_taxidlineage}" "${tmp_lineage}"
-    echo "${lineage_taxids}"
+    # file must exist and must not be empty
+    if [ ! -s "${1}" ]; then return 1; fi
+
+    # last byte must be a line break (command substitution strips it, leaving an empty string); quoted so a trailing blank is not lost
+    if [ -n "$(tail -c 1 "${1}")" ]; then return 1; fi
+
+    # no line may start with the header char "#"
+    grep -m 1 "^#" "${1}" > /dev/null 2>&1
+    if [ $? -eq 0 ]; then return 1; fi
+
+    # no part of the header may show up anywhere in the file:
+    ##   See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file.
+    grep -m 1 "ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt" "${1}" > /dev/null 2>&1
+    if [ $? -eq 0 ]; then return 1; fi
+    # assembly_accession    bioproject  biosample   wgs_master  refseq_category taxid   species_taxid   organism_name   infraspecific_name  isolate version_status  assembly_level  release_type    genome_rep  seq_rel_date    asm_name    submitter   gbrs_paired_asm paired_asm_comp ftp_path    excluded_from_refseq    relation_to_type_material   asm_not_live_date
+    grep -m 1 " assembly_accession" "${1}" > /dev/null 2>&1
+    if [ $? -eq 0 ]; then return 1; fi
+
+    # every line must have exactly 23 tab-separated cols (-x matches the whole count, so 123 or 230 do not pass as 23)
+    awk 'BEGIN{FS=OFS="\t"}{print NF}' "${1}" | grep -v -x "23" > /dev/null 2>&1
+    if [ $? -eq 0 ]; then return 1; fi
+
+    # every line must start with GCF_ or GCA_
+    grep -v "^GC[FA]_" "${1}" > /dev/null 2>&1
+    if [ $? -eq 0 ]; then return 1; fi
+
+    return 0;
 }
 
 get_assembly_summary() # parameter: ${1} assembly_summary file, ${2} database, ${3} organism_group - return number of lines
 {
+    # Collect urls to download
+    as_to_download=()
     for d in ${2//,/ }
     do
         # If no organism group is chosen, get complete assembly_summary for the database
         if [[ -z "${3}" ]]; then
-            download_url "${base_url}/genomes/${d}/assembly_summary_${d}.txt" | tail -n+3 >> "${1}"
+            as_to_download+=("${base_url}genomes/${d}/assembly_summary_${d}.txt")
+            if [[ "${tax_mode}" == "gtdb" ]]; then
+                as_to_download+=("${base_url}genomes/${d}/assembly_summary_${d}_historical.txt")
+            fi
         else
             for og in ${3//,/ }
             do
                 #special case: human
-                if [[ "${og}" == "human" ]]
-                then
-                    og="vertebrate_mammalian/Homo_sapiens"
+                if [[ "${og}" == "human" ]]; then og="vertebrate_mammalian/Homo_sapiens"; fi
+                as_to_download+=("${base_url}genomes/${d}/${og}/assembly_summary.txt")
+                if [[ "${tax_mode}" == "gtdb" ]]; then
+                    as_to_download+=("${base_url}genomes/${d}/${og}/assembly_summary_historical.txt")
                 fi
-                download_url "${base_url}/genomes/${d}/${og}/assembly_summary.txt" | tail -n+3 >> "${1}"
             done
         fi
     done
-    count_lines_file "${1}"
+
+    # Download files with retry attempts, checking consistency of assembly_summary after every download
+    for as in "${as_to_download[@]}"
+    do
+        for (( att=1; att<=${retry_download_batch}; att++ )); do
+            if [ "${att}" -gt 1 ]; then
+                echolog " - Failed to download ${as}. Trying again #${att}" "1"
+            fi
+            download_url "${as}" 2> /dev/null | tail -n+3 > "${1}.tmp" 
+            if check_assembly_summary "${1}.tmp"; then
+                cat "${1}.tmp" >> "${1}"
+                break; 
+            elif [ ${att} -eq ${retry_download_batch} ]; then
+                return 1; # failed to download after all attempts
+            fi
+        done
+    done
+    rm -f "${1}.tmp"
+
+    # Final check full file
+    if check_assembly_summary "${1}"; then
+        return 0;
+    else
+        return 1;
+    fi
 }
 
-write_history(){ # parameter: ${1} current label, ${2} new label, ${3} new timestamp, ${4} assembly_summary file, ${5} New (0->no/1->yes)
-    if [[ "${5}" -eq 1 ]]; then 
+write_history(){ # parameter: ${1} current label, ${2} new label, ${3} new timestamp, ${4} assembly_summary file
+    # Writes one tab-separated row to ${history_file}. When current label == new label the
+    # file is (re)created with its header line and the current_label column is left empty.
+    # How a row is interpreted when reading the history, by which label columns are filled:
+    #   only new_label = NEW, both current_label and new_label = UPDATE,
+    #   only current_label = FIX
+    if [[ "${1}" == "${2}" ]]; then 
         echo -e "#current_label\tnew_label\ttimestamp\tassembly_summary_entries\targuments" > ${history_file}
+        echo -n -e "\t" >> ${history_file}
+    else
+        echo -n -e "${1}\t" >> ${history_file}
     fi
-    echo -n -e "${1}\t" >> ${history_file}
     echo -n -e "${2}\t" >> ${history_file}
     echo -n -e "${3}\t" >> ${history_file}
     echo -n -e "$(count_lines_file ${4})\t" >> ${history_file}
     echo -e "${genome_updater_args}" >> ${history_file}
 }
 
-filter_assembly_summary() # parameter: ${1} assembly_summary file, ${2} number of lines
+filter_assembly_summary() # parameter: ${1} assembly_summary file, ${2} number of lines - return 1 if no lines or failed, 0 success (temporary taxonomy files are removed on every exit path)
 {
     assembly_summary="${1}"
     filtered_lines=${2}
-    if [[ "${filtered_lines}" -eq 0 ]]; then return; fi
+    if [[ "${filtered_lines}" -eq 0 ]]; then return 1; fi
     
+    gtdb_tax=""
+    ncbi_tax=""
+    ncbi_rank_tax=""
+    tmp_new_taxdump=""
+    if [[ "${tax_mode}" == "gtdb" ]]; then
+        echolog " - Downloading taxonomy (gtdb)" "1"
+        # Download and parse GTDB tax
+        gtdb_tax=$(tmp_file "gtdb_tax.tmp")
+        for url in "${gtdb_urls[@]}"; do
+            tmp_tax=$(tmp_file "gtdb_tax.tmp.gz")
+            if ! download_retry_md5 "${url}" "${tmp_tax}" "https://data.gtdb.ecogenomic.org/releases/release207/207.0/MD5SUM" "${retry_download_batch}"; then
+                rm -f "${tmp_tax}" "${gtdb_tax}"; return 1; # download failed, drop the partial taxonomy files
+            else
+                # awk to remove prefix RS_ or GB_
+                zcat "${tmp_tax}" | awk -F "\t" '{print substr($1, 4, length($1))"\t"$2}' >> "${gtdb_tax}"
+            fi
+            rm -f "${tmp_tax}"
+        done
+    elif [[ "${tax_mode}" == "ncbi" && ( ! -z "${taxids}" || ( ! -z "${top_assemblies_rank}" && "${top_assemblies_rank}" != "species" ) ) ]]; then
+        echolog " - Downloading taxonomy (ncbi)" "1"
+        tmp_new_taxdump="${working_dir}new_taxdump.tar.gz"
+        if ! download_retry_md5 "${base_url}/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" "${tmp_new_taxdump}" "${base_url}/pub/taxonomy/new_taxdump/new_taxdump.tar.gz.md5" "${retry_download_batch}"; then
+            rm -f "${tmp_new_taxdump}"; return 1; # download failed, drop the partial archive
+        fi
+    fi
+
+    if [[ "${tax_mode}" == "gtdb" ]]; then
+        tmp_gtdb_missing=$(tmp_file "gtdb_missing")
+        gtdb_lines=$(filter_gtdb "${assembly_summary}" "${gtdb_tax}" "${tmp_gtdb_missing}")
+        echolog " - $((filtered_lines-gtdb_lines)) assemblies removed not in GTDB" "1"
+
+        # If missing file has entries, report on log
+        gtdb_missing_lines=$(count_lines_file "${tmp_gtdb_missing}")
+        if [[ "${gtdb_missing_lines}" -gt 0 ]]; then
+            echolog " - Could not retrieve ${gtdb_missing_lines} GTDB assemblies" "1"
+            cat "${tmp_gtdb_missing}" >> "${log_file}"
+        fi
+        rm "${tmp_gtdb_missing}"
+
+        filtered_lines=${gtdb_lines}
+        if [[ "${filtered_lines}" -eq 0 ]]; then rm -f "${ncbi_tax}" "${ncbi_rank_tax}" "${gtdb_tax}" "${tmp_new_taxdump}"; return 0; fi
+    fi
+
     # DATE
     if [[ ! -z "${date_start}" || ! -z "${date_end}" ]]; then
         date_lines=$(filter_date "${assembly_summary}")
         echolog " - $((filtered_lines-date_lines)) assemblies removed not in the date range [ ${date_start} .. ${date_end} ]" "1"
         filtered_lines=${date_lines}
-        if [[ "${filtered_lines}" -eq 0 ]]; then return; fi
-    fi
-
-    # SPECIES taxids
-    if [[ ! -z "${species}" ]]; then
-        species_lines=$(filter_species "${assembly_summary}")
-        echolog " - $((filtered_lines-species_lines)) assemblies removed not in species [${species}]" "1"
-        filtered_lines=${species_lines}
-        if [[ "${filtered_lines}" -eq 0 ]]; then return; fi
+        if [[ "${filtered_lines}" -eq 0 ]]; then rm -f "${ncbi_tax}" "${ncbi_rank_tax}" "${gtdb_tax}" "${tmp_new_taxdump}"; return 0; fi
     fi
 
     # TAXIDS
     if [[ ! -z "${taxids}" ]]; then
-        echolog " - Downloading new taxdump and parsing lineages" "1"
-        taxids_lines=$(filter_taxids "${assembly_summary}")
+        if [[ "${tax_mode}" == "ncbi" ]]; then
+            unpack "${tmp_new_taxdump}" "${working_dir}" "taxidlineage.dmp"
+            ncbi_tax="${working_dir}taxidlineage.dmp"
+            taxids_lines=$(filter_taxids_ncbi "${assembly_summary}" "${ncbi_tax}")
+        else
+            taxids_lines=$(filter_taxids_gtdb "${assembly_summary}" "${gtdb_tax}")
+        fi
         echolog " - $((filtered_lines-taxids_lines)) assemblies removed not in taxids [${taxids}]" "1"
         filtered_lines=${taxids_lines}
-        if [[ "${filtered_lines}" -eq 0 ]]; then return; fi
+        if [[ "${filtered_lines}" -eq 0 ]]; then rm -f "${ncbi_tax}" "${ncbi_rank_tax}" "${gtdb_tax}" "${tmp_new_taxdump}"; return 0; fi
     fi
 
     # Filter columns
@@ -182,49 +281,69 @@ filter_assembly_summary() # parameter: ${1} assembly_summary file, ${2} number o
     if [ "$((filtered_lines-columns_lines))" -gt 0 ]; then
         echolog " - $((filtered_lines-columns_lines)) assemblies removed based on filters:" "1"
         echolog "   valid URLs" "1"
-        echolog "   version status=latest" "1"
+        if [[ "${tax_mode}" == "ncbi" ]]; then echolog "   version status=latest" "1"; fi
         if [ ! -z "${refseq_category}" ]; then echolog "   refseq category=${refseq_category}" "1"; fi
         if [ ! -z "${assembly_level}" ]; then echolog "   assembly level=${assembly_level}" "1"; fi
         if [ ! -z "${custom_filter}" ]; then echolog "   custom filter=${custom_filter}" "1"; fi
         filtered_lines=${columns_lines}
-        if [[ "${filtered_lines}" -eq 0 ]]; then return; fi
-    fi
-
-    #GTDB
-    if [ "${gtdb_only}" -eq 1 ]; then
-        gtdb_lines=$(filter_gtdb "${assembly_summary}")
-        echolog " - $((filtered_lines-gtdb_lines)) assemblies removed not in GTDB" "1"
-        filtered_lines=${gtdb_lines}
-        if [[ "${filtered_lines}" -eq 0 ]]; then return; fi
+        if [[ "${filtered_lines}" -eq 0 ]]; then rm -f "${ncbi_tax}" "${ncbi_rank_tax}" "${gtdb_tax}" "${tmp_new_taxdump}"; return 0; fi
     fi
 
     #TOP ASSEMBLIES
-    if [[ "${top_assemblies_species}" -gt 0 || "${top_assemblies_taxids}" -gt 0 ]]; then
-        top_lines=$(filter_top_assemblies "${assembly_summary}")
-        if [[ "${top_assemblies_species}" -gt 0 ]]; then
-            echolog " - $((filtered_lines-top_lines)) entries removed with top ${top_assemblies_species} assembly/species " "1"
+    if [ "${top_assemblies_num}" -gt 0 ]; then
+        # Add chosen rank as first col of a temporary assembly_summary
+        if [[ "${tax_mode}" == "ncbi" ]]; then
+            if [[ ! -z "${top_assemblies_rank}" && "${top_assemblies_rank}" != "species" ]]; then
+                unpack "${tmp_new_taxdump}" "${working_dir}" "rankedlineage.dmp"
+                ncbi_rank_tax="${working_dir}rankedlineage.dmp"
+            fi
+            ranked_lines=$(add_rank_ncbi "${assembly_summary}" "${assembly_summary}_rank" "${ncbi_rank_tax}")
         else
-            echolog " - $((filtered_lines-top_lines)) entries removed with top ${top_assemblies_taxids} assembly/taxid" "1"
+            ranked_lines=$(add_rank_gtdb "${assembly_summary}" "${assembly_summary}_rank" "${gtdb_tax}")
+        fi
+        if [ $((filtered_lines-ranked_lines)) -gt 0 ]; then
+            echolog " - Failed to match all entries to taxonomic identifiers with ${top_assemblies}" "1"
         fi
+        top_lines=$(filter_top_assemblies "${assembly_summary}" "${assembly_summary}_rank")
+        echolog " - $((filtered_lines-top_lines)) entries removed with top ${top_assemblies}" "1"
+        rm -f "${assembly_summary}_rank"
         filtered_lines=${top_lines}
-        if [[ "${filtered_lines}" -eq 0 ]]; then return; fi
+        if [[ "${filtered_lines}" -eq 0 ]]; then rm -f "${ncbi_tax}" "${ncbi_rank_tax}" "${gtdb_tax}" "${tmp_new_taxdump}"; return 0; fi
     fi
-    return 0
+
+    rm -f "${ncbi_tax}" "${ncbi_rank_tax}" "${gtdb_tax}" "${tmp_new_taxdump}"
+    return 0;
 }
 
-filter_taxids() # parameter: ${1} assembly_summary file - return number of lines
+filter_taxids_ncbi() # parameter: ${1} assembly_summary file, ${2} ncbi_tax file (taxidlineage.dmp) - return number of lines
 {
     # Keep only selected taxid lineage, removing at the end duplicated entries from duplicates on taxids
-    lineage_taxids=$(parse_new_taxdump "${taxids}")
-    join -1 6 -2 1 <(sort -k 6,6 "${1}") <(echo "${lineage_taxids//,/$'\n'}" | sort -k 1,1) -t$'\t' -o "1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,1.10,1.11,1.12,1.13,1.14,1.15,1.16,1.17,1.18,1.19,1.20,1.21,1.22" | sort | uniq > "${1}_taxids"
+    tmp_lineage=$(tmp_file "lineage.tmp")
+    for tx in ${taxids//,/ }; do
+        txids_lin=$(grep "[^0-9]${tx}[^0-9]" "${2}" | cut -f 1) #get only taxids in the lineage section (a leading taxid has no preceding char, so col 1 never matches)
+        echolog " - $(count_lines "${txids_lin}") children taxids in the lineage of ${tx}" "0"
+        echo "${txids_lin}" >> "${tmp_lineage}"
+    done
+    lineage_taxids=$(sort -u "${tmp_lineage}" | tr '\n' ',')${taxids} # put lineage back into the taxids variable with the provided taxids
+    rm -f "${tmp_lineage}"
+
+    # Join with assembly_summary based on taxid field 6
+    join -1 6 -2 1 <(sort -k 6,6 "${1}") <(echo "${lineage_taxids//,/$'\n'}" | sort -k 1,1) -t$'\t' -o ${join_as_fields1} | sort | uniq > "${1}_taxids"
     mv "${1}_taxids" "${1}"
     count_lines_file "${1}"
 }
 
-filter_species() # parameter: ${1} assembly_summary file - return number of lines
+filter_taxids_gtdb() # parameter: ${1} assembly_summary file, ${2} gtdb_tax file - return number of lines
 {
-    join -1 7 -2 1 <(sort -k 7,7 "${1}") <(echo "${species//,/$'\n'}" | sort -k 1,1) -t$'\t' -o "1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,1.10,1.11,1.12,1.13,1.14,1.15,1.16,1.17,1.18,1.19,1.20,1.21,1.22" | sort | uniq > "${1}_species"
-    mv "${1}_species" "${1}"
+    tmp_gtdb_acc=$(tmp_file "gtdb_acc.tmp")
+    old_ifs="${IFS-$' \t\n'}"; IFS="," # split taxids on commas only, taxon names may contain spaces
+    for tx in ${taxids}; do
+        sed -e 's/\t/\t;/g' -e 's/$/;/' "${2}" | grep -F -- ";${tx};" | cut -f 1 >> "${tmp_gtdb_acc}" # wrap every taxon as ;name; and match it literally
+    done
+    IFS="${old_ifs}" # restore the previous IFS instead of forcing a single space
+    join -1 1 -2 1 <(sort -k 1,1 "${1}") <(sort -k 1,1 "${tmp_gtdb_acc}" | uniq) -t$'\t' -o ${join_as_fields1} | sort | uniq > "${1}_taxids"
+    mv "${1}_taxids" "${1}"
+    rm "${tmp_gtdb_acc}"
     count_lines_file "${1}"
 }
 
@@ -240,64 +359,118 @@ filter_columns() # parameter: ${1} assembly_summary file - return number of line
     # Build string to filter file by columns in the format
     # colA:val1,val2|colB:val3
     # AND between cols, OR between values
-    colfilter="11:latest"
+    
+    colfilter=""
+    if [[ "${tax_mode}" == "ncbi" ]]; then
+        colfilter="11:latest|"
+    fi
     if [[ ! -z "${refseq_category}" ]]; then
-        colfilter="${colfilter}|5:${refseq_category}"
+        colfilter="${colfilter}5:${refseq_category}|"
     fi
     if [[ ! -z "${assembly_level}" ]]; then
-        colfilter="${colfilter}|12:${assembly_level}"
+        colfilter="${colfilter}12:${assembly_level}|"
     fi
     if [[ ! -z "${custom_filter}" ]]; then
-        colfilter="${colfilter}|${custom_filter}"
+        colfilter="${colfilter}${custom_filter}|"
     fi
 
-    awk -F "\t" -v colfilter="${colfilter}" 'BEGIN{
-        split(colfilter, fields, "|");
-        for(f in fields){
-            split(fields[f], keyvals, ":");
-            filter[keyvals[1]]=keyvals[2];}
-        } $20!="na" {
-            k=0;
-            for(f in filter){
-                split(filter[f], v, ","); for (i in v) vals[tolower(v[i])]="";
-                if(tolower($f) in vals){
-                    k+=1;
+    if [[ ! -z "${colfilter}" ]]; then
+        awk -F "\t" -v colfilter="${colfilter%?}" '
+            function ltrim(s) { sub(/^[ \t\r\n]+/, "", s); return s }
+            function rtrim(s) { sub(/[ \t\r\n]+$/, "", s); return s }
+            function trim(s) { return rtrim(ltrim(s)); }
+            BEGIN{
+            split(colfilter, fields, "|");
+            for(f in fields){
+                split(fields[f], keyvals, ":");
+                filter[keyvals[1]]=keyvals[2];}
+            } $20!="na" {
+                k=0;
+                for(f in filter){
+                    split(filter[f], v, ","); for (i in v) vals[tolower(trim(v[i]))]="";
+                    if(tolower($f) in vals){
+                        k+=1;
+                    }
+                };
+                if(k==length(filter)){
+                    print $0;
                 }
-            };
-            if(k==length(filter)){
-                print $0;
-            }
-        }' "${1}" > "${1}_filtered"
-    mv "${1}_filtered" "${1}"
+            }' "${1}" > "${1}_filtered"
+        mv "${1}_filtered" "${1}"
+    fi
     count_lines_file "${1}"
 }
 
-filter_gtdb() # parameter: ${1} assembly_summary file - return number of lines
+filter_gtdb() # parameter: ${1} assembly_summary file, ${2} gtdb_tax file,  ${3} gtdb_missing file - return number of lines
 {
-    gtdb_acc=${working_dir}"gtdb_acc"
-    for url in "${gtdb_urls[@]}"
-    do
-        # awk to remove prefix RS_ or GB_
-        download_url "${url}" | zcat | awk -F "\t" '{print substr($1, 4, length($1))}' >> "${gtdb_acc}"
-    done
-    join -1 1 -2 1 <(sort -k 1,1 "${1}") <(sort -k 1,1 "${gtdb_acc}") -t$'\t' -o "1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,1.10,1.11,1.12,1.13,1.14,1.15,1.16,1.17,1.18,1.19,1.20,1.21,1.22" | sort | uniq > "${1}_gtdb"
+    # Missing entries: lines of the gtdb_tax file whose accession has no match in the assembly_summary (kept tab-separated)
+    join -1 1 -2 1 <(sort -k 1,1 "${1}") <(sort -k 1,1 "${2}") -t$'\t' -v 2 > "${3}"
+    # Match entries
+    join -1 1 -2 1 <(sort -k 1,1 "${1}") <(sort -k 1,1 "${2}") -t$'\t' -o ${join_as_fields1} | sort | uniq > "${1}_gtdb"
     mv "${1}_gtdb" "${1}"
-    rm "${gtdb_acc}"
     count_lines_file "${1}"
 }
 
-filter_top_assemblies() # parameter: ${1} assembly_summary file - return number of lines
-{
-    if [ "${top_assemblies_species}" -gt 0 ]; then
-        taxcol="7";
-        top="${top_assemblies_species}";
+add_rank_ncbi(){ # parameter: ${1} assembly_summary file, ${2} modified assembly_summary file with rank as first col, ${3} ncbi_tax file (only read when a rank other than species is set) - return number of lines written to ${2}
+    # rankedlineage.dmp cols (sep tab|tab, so with FS=tab every second field is the "|"):
+    # $1=taxid, $3=name, $5=species, $7=genus, $9=family, $11=order, $13=class, $15=phylum, $17=kingdom, $19=superkingdom
+    if [[ -z "${top_assemblies_rank}" ]]; then
+        # Repeat leaf taxid (col 6 of the assembly_summary) as first col
+        awk 'BEGIN{FS=OFS="\t"}{print $6,$0}' "${1}" > "${2}"
+    elif [[ "${top_assemblies_rank}" == "species" ]]; then
+        # Repeat species taxid (col 7 of the assembly_summary) as first col
+        awk 'BEGIN{FS=OFS="\t"}{print $7,$0}' "${1}" > "${2}"
     else
-        taxcol="6";
-        top="${top_assemblies_taxids}";
+        # export taxid <tab> ranked name, falling back to the taxid itself when the rank field is empty
+        # NOTE(review): a rank missing from r[] (e.g. kingdom) would index $0 - confirm such values are rejected before this call
+        tmp_ranked_taxids=$(tmp_file "ranked_taxids.tmp")
+        awk -v rank="${top_assemblies_rank}" 'BEGIN{
+                FS=OFS="\t";
+                r["genus"]=7;
+                r["family"]=9;
+                r["order"]=11;
+                r["class"]=13;
+                r["phylum"]=15;
+                r["superkingdom"]=19;
+            }{
+                print $1, $r[rank] ? $r[rank] : $1;
+            }' "${3}" > "${tmp_ranked_taxids}"
+        # Join ranked name by taxid col (col 6 of the assembly_summary), ranked name goes first
+        join -1 6 -2 1 <(sort -k 6,6 "${1}") <(sort -k 1,1 "${tmp_ranked_taxids}") -t$'\t' -o "2.2,${join_as_fields1}" > "${2}"
+        rm -f "${tmp_ranked_taxids}"
     fi
+    count_lines_file "${2}"
+}
+
+add_rank_gtdb(){ # parameter: ${1} assembly_summary file, ${2} modified assembly_summary file with rank as first col, ${3} gtdb_tax file - return number of lines
+    # Input is the gtdb taxonomy with the RS_/GB_ prefixes already removed, one entry per line:
+    # accession.version <tab> d__Bacteria;p__Firmicutes;c__Bacilli;o__Staphylococcales;f__Staphylococcaceae;g__Staphylococcus;s__Staphylococcus aureus
+    # First build a table of accession <tab> name at the requested rank;
+    # an empty top_assemblies_rank means species (the leaves on gtdb)
+    tmp_ranked_accessions=$(tmp_file "ranked_accessions.tmp")
+    tr ';' '\t' < "${3}" | awk -v rank="${top_assemblies_rank:-species}" 'BEGIN{
+            FS=OFS="\t";
+            r["superkingdom"]=2;
+            r["phylum"]=3;
+            r["class"]=4;
+            r["order"]=5;
+            r["family"]=6;
+            r["genus"]=7;
+            r["species"]=8;
+        }{
+            ranked=$r[rank]; print $1, ranked ? ranked : $1;
+        }' > "${tmp_ranked_accessions}"
+
+    # Then put the ranked name in front of every assembly_summary line with the same accession
+    join -1 1 -2 1 <(sort -k 1,1 "${1}") <(sort -k 1,1 "${tmp_ranked_accessions}") -t$'\t' -o "2.2,${join_as_fields1}" > "${2}"
+    rm -f "${tmp_ranked_accessions}"
+    count_lines_file "${2}"
+}
 
-    awk -v taxcol="${taxcol}" 'BEGIN{
-            FS="\t";OFS="\t";
+filter_top_assemblies() # parameter: ${1} assembly_summary file, ${2} modified assembly_summary file with rank as first col - return number of lines; keeps at most ${top_assemblies_num} entries per value of the rank col and overwrites ${1}
+{
+    # First col of ${2} contains rank info, so original assembly_summary col N is read as $(N+1)
+    awk -v taxcol="1" 'BEGIN{
+            FS=OFS="\t";
             col5["reference genome"]=1;
             col5["representative genome"]=2;
             col5["na"]=3;
@@ -312,13 +485,14 @@ filter_top_assemblies() # parameter: ${1} assembly_summary file - return number
             col22["assembly designated as reftype"]=5;
             col22["ICTV species exemplar"]=6;
             col22["ICTV additional isolate"]=7;
+            max_val=9;
         }{
-            gsub("/","",$15); 
-            print $1,$taxcol,$5 in col5 ? col5[$5] : 9 ,$12 in col12 ? col12[$12] : 9,$22 in col22 ? col22[$22] : 9 ,$15;
-        }' "${1}" | sort -t$'\t' -k 2,2 -k 3,3 -k 4,4 -k 5,5 -k 6nr,6 -k 1,1 | awk -v top="${top}" '{if(cnt[$2]<top){print $1;cnt[$2]+=1}}' > "${1}_top_acc"
-    join <(sort -k 1,1 "${1}_top_acc") <(sort -k 1,1 "${1}") -t$'\t' -o "2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9,2.10,2.11,2.12,2.13,2.14,2.15,2.16,2.17,2.18,2.19,2.20,2.21,2.22" > "${1}_top"
+            gsub("/","",$(15+1)); 
+            print $(1+1), $taxcol, $(5+1) in col5 ? col5[$(5+1)] : max_val, $(12+1) in col12 ? col12[$(12+1)] : max_val, $(22+1) in col22 ? col22[$(22+1)] : max_val, $(15+1);
+        }' "${2}" | sort -t$'\t' -k 2,2 -k 3,3 -k 4,4 -k 5,5 -k 6nr,6 -k 1,1 | awk -v top="${top_assemblies_num}" 'BEGIN{FS=OFS="\t"}{if(cnt[$2]<top){print $1;cnt[$2]+=1}}' > "${2}_top_acc"
+    join <(sort -k 1,1 "${2}_top_acc") <(sort -k 1,1 "${1}") -t$'\t' -o ${join_as_fields2} > "${1}_top"
     mv "${1}_top" "${1}"
-    rm "${1}_top_acc"
+    rm "${2}_top_acc"
     count_lines_file "${1}"
 }
 
@@ -331,10 +505,19 @@ list_files() # parameter: ${1} file, ${2} fields [assembly_accesion,url], ${3} e
     done
 }
 
+tmp_file(){ # parameter: ${1} filename - create an empty file ${working_dir}${1} (an existing one is removed first) and print its full path
+    f="${working_dir}${1}"
+    rm -f "${f}"
+    touch "${f}"
+    echo "${f}"
+}
+
 print_progress() # parameter: ${1} file number, ${2} total number of files
 {
-    if [ "${silent_progress}" -eq 0 ] && [ "${silent}" -eq 0 ] ; then printf "%8d/%d - " ${1} ${2}; fi #Only prints when not silent and not only progress
-    if [ "${silent_progress}" -eq 1 ] || [ "${silent}" -eq 0 ] ; then printf "%6.2f%%\r" $(bc -l <<< "scale=4;(${1}/${2})*100"); fi #Always prints besides when it's silent
+    if [ "${silent_progress}" -eq 1 ] || [ "${silent}" -eq 0 ] ; then # skipped only when silent is on and silent_progress is off
+        printf "%5d/%d - " ${1} ${2} # current/total counter
+        printf "%2.2f%%\r" $(bc -l <<< "scale=4;(${1}/${2})*100") # percentage done; \r returns to line start so the next call overwrites it
+    fi
 }
 export -f print_progress #export it to be accessible to the parallel call
 
@@ -422,25 +605,20 @@ export -f download
 
 download_files() # parameter: ${1} file, ${2} fields [assembly_accesion,url] or field [url,filename], ${3} extension
 {
-
-    url_list_download=${working_dir}url_list_download.tmp #Temporary url list of files to download in this call
-    url_success_download=${working_dir}url_success_download.tmp #Temporary url list of downloaded files
-    touch ${url_success_download}
-
+    url_list_download=$(tmp_file "url_list_download.tmp") #Temporary url list of files to download in this call
     # sort files to get all files for the same entry in sequence, in case of failure 
-    if [ -z ${3:-} ] #direct download (url+file)
-    then
+    if [ -z ${3:-} ]; then #direct download (url+file)
         cut --fields="${2}" ${1} | tr '\t' '/' | sort > "${url_list_download}"
     else
         list_files ${1} ${2} ${3} | cut -f 2,3 | tr '\t' '/' | sort > "${url_list_download}"
     fi
     total_files=$(count_lines_file "${url_list_download}")
 
+    url_success_download=$(tmp_file "url_success_download.tmp") #Temporary url list of downloaded files
     # Retry download in batches
     for (( att=1; att<=${retry_download_batch}; att++ )); do
-
         if [ "${att}" -gt 1 ]; then
-            echolog " - Download attempt #${att}" "1"
+            echolog " - Failed download - ${failed_count} files. Trying again #${att}" "1"
             # Make a new list to download without entres already successfuly downloaded
             join <(sort "${url_list_download}") <(sort "${url_success_download}") -v 1 > "${url_list_download}_2"
             mv "${url_list_download}_2" "${url_list_download}"
@@ -462,7 +640,6 @@ download_files() # parameter: ${1} file, ${2} fields [assembly_accesion,url] or
             break;
         fi
     done
-    #print_progress 100 100
 
     # Output URL reports
     if [ "${url_list}" -eq 1 ]; then 
@@ -574,40 +751,6 @@ print_debug() # parameters: ${1} tools
     echo "========================================================";
 }
 
-# Defaults
-database=""
-organism_group=""
-species=""
-taxids=""
-refseq_category=""
-assembly_level=""
-custom_filter=""
-file_formats="assembly_report.txt"
-top_assemblies_species=0
-top_assemblies_taxids=0
-date_start=""
-date_end=""
-gtdb_only=0
-download_taxonomy=0
-delete_extra_files=0
-check_md5=0
-updated_assembly_accession=0
-updated_sequence_accession=0
-url_list=0
-dry_run=0
-just_fix=0
-conditional_exit=0
-silent=0
-silent_progress=0
-debug_mode=0
-working_dir=""
-external_assembly_summary=""
-retry_download_batch=3
-label=""
-rollback_label=""
-threads=1
-verbose_log=0
-
 function print_logo {
     echo "┌─┐┌─┐┌┐┌┌─┐┌┬┐┌─┐    ┬ ┬┌─┐┌┬┐┌─┐┌┬┐┌─┐┬─┐";
     echo "│ ┬├┤ ││││ ││││├┤     │ │├─┘ ││├─┤ │ ├┤ ├┬┘";
@@ -624,62 +767,90 @@ function showhelp {
     print_logo
     echo
     echo $'Database options:'
-    echo $' -d Database (comma-separated entries) [genbank, refseq]'
+    echo $' -d Database (comma-separated entries)\n\t[genbank, refseq]'
     echo
     echo $'Organism options:'
-    echo $' -g Organism group (comma-separated entries) [archaea, bacteria, fungi, human, invertebrate, metagenomes, other, plant, protozoa, vertebrate_mammalian, vertebrate_other, viral]. Example: archaea,bacteria.\n\tDefault: ""'
-    echo $' -S Species level taxonomic ids (comma-separated entries). Example: 622,562\n\tDefault: ""'
-    echo $' -T Any taxonomic ids - children lineage will be generated (comma-separated entries). Example: 620,649776\n\tDefault: ""'
+    echo $' -g Organism group(s) (comma-separated entries, empty for all)\n\t[archaea, bacteria, fungi, human, invertebrate, metagenomes, \n\tother, plant, protozoa, vertebrate_mammalian, vertebrate_other, viral]\n\tDefault: ""'
+    echo $' -T Taxonomic identifier(s) (comma-separated entries, empty for all).\n\tExample: "562" (for -M ncbi) or "s__Escherichia coli" (for -M gtdb)\n\tDefault: ""'
     echo
     echo $'File options:'
-    echo $' -f files to download [genomic.fna.gz,assembly_report.txt, ...] check ftp://ftp.ncbi.nlm.nih.gov/genomes/all/README.txt for all file formats\n\tDefault: assembly_report.txt'
+    echo $' -f file type(s) (comma-separated entries)\n\t[genomic.fna.gz, assembly_report.txt, protein.faa.gz, genomic.gbff.gz]\n\tMore formats at https://ftp.ncbi.nlm.nih.gov/genomes/all/README.txt\n\tDefault: assembly_report.txt'
     echo
     echo $'Filter options:'
-    echo $' -c refseq category (comma-separated entries, empty for all) [reference genome, representative genome, na]\n\tDefault: ""'
-    echo $' -l assembly level (comma-separated entries, empty for all) [complete genome, chromosome, scaffold, contig]\n\tDefault: ""' 
-    echo $' -P Number of top references for each species nodes to download. 0 for all. Selection order: RefSeq Category, Assembly level, Relation to type material, Date (most recent first)\n\tDefault: 0'
-    echo $' -A Number of top references for each taxids (leaf nodes) to download. 0 for all. Selection order: RefSeq Category, Assembly level, Relation to type material, Date (most recent first)\n\tDefault: 0'
-    echo $' -F custom filter for the assembly summary in the format colA:val1|colB:valX,valY (case insensitive). Example: -F "2:PRJNA12377,PRJNA670754|14:Partial" for column infos check ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt\n\tDefault: ""'
-    echo $' -D Start date to keep sequences (>=), based on the sequence release date. Format YYYYMMDD. Example: 20201030\n\tDefault: ""'
-    echo $' -E End date to keep sequences (<=), based on the sequence release date. Format YYYYMMDD. Example: 20201231\n\tDefault: ""'
-    echo $' -z Keep only assemblies present on the latest GTDB release'
+    echo $' -c refseq category (comma-separated entries, empty for all)\n\t[reference genome, representative genome, na]\n\tDefault: ""'
+    echo $' -l assembly level (comma-separated entries, empty for all)\n\t[complete genome, chromosome, scaffold, contig]\n\tDefault: ""' 
+    echo $' -D Start date (>=), based on the sequence release date. Format YYYYMMDD.\n\tDefault: ""'
+    echo $' -E End date (<=), based on the sequence release date. Format YYYYMMDD.\n\tDefault: ""'
+    echo $' -F custom filter for the assembly summary in the format colA:val1|colB:valX,valY (case insensitive).\n\tExample: -F "2:PRJNA12377,PRJNA670754|14:Partial" (AND between cols, OR between values)\n\tColumn info at https://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt\n\tDefault: ""'
     echo
-    echo $'Report options:'
-    echo $' -u Report of updated assembly accessions (Added/Removed, assembly accession, url)'
-    echo $' -r Report of updated sequence accessions (Added/Removed, assembly accession, genbank accession, refseq accession, sequence length, taxid). Only available when file format assembly_report.txt is selected and successfully downloaded'
-    echo $' -p Output list of URLs for downloaded and failed files'
+    echo $'Taxonomy options:'
+    echo $' -M Taxonomy. gtdb keeps only assemblies in GTDB (R207). ncbi keeps only latest assemblies (version_status). \n\t[ncbi, gtdb]\n\tDefault: "ncbi"'
+    echo $' -A Keep a limited number of assemblies for each selected taxa (leaf nodes). 0 for all. \n\tSelection by ranks are also supported with rank:number (e.g genus:3)\n\t[species, genus, family, order, class, phylum, kingdom, superkingdom]\n\tSelection order based on: RefSeq Category, Assembly level, Relation to type material, Date.\n\tDefault: 0'
+    echo $' -a Keep the current version of the taxonomy database in the output folder'
     echo
     echo $'Run options:'
     echo $' -o Output/Working directory \n\tDefault: ./tmp.XXXXXXXXXX'
-    echo $' -b Version label\n\tDefault: current timestamp (YYYY-MM-DD_HH-MM-SS)'
-    echo $' -e External "assembly_summary.txt" file to recover data from. Mutually exclusive with -d / -g \n\tDefault: ""'
-    echo $' -R Number of attempts to retry to download files in batches \n\tDefault: 3'
-    echo $' -B Base label to use as the current version. Can be used to rollback to an older version or to create multiple branches from a base version. It only applies for updates. \n\tDefault: ""'
-    echo $' -k Dry-run, no data is downloaded or updated - just checks for available sequences and changes'
-    echo $' -i Fix failed downloads or any incomplete data from a previous run, keep current version'
-    echo $' -m Check MD5 of downloaded files'
     echo $' -t Threads to parallelize download and some file operations\n\tDefault: 1'
+    echo $' -k Dry-run mode. No sequence data is downloaded or updated - just checks for available sequences and changes'
+    echo $' -i Fix only mode. Re-downloads incomplete or failed data from a previous run. Can also be used to change files (-f).'
+    echo $' -m Check MD5 of downloaded files'
+    echo
+    echo $'Report options:'
+    echo $' -u Updated assembly accessions report\n\t(Added/Removed, assembly accession, url)'
+    echo $' -r Updated sequence accessions report\n\t(Added/Removed, assembly accession, genbank accession, refseq accession, sequence length, taxid)\n\tOnly available when file format assembly_report.txt is selected and successfully downloaded'
+    echo $' -p Reports URLs successfuly downloaded and failed (url_failed.txt url_downloaded.txt)'
     echo
     echo $'Misc. options:'
-    echo $' -x Allow the deletion of regular extra files if any found in the files folder. Symbolic links that do not belong to the current version will always be deleted.'
-    echo $' -a Download the current version of the NCBI taxonomy database (taxdump.tar.gz)'
+    echo $' -b Version label\n\tDefault: current timestamp (YYYY-MM-DD_HH-MM-SS)'
+    echo $' -e External "assembly_summary.txt" file to recover data from. Mutually exclusive with -d / -g \n\tDefault: ""'
+    echo $' -B Alternative version label to use as the current version. Mutually exclusive with -i.\n\tCan be used to rollback to an older version or to create multiple branches from a base version.\n\tDefault: ""'
+    echo $' -R Number of attempts to retry to download files in batches \n\tDefault: 3'
+    echo $' -n Conditional exit status based on number of failures accepted, otherwise will Exit Code = 1.\n\tExample: -n 10 will exit code 1 if 10 or more files failed to download\n\t[integer for file number, float for percentage, 0 = off]\n\tDefault: 0'
+    echo $' -L Downloader\n\t[wget, curl]\n\tDefault: wget'
+    echo $' -x Allow the deletion of regular extra files (not symbolic links) found in the output folder'
     echo $' -s Silent output'
-    echo $' -w Silent output with download progress (%) and download version at the end'
-    echo $' -n Conditional exit status. Exit Code = 1 if more than N files failed to download (integer for file number, float for percentage, 0 -> off)\n\tDefault: 0'
-    echo $' -V Verbose log to report successful file downloads'
+    echo $' -w Silent output with download progress only'
+    echo $' -V Verbose log'
     echo $' -Z Print debug information and run in debug mode'
     echo
 }
 
+# Defaults
+database=""
+organism_group=""
+taxids=""
+refseq_category=""
+assembly_level=""
+custom_filter=""
+file_formats="assembly_report.txt"
+top_assemblies=0
+date_start=""
+date_end=""
+tax_mode="ncbi"
+download_taxonomy=0
+delete_extra_files=0
+check_md5=0
+updated_assembly_accession=0
+updated_sequence_accession=0
+url_list=0
+dry_run=0
+just_fix=0
+conditional_exit=0
+silent=0
+silent_progress=0
+debug_mode=0
+working_dir=""
+external_assembly_summary=""
+retry_download_batch=3
+label=""
+rollback_label=""
+threads=1
+verbose_log=0
+downloader_tool="wget"
+
 # Check for required tools
 tool_not_found=0
-tools=( "awk" "bc" "find" "join" "md5sum" "parallel" "sed" "tar" "xargs" )
-if [[ "${use_curl}" -eq 1 ]]; then
-    tools+=("curl")
-else
-    tools+=("wget")
-fi
-
+tools=( "awk" "bc" "find" "join" "md5sum" "parallel" "sed" "tar" "xargs" "wget" )
 for t in "${tools[@]}"
 do
     if [ ! -x "$(command -v ${t})" ]; then
@@ -689,11 +860,55 @@ do
 done
 if [ "${tool_not_found}" -eq 1 ]; then exit 1; fi
 
+# Parse -o and -B first to detect possible updates
+getopts_list="aA:b:B:c:d:D:e:E:f:F:g:hikl:L:mM:n:o:prR:st:T:uVwxZ"
 OPTIND=1 # Reset getopts
-while getopts "aA:b:B:d:D:c:De:E:f:F:g:hikl:mn:o:pP:rR:sS:t:T:uVwxzZ" opt; do
+# Pre-parse only working_dir (-o) and rollback_label (-B) from "$@"; other options are ignored here
+while getopts "${getopts_list}" opt; do
+  case ${opt} in
+    o) working_dir=${OPTARG} ;;
+    B) rollback_label=${OPTARG} ;;
+    \?) echo "Invalid options" >&2; exit 1 ;;
+    :) echo "Option -$OPTARG requires an argument." >&2; exit 1 ;;
+  esac
+done
+
+# If workingdir exists and there's a history file, grab and inject params
+if [[ ! -z "${working_dir}" && -s "${working_dir}/history.tsv" ]]; then
+    
+    if [[ ! -z "${rollback_label}" ]]; then
+        # If rolling back, get specific parameters of that version
+        rollback_assembly_summary="${working_dir}/${rollback_label}/assembly_summary.txt"
+        if [[ -f "${rollback_assembly_summary}" ]]; then
+            declare -a "args=($(awk -F '\t' '$2 == "'${rollback_label}'"' "${working_dir}/history.tsv" | cut -f 5))"
+        else
+            echo "Rollback label/assembly_summary.txt not found ["${rollback_assembly_summary}"]"; exit 1
+        fi
+    else
+        # Parse the arguments saved in the last history entry into an indexed array,
+        # letting the shell interpret the stored escapes/quotes (e.g.: complete\ genome)
+        declare -a "args=($(cut -f 5 "${working_dir}/history.tsv" | tail -n 1))"
+    fi
+
+    # For each entry of the current argument list $@
+    # add to the end of the array to have priority
+    c=${#args[@]}
+    for f in "$@"; do 
+        args[$c]="${f}"
+        c=$((c+1))
+    done
+else
+    # parse command line arguments by default
+    declare -a "args=($( printf "%q " "$@" ))"
+fi
+
+declare -A new_args
+bool_args=""
+OPTIND=1 # Reset getopts
+while getopts "${getopts_list}" opt "${args[@]}"; do
   case ${opt} in
     a) download_taxonomy=1 ;;
-    A) top_assemblies_taxids=${OPTARG} ;;
+    A) top_assemblies=${OPTARG} ;;
     b) label=${OPTARG} ;;
     B) rollback_label=${OPTARG} ;;
     c) refseq_category=${OPTARG} ;;
@@ -704,101 +919,192 @@ while getopts "aA:b:B:d:D:c:De:E:f:F:g:hikl:mn:o:pP:rR:sS:t:T:uVwxzZ" opt; do
     f) file_formats=${OPTARG// } ;; #remove spaces
     F) custom_filter=${OPTARG} ;;
     g) organism_group=${OPTARG// } ;; #remove spaces
-    h|\?) showhelp; exit 0 ;;
+    h) showhelp; exit 0 ;;
     i) just_fix=1 ;;
     k) dry_run=1 ;;
     l) assembly_level=${OPTARG} ;;
+    L) downloader_tool=${OPTARG} ;;
     m) check_md5=1 ;;
+    M) tax_mode=${OPTARG} ;;
     n) conditional_exit=${OPTARG} ;;
     o) working_dir=${OPTARG} ;;
     p) url_list=1 ;;
-    P) top_assemblies_species=${OPTARG} ;;
     r) updated_sequence_accession=1 ;;
     R) retry_download_batch=${OPTARG} ;;
     s) silent=1 ;;
-    S) species=${OPTARG// } ;; #remove spaces
     t) threads=${OPTARG} ;;
-    T) taxids=${OPTARG// } ;; #remove spaces
+    T) taxids=${OPTARG} ;;
     u) updated_assembly_accession=1 ;;
     V) verbose_log=1 ;;
     w) silent_progress=1 ;;
     x) delete_extra_files=1 ;;
-    z) gtdb_only=1 ;;
     Z) debug_mode=1 ;;
-    :) echo "Option -${OPTARG} requires an argument." >&2; exit 1 ;;
+    \?) echo "Invalid options" >&2; exit 1 ;;
+    :) echo "Option -$OPTARG requires an argument." >&2; exit 1 ;;
   esac
+
+  # Collect parsed args in an associative array keyed by option letter
+  # the args added later have precedence
+  if [ "${OPTARG-unset}" = unset ]; then
+    bool_args="${bool_args} -${opt}"  # boolean args, OPTARG is not set in getopts
+  elif [[ ! -z "${OPTARG}" ]]; then
+    new_args[${opt}]="-${opt} '${OPTARG}'" # args with option argument
+  else
+    unset new_args[${opt}] # args with option argument set to ''
+  fi
+
 done
 
-# Print tools and versions
+# No params
+if [ ${OPTIND} -eq 1 ]; then showhelp; exit 1; fi
+
+# Activate debug mode
 if [ "${debug_mode}" -eq 1 ] ; then 
-    print_debug tools;
+    print_debug tools  # Print tools and versions
     # If debug is the only parameter, exit, otherwise set debug mode for the run (set -x)
-    if [ ${OPTIND} -eq 2 ]; then
+    if [ $# -eq 1 ]; then
         exit 0;
     else
         set -x
     fi
 fi
-# No params
-if [ ${OPTIND} -eq 1 ]; then 
-    showhelp; 
-    exit 1;
+
+# Build argument list to save
+genome_updater_args="${new_args[@]}"
+export genome_updater_args
+
+######################### Parameter validation ######################### 
+
+# If fixing/recovering, need to have assembly_summary.txt
+if [[ ! -z "${external_assembly_summary}" ]]; then
+    if [[ ! -f "${external_assembly_summary}" ]] ; then
+        echo "External assembly_summary.txt not found [$(readlink -m ${external_assembly_summary})]"; exit 1;
+    elif [[ ! -z "${database}" || ! -z "${organism_group}" ]]; then
+        echo "External assembly_summary.txt cannot be used with database (-d) and/or organism group (-g)"; exit 1;
+    fi
 fi
-shift $((OPTIND-1))
-[ "${1:-}" = "--" ] && shift
 
-######################### General parameter validation ######################### 
-if [[ -z "${database}" ]]; then
+if [[ ! -z "${rollback_label}" && "${just_fix}" -eq 1 ]]; then
+    echo "-B and -i are mutually exclusive. To continue an update from a previus run, use -B ''"; exit 1;
+fi
+
+if [[ ! "${file_formats}" =~ "assembly_report.txt" && "${updated_sequence_accession}" -eq 1 ]]; then
+    echo "Updated sequence accessions report (-r) can only be used if -f contains 'assembly_report.txt'"; exit 1;
+fi
+
+if [[ -z "${database}" && -z "${external_assembly_summary}" ]]; then
     echo "Database is required (-d)"; exit 1;
-else
+elif [[ ! -z "${database}" ]]; then
     valid_databases=( "genbank" "refseq" )
-    for d in ${database//,/ }
-    do
+    for d in ${database//,/ }; do
         if [[ ! " ${valid_databases[@]} " =~ " ${d} " ]]; then
-            echo "Database ${d} is not valid"; exit 1;
+            echo "${d}: invalid database [ $(printf "'%s' " "${valid_databases[@]}")]"; exit 1;
         fi
     done
 fi
 
-valid_organism_groups=( "archaea" "bacteria" "fungi" "human" "invertebrate" "metagenomes" "other" "plant" "protozoa" "vertebrate_mammalian" "vertebrate_other" "viral" )
-for og in ${organism_group//,/ }
-do
-    if [[ ! " ${valid_organism_groups[@]} " =~ " ${og} " ]]; then
-        echo "Invalid organism group - ${og}"; exit 1;
-    fi
-done
-
-if [[ ! -z "${species}"  ]]; then
-    if [[ ! "${species}" =~ ^[0-9,]+$ ]]; then
-        echo "Invalid species taxids"; exit 1;
+gtdb_urls=()
+if [[ "${tax_mode}" == "gtdb" ]]; then
+    if [[ -z "${organism_group}" ]]; then
+        gtdb_urls+=("https://data.gtdb.ecogenomic.org/releases/release207/207.0/ar53_taxonomy_r207.tsv.gz")
+        gtdb_urls+=("https://data.gtdb.ecogenomic.org/releases/release207/207.0/bac120_taxonomy_r207.tsv.gz")
+    else
+        for og in ${organism_group//,/ }; do
+            if [[ "${og}" == "archaea" ]]; then
+                gtdb_urls+=("https://data.gtdb.ecogenomic.org/releases/release207/207.0/ar53_taxonomy_r207.tsv.gz")
+            elif [[ "${og}" == "bacteria" ]]; then
+                gtdb_urls+=("https://data.gtdb.ecogenomic.org/releases/release207/207.0/bac120_taxonomy_r207.tsv.gz")
+            else
+                echo "${og}: invalid organism group for GTDB [ 'archaea' 'bacteria' ] "; exit 1;
+            fi
+        done
     fi
+elif [[ "${tax_mode}" == "ncbi" ]]; then
+    valid_organism_groups=( "archaea" "bacteria" "fungi" "human" "invertebrate" "metagenomes" "other" "plant" "protozoa" "vertebrate_mammalian" "vertebrate_other" "viral" )
+    for og in ${organism_group//,/ }; do
+        if [[ ! " ${valid_organism_groups[@]} " =~ " ${og} " ]]; then
+            echo "${og}: invalid organism group [ $(printf "'%s' " "${valid_organism_groups[@]}")]"; exit 1;
+        fi
+    done
+else
+    echo "${tax_mode}: invalid taxonomy mode ['ncbi' 'gtdb']"; exit 1;
 fi
 
-if [[ ! -z "${taxids}"  ]]; then
-    if [[ ! "${taxids}" =~ ^[0-9,]+$ ]]; then
-        echo "Invalid taxids"; exit 1;
+if [[ "${tax_mode}" == "ncbi" ]]; then
+    taxids=${taxids// } # remove spaces BEFORE validating, so entries like "562, 620" pass the numeric check
+    if [[ ! -z "${taxids}"  ]]; then
+        if [[ ! "${taxids}" =~ ^[0-9,]+$ ]]; then
+            echo "${taxids}: invalid taxids"; exit 1;
+        fi
     fi
+elif [[ "${tax_mode}" == "gtdb" ]]; then
+    IFS="," # split the list on commas only: GTDB names may contain spaces (e.g. "s__Escherichia coli")
+    for tx in ${taxids}; do
+        if [[ ! "${tx}" =~ ^[dpcofgs]__.* ]]; then
+            echo "${tx}: invalid taxid"; exit 1;
+        fi
+    done
+    IFS=$' ' # NOTE(review): presumably restores the script-wide IFS - confirm against the script header
 fi
 
-# If fixing/recovering, need to have assembly_summary.txt
-if [[ ! -z "${external_assembly_summary}" ]]; then
-    if [[ ! -f "${external_assembly_summary}" ]] ; then
-        echo "External assembly_summary.txt not found [$(readlink -m ${external_assembly_summary})]"; exit 1;
-    elif [[ ! -z "${organism_group}"  ]]; then
-        echo "External assembly_summary.txt cannot be used with organism group (-g)"; exit 1;
+# Top assemblies (-A): a plain number (0 = all) or rank:number, where number is any integer >= 1 (e.g. genus:10)
+if [[ ! "${top_assemblies}" =~ ^[0-9]+$ && ! "${top_assemblies}" =~ ^(superkingdom|phylum|class|order|family|genus|species)\:[1-9][0-9]*$ ]]; then
+    echo "${top_assemblies}: invalid top assemblies - should be a number > 0 or [superkingdom|phylum|class|order|family|genus|species]:number"; exit 1;
+else
+    top_assemblies_rank=""
+    if [[ "${top_assemblies}" =~ ^[0-9]+$ ]]; then
+        top_assemblies_num=${top_assemblies}
+    else
+        top_assemblies_rank=${top_assemblies%:*} # text before the colon (rank name)
+        top_assemblies_num=${top_assemblies#*:} # text after the colon (count)
     fi
 fi
 
-# top taxids/species
-if [[ ! "${top_assemblies_species}" =~ ^[0-9]+$ ]]; then
-    echo "Invalid numberof top assemblies by species"; exit 1;
+IFS=","
+valid_refseq_category=( "reference genome" "representative genome" "na" )
+if [[ ! -z "${refseq_category}" ]]; then
+    for rc in ${refseq_category}; do
+        # ${rc,,} to lowercase
+        if [[ ! " ${valid_refseq_category[@]} " =~ " ${rc,,} " ]]; then
+            echo "${rc}: invalid refseq category [ $(printf "'%s' " "${valid_refseq_category[@]}")]"; exit 1;
+        fi
+    done
 fi
-if [[ ! "${top_assemblies_taxids}" =~ ^[0-9]+$ ]]; then
-    echo "Invalid numberof top assemblies by taxids"; exit 1;
+if [[ ! -z "${assembly_level}" ]]; then
+    valid_assembly_level=( "complete genome" "chromosome" "scaffold" "contig" )
+    for al in ${assembly_level}; do
+        # ${al,,} to lowercase
+        if [[ ! " ${valid_assembly_level[@]} " =~ " ${al,,} " ]]; then
+            echo "${al}: invalid assembly level [ $(printf "'%s' " "${valid_assembly_level[@]}")]"; exit 1;
+        fi
+    done
+fi
+IFS=$' '
+if [[ ! -z "${date_start}" ]]; then
+    if ! date "+%Y%m%d" -d "${date_start}" > /dev/null 2>&1; then
+        echo "${date_start}: invalid start date"; exit 1;
+    fi
+fi
+if [[ ! -z "${date_end}" ]]; then
+    if ! date "+%Y%m%d" -d "${date_end}" > /dev/null 2>&1; then
+        echo "${date_end}: invalid end date"; exit 1;
+    fi
 fi
-
 
 ######################### Variable assignment ######################### 
+
+# Define downloader to use
+if [[ ! -z "${local_dir}" || "${downloader_tool}" == "curl" ]]; then
+    function downloader(){ # parameter: ${1} output file, ${2} url
+        curl --silent --retry ${retries} --connect-timeout ${timeout} --output "${1}" "${2}"
+    }
+else
+    function downloader(){ # parameter: ${1} output file, ${2} url
+        wget --quiet --continue --tries ${retries} --read-timeout ${timeout} --output-document "${1}" "${2}"
+    }
+fi
+export -f downloader
+
 if [ "${silent}" -eq 1 ] ; then 
     silent_progress=0
 elif [ "${silent_progress}" -eq 1 ] ; then 
@@ -831,25 +1137,23 @@ else
 fi
 
 # If file already exists and it's a new repo
-if [[ ( -f "${default_assembly_summary}" || -L "${default_assembly_summary}" ) && "${MODE}" == "NEW" ]]; then
-    echo "Cannot start a new repository with an existing assembly_summary.txt in the working directory [${default_assembly_summary}]"; exit 1;
+if [[ "${MODE}" == "NEW" ]]; then
+    if [[ -f "${default_assembly_summary}" || -L "${default_assembly_summary}" ]]; then
+        echo "Cannot start a new repository with an existing assembly_summary.txt in the working directory [${default_assembly_summary}]"; exit 1;
+    fi
 fi
 
 # If file already exists and it's a new repo
-if [[ ! -f "${default_assembly_summary}" && "${MODE}" == "FIX" ]]; then
-    echo "Cannot find assembly_summary.txt version to fix [${default_assembly_summary}]"; exit 1;
+if [[ "${MODE}" == "FIX" ]]; then
+    if [[ ! -f "${default_assembly_summary}" ]]; then
+        echo "Cannot find assembly_summary.txt version to fix [${default_assembly_summary}]"; exit 1;
+    fi
 fi
 
-# mode specific variables
-if [[ "${MODE}" == "UPDATE" ]] || [[ "${MODE}" == "FIX" ]]; then # get existing version information
-    # Check if default assembly_summary is a symbolic link to some version
-    if [[ ! -L "${default_assembly_summary}"  ]]; then
-        echo "assembly_summary.txt is not a link to any version [${default_assembly_summary}]"; exit 1
-    fi
-    
+if [[ "${MODE}" == "UPDATE" ]]; then
     # Rollback to a different base version
     if [[ ! -z "${rollback_label}" ]]; then
-        rollback_assembly_summary="${working_dir}${rollback_label}/assembly_summary.txt"
+        rollback_assembly_summary="${working_dir}/${rollback_label}/assembly_summary.txt"
         if [[ -f "${rollback_assembly_summary}" ]]; then
             rm ${default_assembly_summary}
             ln -s -r "${rollback_assembly_summary}" "${default_assembly_summary}"
@@ -857,7 +1161,13 @@ if [[ "${MODE}" == "UPDATE" ]] || [[ "${MODE}" == "FIX" ]]; then # get existing
             echo "Rollback label/assembly_summary.txt not found ["${rollback_assembly_summary}"]"; exit 1
         fi
     fi
+fi
 
+if [[ "${MODE}" == "UPDATE" ]] || [[ "${MODE}" == "FIX" ]]; then # get existing version information
+    # Check if default assembly_summary is a symbolic link to some version
+    if [[ ! -L "${default_assembly_summary}"  ]]; then
+        echo "assembly_summary.txt is not a link to any version [${default_assembly_summary}]"; exit 1
+    fi
     current_assembly_summary="$(readlink -m ${default_assembly_summary})"
     current_output_prefix="$(dirname ${current_assembly_summary})/"
     current_label="$(basename ${current_output_prefix})" 
@@ -873,7 +1183,11 @@ if [[ "${MODE}" == "NEW" ]] || [[ "${MODE}" == "UPDATE" ]]; then # with new info
     new_assembly_summary="${new_output_prefix}assembly_summary.txt"
     # If file already exists and it's a new repo
     if [[ -f "${new_assembly_summary}" ]]; then
-        echo "Cannot start a new repository with an existing assembly_summary.txt in the new directory [${new_assembly_summary}]"; exit 1;
+        if [[ ! -z "${label}" ]]; then 
+            echo "Label ["${label}"] already used. Please set another label with -b"; exit 1;
+        else 
+            echo "Cannot start a new repository with an existing assembly_summary.txt in the new directory [${new_assembly_summary}]"; exit 1;
+        fi
     fi
     mkdir -p "${new_output_prefix}${files_dir}"
 fi
@@ -897,46 +1211,10 @@ if [ "${silent}" -eq 0 ]; then
 fi
 
 echolog "--- genome_updater version: ${version} ---" "0"
-echolog "args: ${genome_updater_args}" "0"
-echolog "Mode: ${MODE} - $(if [[ "${dry_run}" -eq 1 ]]; then echo "DRY-RUN"; else echo "DOWNLOAD"; fi)" "1"
-echolog "Timestamp: ${timestamp}" "0"
-echolog "Database: ${database}" "0"
-echolog "Organims group: ${organism_group}" "0"
-echolog "Species: ${species}" "0"
-echolog "Taxids: ${taxids}" "0"
-echolog "Refseq category: ${refseq_category}" "0"
-echolog "Assembly level: ${assembly_level}" "0"
-echolog "Custom filter: ${custom_filter}" "0"
-echolog "File formats: ${file_formats}" "0"
-echolog "Top assemblies species: ${top_assemblies_species}" "0"
-echolog "Top assemblies taxids: ${top_assemblies_taxids}" "0"
-echolog "Date start: ${date_start}" "0"
-echolog "Date end: ${date_end}" "0"
-echolog "GTDB Only: ${gtdb_only}" "0"
-echolog "Download taxonomy: ${download_taxonomy}" "0"
-echolog "Dry-run: ${dry_run}" "0"
-echolog "Fix/recover: ${just_fix}" "0"
-echolog "Retries download in batches: ${retry_download_batch}" "0"
-echolog "Delete extra files: ${delete_extra_files}" "0"
-echolog "Check md5: ${check_md5}" "0"
-echolog "Output updated assembly accessions: ${updated_assembly_accession}" "0"
-echolog "Output updated sequence accessions: ${updated_sequence_accession}" "0"
-echolog "Conditional exit status: ${conditional_exit}" "0"
-echolog "Silent: ${silent}" "0"
-echolog "Silent with progress and version: ${silent_progress}" "0"
-echolog "Output URLs: ${url_list}" "0"
-echolog "External assembly summary: ${external_assembly_summary}" "0"
-echolog "Threads: ${threads}" "0"
-echolog "Verbose log: ${verbose_log}" "0"
-echolog "Working directory: ${working_dir}" "1"
-echolog "Label: ${label}" "0"
-echolog "Rollback label: ${rollback_label}" "0"
-if [[ "${use_curl}" -eq 1 ]]; then
-    echolog "Downloader: curl" "0"
-else
-    echolog "Downloader: wget" "0"
-fi
-echolog "-------------------------------------------" "1"
+echolog "Mode: ${MODE} $(if [[ "${dry_run}" -eq 1 ]]; then echo "(DRY-RUN)"; fi)" "1"
+echolog "Args: ${genome_updater_args}${bool_args}" "1"
+echolog "Outp: ${working_dir}" "1"
+echolog "-------------------------------------" "1"
 
 if [ "${debug_mode}" -eq 1 ] ; then 
     ls -laR "${working_dir}"
@@ -951,21 +1229,32 @@ if [[ "${MODE}" == "NEW" ]]; then
 
     if [[ ! -z "${external_assembly_summary}" ]]; then
         echolog "Using external assembly summary [$(readlink -m ${external_assembly_summary})]" "1"
-        # Skip possible header lines
-        grep -v "^#" "${external_assembly_summary}" > "${new_assembly_summary}";
-        echolog " - Database [${database}] selection is ignored when using an external assembly summary" "1";
+        # Skip possible header lines (|| true -> do not output error if none)
+        grep -v "^#" "${external_assembly_summary}" > "${new_assembly_summary}" || true
+        if ! check_assembly_summary "${new_assembly_summary}"; then 
+            echolog " - Invalid external assembly_summary.txt" "1"
+            exit 1; 
+        fi
         all_lines=$(count_lines_file "${new_assembly_summary}")
     else
         echolog "Downloading assembly summary [${new_label}]" "1"
         echolog " - Database [${database}]" "1"
         if [[ ! -z "${organism_group}" ]]; then
-            echolog " - Organism group [${organism_group}]" "1";
+            echolog " - Organism group [${organism_group}]" "1"
+        fi
+        if ! get_assembly_summary "${new_assembly_summary}" "${database}" "${organism_group}"; then 
+            echolog " - Failed to download one or more assembly_summary files" "1"
+            exit 1; 
         fi
-        all_lines=$(get_assembly_summary "${new_assembly_summary}" "${database}" "${organism_group}")
+        all_lines=$(count_lines_file "${new_assembly_summary}")
     fi
     echolog " - ${all_lines} assembly entries available" "1"
-
-    filter_assembly_summary "${new_assembly_summary}" ${all_lines}
+    echolog "" "1"
+    echolog "Filtering assembly summary [${new_label}]" "1"
+    if ! filter_assembly_summary "${new_assembly_summary}" ${all_lines}; then
+        echolog " - Failed" "1";
+        exit 1;
+    fi
     filtered_lines=$(count_lines_file "${new_assembly_summary}")
     echolog " - ${filtered_lines} assembly entries to download" "1"
     echolog "" "1"
@@ -979,23 +1268,25 @@ if [[ "${MODE}" == "NEW" ]]; then
         # Set version - link new assembly as the default
         ln -s -r "${new_assembly_summary}" "${default_assembly_summary}"
         # Add entry on history
-        write_history "" ${new_label} ${timestamp} ${new_assembly_summary} "1"
+        write_history ${new_label} ${new_label} ${timestamp} ${new_assembly_summary}
 
         if [[ "${filtered_lines}" -gt 0 ]] ; then
-            echolog " - Downloading $((filtered_lines*(n_formats+1))) files with ${threads} threads" "1"
+            echolog "Downloading $((filtered_lines*(n_formats+1))) files with ${threads} threads" "1"
             download_files "${new_assembly_summary}" "1,20" "${file_formats}"
             echolog "" "1"
-            # UPDATED INDICES assembly accession
+
             if [ "${updated_assembly_accession}" -eq 1 ]; then 
-                output_assembly_accession "${new_assembly_summary}" "1,20" "${file_formats}" "A" > "${new_output_prefix}updated_assembly_accession.txt"
-                echolog "Assembly accession report written [${new_output_prefix}updated_assembly_accession.txt]" "1"
+                echolog "Writing assembly accession report" "1"
+                output_assembly_accession "${new_assembly_summary}" "1,20" "${file_formats}" "A" > "${new_output_prefix}${timestamp}_assembly_accession.txt"
+                echolog " - ${new_output_prefix}${timestamp}_assembly_accession.txt" "1"
+                echolog "" "1"
             fi
-            # UPDATED INDICES sequence accession
-            if [[ "${file_formats}" =~ "assembly_report.txt" ]] && [ "${updated_sequence_accession}" -eq 1 ]; then
-                output_sequence_accession "${new_assembly_summary}" "1,20" "${file_formats}" "A" "${new_assembly_summary}" > "${new_output_prefix}updated_sequence_accession.txt"
-                echolog "Sequence accession report written [${new_output_prefix}updated_sequence_accession.txt]" "1"
+            if [ "${updated_sequence_accession}" -eq 1 ]; then
+                echolog "Writing sequence accession report" "1"
+                output_sequence_accession "${new_assembly_summary}" "1,20" "${file_formats}" "A" "${new_assembly_summary}" > "${new_output_prefix}${timestamp}_sequence_accession.txt"
+                echolog " - ${new_output_prefix}${timestamp}_sequence_accession.txt" "1"
+                echolog "" "1"
             fi
-            echolog "" "1"
         fi
     fi
     
@@ -1007,25 +1298,31 @@ else # update/fix
 
     # Check for missing files on current version
     echolog "Checking for missing files in the current version [${current_label}]" "1"
-    missing="${working_dir}missing.tmp"
+    missing=$(tmp_file "missing.tmp")
     check_missing_files "${current_assembly_summary}" "1,20" "${file_formats}" > "${missing}" # assembly accession, url, filename
     missing_lines=$(count_lines_file "${missing}")
+
     if [ "${missing_lines}" -gt 0 ]; then
         echolog " - ${missing_lines} missing files" "1"
         if [ "${dry_run}" -eq 0 ]; then
-            echolog " - Downloading ${missing_lines} files with ${threads} threads"    "1"
+            if [ "${just_fix}" -eq 1 ]; then # fix-only run: record a history entry with an empty new-label field
+                write_history "${current_label}" "" "${timestamp}" "${current_assembly_summary}"
+            fi
+            echolog "Downloading ${missing_lines} files with ${threads} threads" "1"
             download_files "${missing}" "2,3"
             echolog "" "1"
             # if new files were downloaded, rewrite reports (overwrite information on Removed accessions - all become Added)
             if [ "${updated_assembly_accession}" -eq 1 ]; then 
-                output_assembly_accession "${current_assembly_summary}" "1,20" "${file_formats}" "A" > "${current_output_prefix}updated_assembly_accession.txt"
-                echolog "Assembly accession report rewritten [${current_output_prefix}updated_assembly_accession.txt]" "1"
-                echolog " - In fix mode, all entries are report as 'A' (Added)" "1"
+                echolog "Writing assembly accession report" "1"
+                output_assembly_accession "${missing}" "1,2" "${file_formats}" "A" > "${current_output_prefix}${timestamp}_assembly_accession.txt"
+                echolog " - ${current_output_prefix}${timestamp}_assembly_accession.txt" "1"
+                echolog "" "1"
             fi
-            if [[ "${file_formats}" =~ "assembly_report.txt" ]] && [ "${updated_sequence_accession}" -eq 1 ]; then
-                output_sequence_accession "${current_assembly_summary}" "1,20" "${file_formats}" "A" "${current_assembly_summary}" > "${current_output_prefix}updated_sequence_accession.txt"
-                echolog "Sequence accession report rewritten [${current_output_prefix}updated_sequence_accession.txt]" "1"
-                echolog " - In fix mode, all entries are report as 'A' (Added)" "1"
+            if [ "${updated_sequence_accession}" -eq 1 ]; then
+                echolog "Writing sequence accession report" "1"
+                output_sequence_accession "${missing}" "1,2" "${file_formats}" "A" "${current_assembly_summary}" > "${current_output_prefix}${timestamp}_sequence_accession.txt"
+                echolog " - ${current_output_prefix}${timestamp}_sequence_accession.txt" "1"
+                echolog "" "1"
             fi
         fi
     else
@@ -1033,9 +1330,9 @@ else # update/fix
     fi
     echolog "" "1"
     rm "${missing}"
-    
+
     echolog "Checking for extra files in the current version [${current_label}]" "1"
-    extra="${working_dir}extra.tmp"
+    extra=$(tmp_file "extra.tmp")
     join <(ls -1 "${current_output_prefix}${files_dir}" | sort) <(list_files "${current_assembly_summary}" "1,20" "${file_formats}" | cut -f 3 | sed -e 's/.*\///' | sort) -v 1 > "${extra}"
     extra_files=$(count_lines_file "${extra}")
     if [ "${extra_files}" -gt 0 ]; then
@@ -1051,7 +1348,7 @@ else # update/fix
     fi
     echolog "" "1"
     rm "${extra}"
-    
+
     if [[ "${MODE}" == "UPDATE" ]]; then
 
         # change TARGET for update
@@ -1063,28 +1360,37 @@ else # update/fix
         if [[ ! -z "${organism_group}" ]]; then
             echolog " - Organism group [${organism_group}]" "1";
         fi
-        all_lines=$(get_assembly_summary "${new_assembly_summary}" "${database}" "${organism_group}")
-        echolog " - ${all_lines} assembly entries available" "1"
+        if ! get_assembly_summary "${new_assembly_summary}" "${database}" "${organism_group}"; then 
+            echolog " - Failed to download one or more assembly_summary files" "1";   
+            exit 1; 
+        fi
+        all_lines=$(count_lines_file "${new_assembly_summary}")
 
-        filter_assembly_summary "${new_assembly_summary}" ${all_lines}
+        echolog " - ${all_lines} assembly entries available" "1"
+        echolog "" "1"
+        echolog "Filtering assembly summary [${new_label}]" "1"
+        if ! filter_assembly_summary "${new_assembly_summary}" ${all_lines}; then
+            echolog " - Failed" "1";
+            exit 1;
+        fi
         filtered_lines=$(count_lines_file "${new_assembly_summary}")
         echolog " - ${filtered_lines} assembly entries to download" "1"
         echolog "" "1"
         
-        update=${working_dir}update.tmp
-        delete=${working_dir}delete.tmp
-        new=${working_dir}new.tmp
+        update=$(tmp_file "update.tmp")
+        remove=$(tmp_file "remove.tmp")
+        new=$(tmp_file "new.tmp")
         # UPDATED (verify if version or date changed)
         join <(awk -F '\t' '{acc_ver=$1; gsub("\\.[0-9]*","",$1); gsub("/","",$15); print $1,acc_ver,$15,$20}' ${new_assembly_summary} | sort -k 1,1) <(awk -F '\t' '{acc_ver=$1; gsub("\\.[0-9]*","",$1); gsub("/","",$15); print $1,acc_ver,$15,$20}' ${current_assembly_summary} | sort -k 1,1) -o "1.2,1.3,1.4,2.2,2.3,2.4" | awk '{if($2>$5 || $1!=$4){print $1"\t"$3"\t"$4"\t"$6}}' > ${update}
         update_lines=$(count_lines_file "${update}")
-        # DELETED
-        join <(cut -f 1 ${new_assembly_summary} | sed 's/\.[0-9]*//g' | sort) <(awk -F '\t' '{acc_ver=$1; gsub("\\.[0-9]*","",$1); print $1,acc_ver,$20}' ${current_assembly_summary} | sort -k 1,1) -v 2 -o "2.2,2.3" | tr ' ' '\t' > ${delete}
-        delete_lines=$(count_lines_file "${delete}")
+        # REMOVED
+        join <(cut -f 1 ${new_assembly_summary} | sed 's/\.[0-9]*//g' | sort) <(awk -F '\t' '{acc_ver=$1; gsub("\\.[0-9]*","",$1); print $1,acc_ver,$20}' ${current_assembly_summary} | sort -k 1,1) -v 2 -o "2.2,2.3" | tr ' ' '\t' > ${remove}
+        remove_lines=$(count_lines_file "${remove}")
         # NEW
         join <(awk -F '\t' '{acc_ver=$1; gsub("\\.[0-9]*","",$1); print $1,acc_ver,$20}' ${new_assembly_summary} | sort -k 1,1) <(cut -f 1 ${current_assembly_summary} | sed 's/\.[0-9]*//g' | sort) -o "1.2,1.3" -v 1 | tr ' ' '\t' > ${new}
         new_lines=$(count_lines_file "${new}")
         echolog "Updates available [${current_label} --> ${new_label}]" "1"
-        echolog " - ${update_lines} updated, ${delete_lines} deleted, ${new_lines} new entries" "1"
+        echolog " - ${update_lines} updated, ${remove_lines} removed, ${new_lines} new entries" "1"
         echolog "" "1"
 
         if [ "${dry_run}" -eq 1 ]; then
@@ -1094,75 +1400,92 @@ else # update/fix
             echolog "Linking versions [${current_label} --> ${new_label}]" "1"
             # Only link existing files relative to the current version
             list_files "${current_assembly_summary}" "1,20" "${file_formats}" | cut -f 3 | xargs -P "${threads}" -I{} bash -c 'if [[ -f '"${current_output_prefix}${files_dir}{}"' ]]; then ln -s -r '"${current_output_prefix}${files_dir}{}"' '"${new_output_prefix}${files_dir}"'; fi'
-            echolog " - Done." "1"
+            echolog " - Done" "1"
             echolog "" "1"
             # set version - update default assembly summary
             echolog "Setting-up new version [${new_label}]" "1"
             rm "${default_assembly_summary}"
             ln -s -r "${new_assembly_summary}" "${default_assembly_summary}"
             # Add entry on history
-            write_history ${current_label} ${new_label} ${timestamp} ${new_assembly_summary} "0"
-            echolog " - Done." "1"
+            write_history "${current_label}" "${new_label}" "${timestamp}" "${new_assembly_summary}" # quoted so an empty or space-containing value keeps its argument position
+            echolog " - Done" "1"
             echolog "" "1"
 
             # UPDATED INDICES assembly accession
             if [ "${updated_assembly_accession}" -eq 1 ]; then 
-                output_assembly_accession "${update}" "3,4" "${file_formats}" "R" > "${new_output_prefix}updated_assembly_accession.txt"
-                output_assembly_accession "${delete}" "1,2" "${file_formats}" "R" >> "${new_output_prefix}updated_assembly_accession.txt"
+                output_assembly_accession "${update}" "3,4" "${file_formats}" "R" > "${new_output_prefix}${timestamp}_assembly_accession.txt"
+                output_assembly_accession "${remove}" "1,2" "${file_formats}" "R" >> "${new_output_prefix}${timestamp}_assembly_accession.txt"
             fi
             # UPDATED INDICES sequence accession (removed entries - do it before deleting them)
-            if [[ "${file_formats}" =~ "assembly_report.txt" ]] && [ "${updated_sequence_accession}" -eq 1 ]; then
+            if [ "${updated_sequence_accession}" -eq 1 ]; then
                 # current_assembly_summary is the old summary
-                output_sequence_accession "${update}" "3,4" "${file_formats}" "R" "${current_assembly_summary}" > "${new_output_prefix}updated_sequence_accession.txt"
-                output_sequence_accession "${delete}" "1,2" "${file_formats}" "R" "${current_assembly_summary}" >> "${new_output_prefix}updated_sequence_accession.txt"
+                output_sequence_accession "${update}" "3,4" "${file_formats}" "R" "${current_assembly_summary}" > "${new_output_prefix}${timestamp}_sequence_accession.txt"
+                output_sequence_accession "${remove}" "1,2" "${file_formats}" "R" "${current_assembly_summary}" >> "${new_output_prefix}${timestamp}_sequence_accession.txt"
             fi
             
             # Execute updates
             echolog "Updating" "1"
             if [ "${update_lines}" -gt 0 ]; then
-                echolog " - UPDATE: Deleting $((update_lines*(n_formats+1))) files " "1"
-                # delete old version
+                echolog " - UPDATE: Removing $((update_lines*(n_formats+1))) files " "1"
+                # remove old version
                 del_lines=$(remove_files "${update}" "3,4" "${file_formats}")
-                echolog " - ${del_lines} files successfully deleted " "1"
+                echolog " - ${del_lines} files successfully removed from the current version" "1"
                 echolog " - UPDATE: Downloading $((update_lines*(n_formats+1))) files with ${threads} threads" "1"
                 # download new version
                 download_files "${update}" "1,2" "${file_formats}"
             fi
-            if [ "${delete_lines}" -gt 0 ]; then
-                echolog " - DELETE: Deleting $((delete_lines*(n_formats+1))) files" "1"
-                del_lines=$(remove_files "${delete}" "1,2" "${file_formats}")
-                echolog " - ${del_lines} files successfully deleted " "1"
+            if [ "${remove_lines}" -gt 0 ]; then
+                echolog " - REMOVE: Removing $((remove_lines*(n_formats+1))) files" "1"
+                del_lines=$(remove_files "${remove}" "1,2" "${file_formats}")
+                echolog " - ${del_lines} files successfully removed from the current version" "1"
             fi
             if [ "${new_lines}" -gt 0 ]; then
                 echolog " - NEW: Downloading $((new_lines*(n_formats+1))) files with ${threads} threads"    "1"
                 download_files "${new}" "1,2" "${file_formats}"
             fi 
-            echolog " - Done." "1"
+            echolog " - Done" "1"
             echolog "" "1"
 
             # UPDATED INDICES assembly accession (added entries - do it after downloading them)
             if [ "${updated_assembly_accession}" -eq 1 ]; then 
-                output_assembly_accession "${update}" "1,2" "${file_formats}" "A" >> "${new_output_prefix}updated_assembly_accession.txt"
-                output_assembly_accession "${new}" "1,2" "${file_formats}" "A" >> "${new_output_prefix}updated_assembly_accession.txt"
-                echolog "Assembly accession report written [${new_output_prefix}updated_assembly_accession.txt]" "1"
+                echolog "Writing assembly accession report" "1"
+                output_assembly_accession "${update}" "1,2" "${file_formats}" "A" >> "${new_output_prefix}${timestamp}_assembly_accession.txt"
+                output_assembly_accession "${new}" "1,2" "${file_formats}" "A" >> "${new_output_prefix}${timestamp}_assembly_accession.txt"
+                echolog " - ${new_output_prefix}${timestamp}_assembly_accession.txt" "1"
+                echolog "" "1"
             fi
             # UPDATED INDICES sequence accession (added entries - do it after downloading them)
-            if [[ "${file_formats}" =~ "assembly_report.txt" ]] && [ "${updated_sequence_accession}" -eq 1 ]; then
-                output_sequence_accession "${update}" "1,2" "${file_formats}" "A" "${new_assembly_summary}">> "${new_output_prefix}updated_sequence_accession.txt"
-                output_sequence_accession "${new}" "1,2" "${file_formats}" "A" "${new_assembly_summary}" >> "${new_output_prefix}updated_sequence_accession.txt"
-                echolog "Sequence accession report written [${new_output_prefix}updated_sequence_accession.txt]" "1"
+            if [ "${updated_sequence_accession}" -eq 1 ]; then
+                echolog "Writing sequence accession report" "1"
+                output_sequence_accession "${update}" "1,2" "${file_formats}" "A" "${new_assembly_summary}">> "${new_output_prefix}${timestamp}_sequence_accession.txt"
+                output_sequence_accession "${new}" "1,2" "${file_formats}" "A" "${new_assembly_summary}" >> "${new_output_prefix}${timestamp}_sequence_accession.txt"
+                echolog " - ${new_output_prefix}${timestamp}_sequence_accession.txt" "1"
+                echolog "" "1"
             fi
         fi
         # Remove update files
-        rm ${update} ${delete} ${new}
+        rm ${update} ${remove} ${new}
     fi
 fi
 
 if [ "${dry_run}" -eq 0 ]; then
     if [ "${download_taxonomy}" -eq 1 ]; then
-        echolog "Downloading current Taxonomy database [${target_output_prefix}taxdump.tar.gz] " "1"
-        download_static "${base_url}/pub/taxonomy/taxdump.tar.gz" "${target_output_prefix}taxdump.tar.gz"
-        echolog " - Done" "1"
+        echolog "Downloading taxonomy database [${tax_mode}]" "1"
+        if [[ "${tax_mode}" == "ncbi" ]]; then
+            if ! download_retry_md5 "${base_url}/pub/taxonomy/taxdump.tar.gz" "${target_output_prefix}taxdump.tar.gz" "${base_url}/pub/taxonomy/taxdump.tar.gz.md5" "${retry_download_batch}"; then
+                echolog " - Failed" "1"
+            else
+                echolog " - ${target_output_prefix}taxdump.tar.gz" "1"
+            fi
+        else
+            for url in "${gtdb_urls[@]}"; do # one download per configured GTDB URL, saved under its basename
+                if ! download_retry_md5 "${url}" "${target_output_prefix}${url##*/}" "https://data.gtdb.ecogenomic.org/releases/release207/207.0/MD5SUM" "${retry_download_batch}"; then
+                    echolog " - Failed" "1"
+                else
+                    echolog " - ${target_output_prefix}${url##*/}" "1"
+                fi
+            done
+        fi
         echolog "" "1"
     fi
 
@@ -1184,12 +1507,9 @@ if [ "${dry_run}" -eq 0 ]; then
     fi
     echolog "# Current version: $(dirname $(readlink -m ${default_assembly_summary}))" "1"
     echolog "# Log file       : ${log_file}" "1"
+    echolog "# History        : ${history_file}" "1"
     [ "${silent}" -eq 0 ] && print_line
 
-    if [ "${silent_progress}" -eq 1 ] ; then
-        echo "$(dirname $(readlink -m ${default_assembly_summary}))"
-    fi
-
     if [ "${debug_mode}" -eq 1 ] ; then 
         ls -laR "${working_dir}"
     fi
diff --git a/tests/README.md b/tests/README.md
index cfd7d2a..633e0bf 100755
--- a/tests/README.md
+++ b/tests/README.md
@@ -2,4 +2,4 @@
 
 genome_updater uses the [bats](https://github.com/bats-core/bats-core) testing framework for Bash.
 
-Use the `download_test_set.sh` to re-create a random set of offline files to test. Files will be downloaded to `files/genomes`.
+Use the `download_test_set.sh` script to re-create a random set of offline files for testing. Genome files are downloaded to `files/genomes`, and the filtered taxonomies are written to `files/pub/taxonomy/new_taxdump` [ncbi] and `files/releases/release207/207.0` [gtdb].
diff --git a/tests/download_test_set.sh b/tests/download_test_set.sh
index 8ecc57a..9f37fef 100755
--- a/tests/download_test_set.sh
+++ b/tests/download_test_set.sh
@@ -23,9 +23,38 @@ do
         fi
         head -n 2 "full_assembly_summary.txt" > "${out_as}"
         tail -n+3 "full_assembly_summary.txt" | shuf | head -n ${entries} >> "${out_as}"
+        # create a dummy historical for gtdb tests (just a copy)
+        cp "${out_as}" "${out_as%.*}_historical.txt"
+        # Download files
         tail -n+3 "${out_as}" | cut -f 20 | sed 's/https:/ftp:/g' | xargs -P ${entries} wget --quiet --show-progress --directory-prefix="${outfld}" --recursive --level 2 --accept "${ext}"
         cp -r "${outfld}ftp.ncbi.nlm.nih.gov/genomes/" "${outfld}"
         rm -rf "full_assembly_summary.txt" "${outfld}ftp.ncbi.nlm.nih.gov/" 
     done
 done
 
+# Download the taxonomies and keep only the entries matching the accessions/taxids used by the test set
+
+# Collect the unique (accession, taxid) pairs (columns 1 and 6) from the assembly summaries under genomes/
+cut -f 1,6 "${outfld}"genomes/*/assembly_summary_*.txt "${outfld}"genomes/*/*/assembly_summary.txt | grep -v "^#" | sort | uniq > "${outfld}accessions_taxids.txt"
+# ncbi new_taxdump: extract the two lineage tables, filter them by the collected taxids and re-pack them
+wget --quiet --show-progress --output-document "${outfld}new_taxdump.tar.gz" "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz"
+tar xf "${outfld}new_taxdump.tar.gz" -C "${outfld}" taxidlineage.dmp rankedlineage.dmp
+mkdir -p "${outfld}pub/taxonomy/new_taxdump/"
+while IFS=$'\t' read -r _ taxid; do grep "[^0-9]${taxid}[^0-9]" "${outfld}taxidlineage.dmp"; done < "${outfld}accessions_taxids.txt" >> "${outfld}pub/taxonomy/new_taxdump/taxidlineage.dmp"
+while IFS=$'\t' read -r _ taxid; do grep "^${taxid}[^0-9]" "${outfld}rankedlineage.dmp"; done < "${outfld}accessions_taxids.txt" >> "${outfld}pub/taxonomy/new_taxdump/rankedlineage.dmp"
+tar -czf "${outfld}pub/taxonomy/new_taxdump/new_taxdump.tar.gz" -C "${outfld}pub/taxonomy/new_taxdump/" taxidlineage.dmp rankedlineage.dmp
+md5sum "${outfld}pub/taxonomy/new_taxdump/new_taxdump.tar.gz" > "${outfld}pub/taxonomy/new_taxdump/new_taxdump.tar.gz.md5"
+rm "${outfld}new_taxdump.tar.gz" "${outfld}taxidlineage.dmp" "${outfld}rankedlineage.dmp" "${outfld}pub/taxonomy/new_taxdump/taxidlineage.dmp" "${outfld}pub/taxonomy/new_taxdump/rankedlineage.dmp"
+
+#gtdb
+gtdb_out="${outfld}releases/release207/207.0/"
+mkdir -p "${gtdb_out}"
+gtdb_tax=( "ar53_taxonomy_r207.tsv.gz" "bac120_taxonomy_r207.tsv.gz" )
+for tax in "${gtdb_tax[@]}"; do
+    wget --quiet --show-progress --output-document "${outfld}${tax}" "https://data.gtdb.ecogenomic.org/releases/release207/207.0/${tax}"
+    join -1 1 -2 1 <(cut -f 1 "${outfld}accessions_taxids.txt" | sort) <(zcat "${outfld}${tax}" | awk 'BEGIN{FS=OFS="\t"}{print $1,$1,$2}' | sed -r 's/^.{3}//' | sort) -t$'\t' -o "2.2,2.3" | gzip > "${gtdb_out}${tax}"
+    rm "${outfld}${tax}"
+done
+
+md5sum ${gtdb_out}*.tsv.gz > "${gtdb_out}MD5SUM"
+rm ${outfld}accessions_taxids.txt
diff --git a/tests/files/genomes/genbank/archaea/assembly_summary_historical.txt b/tests/files/genomes/genbank/archaea/assembly_summary_historical.txt
new file mode 100644
index 0000000..58ba759
--- /dev/null
+++ b/tests/files/genomes/genbank/archaea/assembly_summary_historical.txt
@@ -0,0 +1,22 @@
+#   See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file.
+# assembly_accession	bioproject	biosample	wgs_master	refseq_category	taxid	species_taxid	organism_name	infraspecific_name	isolate	version_status	assembly_level	release_type	genome_rep	seq_rel_date	asm_name	submitter	gbrs_paired_asm	paired_asm_comp	ftp_path	excluded_from_refseq	relation_to_type_material	asm_not_live_date
+GCA_903930505.1	PRJEB38681	SAMEA6952057	CAIYYQ000000000.1	na	2026739	2026739	Euryarchaeota archaeon		AlinenSedimentsCore2_bin-0840	latest	Contig	Major	Full	2020/07/18	freshwater MAG --- AlinenSedimentsCore2_bin-0840	BILS	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/930/505/GCA_903930505.1_freshwater_MAG_---_AlinenSedimentsCore2_bin-0840	derived from metagenome; genus undefined		na
+GCA_903858355.1	PRJEB38681	SAMEA6954579	CAIOIP000000000.1	na	2220064	2220064	uncultured Candidatus Micrarchaeota archaeon		AlinenSedimentsD1_bin-0133	latest	Contig	Major	Full	2020/07/16	freshwater MAG --- AlinenSedimentsD1_bin-0133	BILS	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/858/355/GCA_903858355.1_freshwater_MAG_---_AlinenSedimentsD1_bin-0133	derived from environmental source; derived from metagenome		na
+GCA_016839815.1	PRJNA680430	SAMN16492231	JAEOTM000000000.1	na	2800102	2800102	Candidatus Hodarchaeota archaeon		YT2_004	latest	Contig	Major	Full	2021/02/09	ASM1683981v1	Shenzhen Univeristy	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/839/815/GCA_016839815.1_ASM1683981v1	derived from metagenome; genus undefined		na
+GCA_011389385.1	PRJNA480137	SAMN09639886	DTGE00000000.1	na	2026714	2026714	Candidatus Bathyarchaeota archaeon		SpSt-755	latest	Contig	Major	Full	2020/03/17	ASM1138938v1	The University of Hong Kong	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/389/385/GCA_011389385.1_ASM1138938v1	derived from metagenome; genus undefined		na
+GCA_017656495.1	PRJNA635695	SAMN15049706	JACDNS000000000.1	na	35749	35749	Thermococcus sp.		GB_MAG1_027	latest	Contig	Major	Full	2021/04/01	ASM1765649v1	Marine Biological Laboratory	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/656/495/GCA_017656495.1_ASM1765649v1	derived from metagenome		na
+GCA_018645535.1	PRJNA630981	SAMN14913871	JABGWN000000000.1	na	2026739	2026739	Euryarchaeota archaeon		SI034_bin52	latest	Contig	Major	Full	2021/06/02	ASM1864553v1	The University of Melbourne	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/645/535/GCA_018645535.1_ASM1864553v1	derived from metagenome; genus undefined		na
+GCA_002499365.1	PRJNA348753	SAMN06027185	DALD00000000.1	na	1915872	1915872	Euryarchaeota archaeon UBA29		UBA29	latest	Scaffold	Major	Full	2017/10/10	ASM249936v1	University of Queensland	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/499/365/GCA_002499365.1_ASM249936v1	derived from metagenome; genus undefined		na
+GCA_004525575.1	PRJNA511814	SAMN11127074	SPCB00000000.1	na	2053491	2053491	Candidatus Thorarchaeota archaeon		das_tool.maxbin2.13	latest	Contig	Major	Full	2019/03/30	ASM452557v1	Radboud University Njmegen	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/525/575/GCA_004525575.1_ASM452557v1	derived from metagenome; genus undefined		na
+GCA_011335015.1	PRJNA480137	SAMN09639889	DTGH00000000.1	na	2250274	2250274	Candidatus Micrarchaeota archaeon		SpSt-758	latest	Contig	Major	Full	2020/03/16	ASM1133501v1	The University of Hong Kong	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/335/015/GCA_011335015.1_ASM1133501v1	derived from metagenome; genus undefined		na
+GCA_002069705.1	PRJNA321808	SAMN05004159	MWBV00000000.1	na	1852841	1852841	Candidatus Diapherotrites archaeon ADurb.Bin253		ADurb.Bin253	latest	Contig	Major	Full	2017/03/22	ASM206970v1	University of Illinois	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/069/705/GCA_002069705.1_ASM206970v1	derived from metagenome; genus undefined		na
+GCA_900316635.1	PRJEB21624	SAMEA104666887	ONDQ00000000.1	na	253161	253161	uncultured Methanobrevibacter sp.		RUG201	latest	Scaffold	Major	Full	2018/03/21	Rumen uncultured genome RUG201	THE ROSLIN INSTITUTE	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/900/316/635/GCA_900316635.1_Rumen_uncultured_genome_RUG201	derived from environmental source		na
+GCA_011388575.1	PRJNA480137	SAMN09638894	DRUB00000000.1	na	334771	334771	Ignisphaera aggregans		SpSt-1	latest	Contig	Major	Full	2020/03/17	ASM1138857v1	The University of Hong Kong	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/388/575/GCA_011388575.1_ASM1138857v1	derived from metagenome		na
+GCA_018304485.1	PRJNA288027	SAMN18341270	JAGVWB000000000.1	na	2026736	2026736	Candidatus Diapherotrites archaeon		RIFCSPLOWO2_01_FULL_43_13	latest	Scaffold	Major	Full	2021/05/07	ASM1830448v1	Banfield Lab, University of California, Berkeley	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/304/485/GCA_018304485.1_ASM1830448v1	derived from metagenome; genus undefined		na
+GCA_018676255.1	PRJNA630981	SAMN14914095	JABHFD000000000.1	na	2026739	2026739	Euryarchaeota archaeon		SI037_bin172	latest	Contig	Major	Full	2021/06/02	ASM1867625v1	The University of Melbourne	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/676/255/GCA_018676255.1_ASM1867625v1	derived from metagenome; genus undefined		na
+GCA_016196285.1	PRJNA640378	SAMN15435488	JACPXY000000000.1	na	2026773	2026773	Candidatus Pacearchaeota archaeon		NC_groundwater_849_Pr1_B-0.1um_42_10	latest	Contig	Major	Full	2020/12/21	ASM1619628v1	Innovative Genomics Institute	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/196/285/GCA_016196285.1_ASM1619628v1	derived from metagenome; genus undefined		na
+GCA_002497565.1	PRJNA348753	SAMN06027207	DADS00000000.1	na	1915824	1915824	Euryarchaeota archaeon UBA179		UBA179	latest	Scaffold	Major	Full	2017/10/10	ASM249756v1	University of Queensland	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/497/565/GCA_002497565.1_ASM249756v1	derived from metagenome; genus undefined		na
+GCA_902383905.1	PRJEB33885	SAMEA5851664		representative genome	1406512	1406512	Candidatus Methanomassiliicoccus intestinalis		MGYG-HGUT-02160	latest	Complete Genome	Major	Full	2019/08/10	UHGG_MGYG-HGUT-02160	EMG	GCF_902383905.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/902/383/905/GCA_902383905.1_UHGG_MGYG-HGUT-02160			na
+GCA_018692575.1	PRJNA630981	SAMN14914238	JABHKQ000000000.1	na	2026803	2026803	Candidatus Woesearchaeota archaeon		SI037S2_bin24	latest	Contig	Major	Full	2021/06/02	ASM1869257v1	The University of Melbourne	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/692/575/GCA_018692575.1_ASM1869257v1	derived from metagenome; genus undefined		na
+GCA_013390775.1	PRJNA640238	SAMN15312031	JACATB000000000.1	na	2511932	2511932	Marine Group I thaumarchaeote	strain=D11		latest	Scaffold	Major	Full	2020/07/06	ASM1339077v1	National Science Foundation of China	GCF_013390775.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/390/775/GCA_013390775.1_ASM1339077v1	genus undefined		na
+GCA_002727275.1	PRJNA391943	SAMN07618837	PBWO00000000.1	na	2026739	2026739	Euryarchaeota archaeon		RS814	latest	Contig	Major	Full	2017/10/26	ASM272727v1	Tara Oceans Consortium	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/727/275/GCA_002727275.1_ASM272727v1	derived from metagenome; genus undefined		na
diff --git a/tests/files/genomes/genbank/assembly_summary_genbank_historical.txt b/tests/files/genomes/genbank/assembly_summary_genbank_historical.txt
new file mode 100644
index 0000000..9677d66
--- /dev/null
+++ b/tests/files/genomes/genbank/assembly_summary_genbank_historical.txt
@@ -0,0 +1,22 @@
+#   See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file.
+# assembly_accession	bioproject	biosample	wgs_master	refseq_category	taxid	species_taxid	organism_name	infraspecific_name	isolate	version_status	assembly_level	release_type	genome_rep	seq_rel_date	asm_name	submitter	gbrs_paired_asm	paired_asm_comp	ftp_path	excluded_from_refseq	relation_to_type_material	asm_not_live_date
+GCA_002566855.1	PRJNA400804	SAMN07598389	NUZM00000000.1	na	1396	1396	Bacillus cereus	strain=AFS074515		latest	Scaffold	Major	Full	2017/10/17	ASM256685v1	UNC Chapel Hill	GCF_002566855.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/566/855/GCA_002566855.1_ASM256685v1			na
+GCA_902635445.1	PRJEB33281	SAMEA6073950	CACPNU000000000.1	na	198431	198431	uncultured prokaryote			latest	Contig	Major	Full	2019/11/05	AG-915-F08	WOODS HOLE OCEANOGRAPHIC INSTITUTION	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/902/635/445/GCA_902635445.1_AG-915-F08	derived from environmental source; derived from metagenome		na
+GCA_017159575.1	PRJNA287430	SAMN17764286	AAZEKK000000000.1	na	197	197	Campylobacter jejuni	strain=FSIS12137393		latest	Contig	Major	Full	2021/03/03	PDT000946857.1	USDA FSIS	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/159/575/GCA_017159575.1_PDT000946857.1	from large multi-isolate project		na
+GCA_005728625.1	PRJNA280335	SAMN10715290	AADQWW000000000.1	na	28901	28901	Salmonella enterica	strain=ADRDL-2252		latest	Contig	Major	Full	2019/05/23	PDT000448312.1	US Food and Drug Administration	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/005/728/625/GCA_005728625.1_PDT000448312.1	from large multi-isolate project		na
+GCA_013911495.1	PRJNA638822	SAMN15215249	JACETB000000000.1	na	1131	1131	Synechococcus sp.		MCMED-G31	latest	Contig	Major	Full	2020/07/29	ASM1391149v1	Evolutionary Genomics Group	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/911/495/GCA_013911495.1_ASM1391149v1	derived from metagenome		na
+GCA_004008395.1				na	2499034	2499034	Mycobacterium phage Cici			latest	Complete Genome	Major	Full	2019/01/08	ASM400839v1		na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/008/395/GCA_004008395.1_ASM400839v1			na
+GCA_021355205.1				na	2894335	2894335	Burkholderia phage BgManors32			latest	Complete Genome	Major	Full	2021/11/22	ASM2135520v1		na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/355/205/GCA_021355205.1_ASM2135520v1			na
+GCA_003635585.1	PRJNA374603	SAMN06329599	MVSU00000000.1	na	210	210	Helicobacter pylori	strain=HPAS14		latest	Contig	Major	Full	2018/10/12	ASM363558v1	University of Western Australia	GCF_003635585.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/635/585/GCA_003635585.1_ASM363558v1			na
+GCA_012763735.1	PRJNA277984	SAMN04510396	AATCVN000000000.1	na	562	562	Escherichia coli	strain=CDPHFDLB-F1602032-026A		latest	Contig	Major	Full	2020/04/23	PDT000113200.3	US Food and Drug Administration	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/763/735/GCA_012763735.1_PDT000113200.3	from large multi-isolate project		na
+GCA_013619715.1	PRJNA615626	SAMN14453445	JACEKU000000000.1	na	287	287	Pseudomonas aeruginosa	strain=LiP14		latest	Contig	Major	Full	2020/07/24	ASM1361971v1	University of Oxford	GCF_013619715.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/619/715/GCA_013619715.1_ASM1361971v1			na
+GCA_008787855.1	PRJNA292661	SAMN12842867	AALEUD000000000.1	na	28901	28901	Salmonella enterica	strain=CVM N19S0343		latest	Contig	Major	Full	2019/10/01	PDT000594120.1	FDA	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/008/787/855/GCA_008787855.1_PDT000594120.1	from large multi-isolate project		na
+GCA_903218915.1	PRJEB35770	SAMEA6813852	CAEZVL000000000.1	na	449393	449393	freshwater metagenome			latest	Contig	Major	Full	2020/06/05	UFOp-RE-23may17-586	BIOLOGY CENTRE ASCR, V.V.I., INSTITUTE OF HYDROBIO	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/218/915/GCA_903218915.1_UFOp-RE-23may17-586	derived from environmental source; metagenome		na
+GCA_008201245.1	PRJNA248792	SAMN03479222	AAJWIJ000000000.1	na	90371	28901	Salmonella enterica subsp. enterica serovar Typhimurium	strain=7397		latest	Contig	Major	Full	2019/09/02	PDT000058697.2	Public Health England	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/008/201/245/GCA_008201245.1_PDT000058697.2	from large multi-isolate project		na
+GCA_011078725.1	PRJNA248792	SAMN03168749	AAPFHW000000000.1	na	90371	28901	Salmonella enterica subsp. enterica serovar Typhimurium	strain=H120980533		latest	Contig	Major	Full	2020/03/09	PDT000042974.4	Public Health England	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/078/725/GCA_011078725.1_PDT000042974.4	from large multi-isolate project		na
+GCA_013549135.1	PRJNA230403	SAMN15522001	AATZYI000000000.1	na	28901	28901	Salmonella enterica	strain=PNUSAS152956		latest	Contig	Major	Full	2020/07/23	PDT000787515.1	CDC	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/549/135/GCA_013549135.1_PDT000787515.1	from large multi-isolate project		na
+GCA_018937815.1	PRJNA218110	SAMN19697485	ABAWPX000000000.1	na	562	562	Escherichia coli	strain=PNUSAE074529		latest	Contig	Major	Full	2021/06/17	PDT001069867.1	CDC	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/937/815/GCA_018937815.1_PDT001069867.1	from large multi-isolate project		na
+GCA_005603115.1	PRJNA230403	SAMN11552442	AADIAU000000000.1	na	28901	28901	Salmonella enterica	strain=PNUSAS073825		latest	Contig	Major	Full	2019/05/21	PDT000496874.1	CDC	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/005/603/115/GCA_005603115.1_PDT000496874.1	from large multi-isolate project		na
+GCA_019997905.1	PRJNA685966	SAMN21249929		na	283734	283734	Staphylococcus pseudintermedius	strain=HSP149		latest	Complete Genome	Major	Full	2021/09/15	ASM1999790v1	Universitat Autonoma de Barcelona	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/019/997/905/GCA_019997905.1_ASM1999790v1	from large multi-isolate project		na
+GCA_011897165.1	PRJNA218110	SAMN12361411	AARDFA000000000.1	na	562	562	Escherichia coli	strain=PNUSAE027109		latest	Contig	Major	Full	2020/04/02	PDT000549212.2	CDC	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/897/165/GCA_011897165.1_PDT000549212.2	from large multi-isolate project		na
+GCA_015893745.1	PRJNA514245	SAMN15566993	DACSEB000000000.1	na	575	575	Raoultella planticola		MISC077	latest	Contig	Major	Full	2020/12/09	PDT000883933.1	National Center for Biotechnology Information	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/015/893/745/GCA_015893745.1_PDT000883933.1	from large multi-isolate project		na
diff --git a/tests/files/genomes/genbank/fungi/assembly_summary_historical.txt b/tests/files/genomes/genbank/fungi/assembly_summary_historical.txt
new file mode 100644
index 0000000..e6f940c
--- /dev/null
+++ b/tests/files/genomes/genbank/fungi/assembly_summary_historical.txt
@@ -0,0 +1,22 @@
+#   See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file.
+# assembly_accession	bioproject	biosample	wgs_master	refseq_category	taxid	species_taxid	organism_name	infraspecific_name	isolate	version_status	assembly_level	release_type	genome_rep	seq_rel_date	asm_name	submitter	gbrs_paired_asm	paired_asm_comp	ftp_path	excluded_from_refseq	relation_to_type_material	asm_not_live_date
+GCA_003708985.2	PRJNA429441	SAMN08343249	PPPC00000000.2	representative genome	271357	271357	[Candida] gorgasii	strain=NRRL Y-27707		latest	Scaffold	Major	Full	2018/11/20	ASM370898v2	UW-Madison	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/708/985/GCA_003708985.2_ASM370898v2		assembly from type material	na
+GCA_001599295.1	PRJDB3621	SAMD00028341	BCGN00000000.1	representative genome	54094	54094	Sporopachydermia quercuum	strain=JCM 9486		latest	Scaffold	Major	Full	2016/03/01	JCM_9486_assembly_v001	RIKEN Center for Life Science Technologies, Division of Genomic Technologies	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/599/295/GCA_001599295.1_JCM_9486_assembly_v001			na
+GCA_001636725.1	PRJNA72737	SAMN04908328	AZHB00000000.1	representative genome	1081104	114497	Cordyceps fumosorosea ARSEF 2679	strain=ARSEF 2679		latest	Scaffold	Major	Full	2016/05/04	ISF 1.0	Shanghai Institutes for Biological Sciences, CAS	GCF_001636725.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/636/725/GCA_001636725.1_ISF_1.0			na
+GCA_000467735.1	PRJNA81799	SAMN02981409	AJFL00000000.1	representative genome	1136865	37885	Rhytidhysteron rufulum CBS 306.38	strain=CBS 306.38		latest	Contig	Major	Full	2013/09/16	ASM46773v1	Assembling the Fungal Tree of Life (AFTOL)	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/467/735/GCA_000467735.1_ASM46773v1			na
+GCA_001950535.1	PRJDB3737	SAMD00028438	BCKA00000000.1	representative genome	5077	5077	Penicillium citrinum	strain=JCM 22607		latest	Scaffold	Major	Full	2016/12/09	JCM_22607_assembly_v001	RIKEN Center for Life Science Technologies, Division of Genomic Technologies	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/950/535/GCA_001950535.1_JCM_22607_assembly_v001			na
+GCA_003277105.1	PRJNA396809	SAMN07436824	NPYI00000000.1	na	4932	4932	Saccharomyces cerevisiae	strain=HN7		latest	Chromosome	Major	Full	2018/11/28	ASM327710v1	Institute Of Microbiology Chinese Academy of Sciences	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/277/105/GCA_003277105.1_ASM327710v1			na
+GCA_009803805.1	PRJNA487060	SAMN09910564	RAMV00000000.1	na	29879	29879	Neurospora discreta	ecotype=NMWA, /strain=PS4BIDRA449		latest	Scaffold	Major	Full	2019/12/27	ASM980380v1	University of California, Berkeley	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/009/803/805/GCA_009803805.1_ASM980380v1			na
+GCA_019207905.1	PRJNA706707	SAMN18128823	JAHLVQ000000000.1	na	460523	460523	Ogataea polymorpha		Y-2423	latest	Scaffold	Major	Full	2021/07/13	ASM1920790v1	Colorado College	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/019/207/905/GCA_019207905.1_ASM1920790v1			na
+GCA_013282825.1	PRJNA602542	SAMN13878901	JAACJH000000000.1	na	156630	156630	Alternaria arborescens	strain=NRRL 20593		latest	Scaffold	Major	Full	2020/06/06	ASM1328282v1	US Department of Agriculture, Agriculture Research Service	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/282/825/GCA_013282825.1_ASM1328282v1			na
+GCA_012656115.1	PRJNA592352	SAMN13422809	JAAAQC000000000.1	na	746128	746128	Aspergillus fumigatus	strain=CNM-CM8686		latest	Scaffold	Major	Full	2020/04/22	ASM1265611v1	UNICAMP	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/656/115/GCA_012656115.1_ASM1265611v1			na
+GCA_905105095.1	PRJEB27419	SAMEA4753515	CAJHKB000000000.1	na	318829	318829	Pyricularia oryzae			latest	Scaffold	Major	Full	2020/11/22	Assembly of M.oryzae isolate BF48 genome	UNIVERSITY OF EXETER	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/105/095/GCA_905105095.1_Assembly_of_M.oryzae_isolate_BF48_genome			na
+GCA_004917135.1	PRJNA488010	SAMN10031622	QZAJ00000000.1	na	5580	5580	Aureobasidium pullulans	strain=EXF-11318		latest	Contig	Major	Full	2019/04/26	ASM491713v1	Biotechnical Faculty, University of Ljubljana	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/917/135/GCA_004917135.1_ASM491713v1			na
+GCA_007556565.1	PRJNA534185	SAMN11479276	SWCR00000000.1	representative genome	40997	40997	Elsinoe fawcettii		DAR-70024	latest	Scaffold	Major	Full	2019/07/26	ASM755656v1	Yeungnam University	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/007/556/565/GCA_007556565.1_ASM755656v1			na
+GCA_011022315.1	PRJNA522669	SAMN10948597		representative genome	27292	27292	Saccharomyces pastorianus	strain=CBS 1483		latest	Chromosome	Major	Full	2020/02/26	ASM1102231v1	Delft University of Technology	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/022/315/GCA_011022315.1_ASM1102231v1			na
+GCA_017867755.1	PRJNA680387	SAMN16879102	JAEDSJ000000000.1	na	4932	4932	Saccharomyces cerevisiae	strain=SAN33		latest	Scaffold	Major	Full	2021/04/08	ASM1786775v1	Institute of Microbiology	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/867/755/GCA_017867755.1_ASM1786775v1			na
+GCA_001680595.1	PRJNA289542	SAMN03857101	MAEE00000000.1	na	232081	232081	Fusarium tucumaniae	strain=NRRL 31781		latest	Contig	Major	Full	2016/07/06	ASM168059v1	Iowa State University	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/680/595/GCA_001680595.1_ASM168059v1			na
+GCA_018345925.1	PRJNA677929	SAMN16774514	JADPOE000000000.1	na	5518	5518	Fusarium graminearum	strain=042826		latest	Scaffold	Major	Full	2021/05/12	ASM1834592v1	University of Warmia and Mazury in Olsztyn	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/345/925/GCA_018345925.1_ASM1834592v1			na
+GCA_003705455.2	PRJNA429441	SAMN08343424	PPIN00000000.2	representative genome	54552	54552	Pichia occidentalis	strain=NRRL Y-7552		latest	Scaffold	Major	Full	2018/11/20	ASM370545v2	UW-Madison	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/705/455/GCA_003705455.2_ASM370545v2		assembly from type material	na
+GCA_000827315.1	PRJNA61203	SAMN00738176	JMDN00000000.1	representative genome	765440	80663	Piloderma croceum F 1598	strain=F 1598		latest	Scaffold	Major	Full	2015/01/30	Piloderma croceum F 1598 v1.0	DOE Joint Genome Institute	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/827/315/GCA_000827315.1_Piloderma_croceum_F_1598_v1.0			na
+GCA_905066965.2	PRJEB40915	SAMEA7473260	CAJHIF000000000.2	na	318829	318829	Pyricularia oryzae		AG059	latest	Contig	Major	Full	2020/12/16	AG059_contigs_polished	THE SAINSBURY LABORATORY	na	na	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/066/965/GCA_905066965.2_AG059_contigs_polished			na
diff --git a/tests/files/genomes/refseq/archaea/assembly_summary_historical.txt b/tests/files/genomes/refseq/archaea/assembly_summary_historical.txt
new file mode 100644
index 0000000..7019e3d
--- /dev/null
+++ b/tests/files/genomes/refseq/archaea/assembly_summary_historical.txt
@@ -0,0 +1,22 @@
+#   See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file.
+# assembly_accession	bioproject	biosample	wgs_master	refseq_category	taxid	species_taxid	organism_name	infraspecific_name	isolate	version_status	assembly_level	release_type	genome_rep	seq_rel_date	asm_name	submitter	gbrs_paired_asm	paired_asm_comp	ftp_path	excluded_from_refseq	relation_to_type_material	asm_not_live_date
+GCF_004137855.1	PRJNA224116	SAMN08804086	QBKB00000000.1	representative genome	2138083	2138083	Methanohalophilus profundi	strain=SLHTYRO		latest	Scaffold	Major	Full	2019/02/05	ASM413785v1	UBO	GCA_004137855.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/137/855/GCF_004137855.1_ASM413785v1		assembly from type material	na
+GCF_009184545.1	PRJNA224116	SAMN09291540	QJOW00000000.1	representative genome	2212850	2212850	Halosegnis rubeus	strain=F17-44		latest	Scaffold	Major	Full	2019/10/19	ASM918454v1	University of Sevilla, Spain	GCA_009184545.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/184/545/GCF_009184545.1_ASM918454v1		assembly from type material	na
+GCF_009674625.1	PRJNA224116	SAMN13255728	WKJQ00000000.1	representative genome	2666143	2666143	Haloferax marinum	strain=MBLA0078		latest	Contig	Major	Full	2019/11/19	ASM967462v1	Incheon National University	GCA_009674625.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/674/625/GCF_009674625.1_ASM967462v1		assembly from type material	na
+GCF_002494345.1	PRJNA224116	SAMN07714153	NXNI00000000.1	representative genome	373386	373386	Natrinema ejinorense	strain=JCM 13890		latest	Contig	Major	Full	2017/10/03	ASM249434v1	World Institute of Kimchi	GCA_002494345.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/494/345/GCF_002494345.1_ASM249434v1		assembly from type material	na
+GCF_000513855.1	PRJNA224116	SAMN02597199	AZUU00000000.1	na	1150674	94694	Desulfurococcus amylolyticus Z-533	strain=Z-533		latest	Scaffold	Major	Full	2014/01/07	ASM51385v1	DOE Joint Genome Institute	GCA_000513855.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/513/855/GCF_000513855.1_ASM51385v1		assembly from type material	na
+GCF_900116205.1	PRJNA224116	SAMN04488556	FOZS00000000.1	representative genome	619731	619731	Halostagnicola kamekurae	strain=DSM 22427		latest	Contig	Major	Full	2016/11/02	IMG-taxon 2639762563 annotated assembly	DOE - JOINT GENOME INSTITUTE	GCA_900116205.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/116/205/GCF_900116205.1_IMG-taxon_2639762563_annotated_assembly		assembly from type material	na
+GCF_002215305.1	PRJNA224116	SAMN05822533	MKFG00000000.1	na	2247	2247	Halorubrum lacusprofundi	strain=DL18		latest	Contig	Major	Full	2017/07/06	ASM221530v1	University of NSW	GCA_002215305.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/215/305/GCF_002215305.1_ASM221530v1			na
+GCF_014202515.1	PRJNA224116	SAMN14908392	JACHGX000000000.1	na	2242	2242	Halobacterium salinarum	strain=DSM 669		latest	Contig	Major	Full	2020/08/14	ASM1420251v1	DOE Joint Genome Institute	GCA_014202515.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/014/202/515/GCF_014202515.1_ASM1420251v1		assembly from synonym type material	na
+GCF_002761295.1	PRJNA224116	SAMN05908879		representative genome	39664	39664	Methanohalophilus portucalensis	strain=FDF-1T		latest	Chromosome	Major	Full	2017/11/07	ASM276129v1	Macumba	GCA_002761295.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/761/295/GCF_002761295.1_ASM276129v1		assembly from type material	na
+GCF_000187225.1	PRJNA224116	SAMN02470763	AEMG00000000.1	na	797209	367189	Haladaptatus paucihalophilus DX253	strain=DX253		latest	Contig	Major	Full	2011/01/31	ASM18722v1	Oklahoma State University	GCA_000187225.2	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/187/225/GCF_000187225.1_ASM18722v1		assembly from type material	na
+GCF_005435225.1	PRJNA224116	SAMN10910413	SGXX00000000.1	na	1855858	1855858	Halorubrum sp. ASP121	strain=ASP121		latest	Contig	Major	Full	2019/05/16	ASM543522v1	University of Connecticut	GCA_005435225.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/435/225/GCF_005435225.1_ASM543522v1			na
+GCF_001560685.1	PRJNA224116	SAMN04305175	LPSN00000000.1	na	2285	2285	Sulfolobus acidocaldarius	strain=NG05B_CO5_08		latest	Contig	Major	Full	2016/02/11	NG05B_CO5_08	University of Illinois at Urbana-Champaign	GCA_001560685.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/560/685/GCF_001560685.1_NG05B_CO5_08			na
+GCF_900215575.1	PRJNA224116	SAMN06269185	OBEJ00000000.1	representative genome	558529	558529	Natronoarchaeum philippinense	strain=DSM 27208		latest	Contig	Major	Full	2017/09/28	IMG-taxon 2728369221 annotated assembly	DOE - JOINT GENOME INSTITUTE	GCA_900215575.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/215/575/GCF_900215575.1_IMG-taxon_2728369221_annotated_assembly		assembly from type material	na
+GCF_011319465.1	PRJNA224116	SAMN09786340	RCMB00000000.1	na	2341020	2341020	Candidatus Nitrosotalea sp. TS	strain=TS		latest	Contig	Major	Full	2020/03/16	ASM1131946v1	Chinese Academy of Sciences	GCA_011319465.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/011/319/465/GCF_011319465.1_ASM1131946v1			na
+GCF_005406325.1	PRJNA224116	SAMN11356524		representative genome	523841	2252	Haloferax mediterranei ATCC 33500	strain=ATCC 33500		latest	Complete Genome	Major	Full	2019/05/16	ASM540632v1	University of Maryland	GCA_005406325.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/406/325/GCF_005406325.1_ASM540632v1		assembly from type material	na
+GCF_005222525.1	PRJNA224116	SAMN09071532		representative genome	47304	47304	Metallosphaera prunae	strain=Ron 12/II		latest	Complete Genome	Major	Full	2019/05/07	ASM522252v1	North Carolina State University	GCA_005222525.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/222/525/GCF_005222525.1_ASM522252v1		assembly from type material	na
+GCF_000022465.1	PRJNA224116	SAMN02598422		na	439386	43080	Sulfolobus islandicus Y.G.57.14	strain=Y.G.57.14		latest	Complete Genome	Major	Full	2009/04/29	ASM2246v1	US DOE Joint Genome Institute	GCA_000022465.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/022/465/GCF_000022465.1_ASM2246v1			na
+GCF_000400975.1	PRJNA224116	SAMD00036650	BANO00000000.1	na	1261545	489138	Halarchaeum acidiphilum MH1-52-1	strain=MH1-52-1		latest	Contig	Major	Full	2013/05/16	ASM40097v1	Japan Agency for Marine-Earth Science and Technology	GCA_000400975.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/400/975/GCF_000400975.1_ASM40097v1		assembly from type material	na
+GCF_000245175.1	PRJNA224116	SAMN02471819	AHJO00000000.1	na	1132501	43080	Sulfolobus islandicus M.16.23	strain=M.16.23		latest	Chromosome	Major	Full	2012/01/25	ASM24517v2	University of Illinois	GCA_000245175.2	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/245/175/GCF_000245175.1_ASM24517v2			na
+GCF_000517445.1	PRJNA224116	SAMN03081513		representative genome	582419	582419	Thermococcus paralvinellae	strain=ES1		latest	Complete Genome	Major	Full	2014/01/10	ASM51744v1	Kyung Hee University	GCA_000517445.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/517/445/GCF_000517445.1_ASM51744v1		assembly from type material	na
diff --git a/tests/files/genomes/refseq/assembly_summary_refseq_historical.txt b/tests/files/genomes/refseq/assembly_summary_refseq_historical.txt
new file mode 100644
index 0000000..efc88d0
--- /dev/null
+++ b/tests/files/genomes/refseq/assembly_summary_refseq_historical.txt
@@ -0,0 +1,22 @@
+#   See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file.
+# assembly_accession	bioproject	biosample	wgs_master	refseq_category	taxid	species_taxid	organism_name	infraspecific_name	isolate	version_status	assembly_level	release_type	genome_rep	seq_rel_date	asm_name	submitter	gbrs_paired_asm	paired_asm_comp	ftp_path	excluded_from_refseq	relation_to_type_material	asm_not_live_date
+GCF_001261215.1	PRJNA224116	SAMEA954728	CXAQ00000000.1	na	624	624	Shigella sonnei	strain=Sh74369_401064		latest	Scaffold	Major	Full	2015/07/25	5008_7#11	SC	GCA_001261215.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/261/215/GCF_001261215.1_5008_7_11			na
+GCF_002273625.1	PRJNA224116	SAMN03893265	LKWZ00000000.1	na	1280	1280	Staphylococcus aureus	strain=ISU 930		latest	Scaffold	Major	Full	2017/08/28	ISU-930_v1.0	USDA-ARS	GCA_002273625.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/273/625/GCF_002273625.1_ISU-930_v1.0			na
+GCF_000765925.2	PRJNA224116	SAMN03075569	JRPJ00000000.2	na	37372	37372	Helicobacter bilis	strain=ATCC 49320		latest	Contig	Major	Full	2019/05/22	ASM76592v2	Massachusetts Institute of Technology	GCA_000765925.2	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/765/925/GCF_000765925.2_ASM76592v2			na
+GCF_000560105.1	PRJNA224116	SAMN02383660	JDIQ00000000.1	na	1410740	1280	Staphylococcus aureus T66282	strain=T66282		latest	Scaffold	Major	Full	2014/02/06	Stap_aure_T66282_V1	Broad Institute	GCA_000560105.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/560/105/GCF_000560105.1_Stap_aure_T66282_V1			na
+GCF_000806405.1	PRJNA224116	SAMN03222688	JUKG00000000.1	na	1639	1639	Listeria monocytogenes	strain=BHU3		latest	Contig	Major	Full	2014/12/22	ASM80640v1	Banaras Hindu University	GCA_000806405.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/806/405/GCF_000806405.1_ASM80640v1			na
+GCF_010667555.1	PRJNA224116	SAMN12785273	VYSE00000000.1	na	1689	1689	Bifidobacterium dentium	strain=BRDF 23		latest	Contig	Major	Full	2020/02/14	ASM1066755v1	University of Bologna	GCA_010667555.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/667/555/GCF_010667555.1_ASM1066755v1			na
+GCF_004354475.1	PRJNA224116	SAMN08555025	PUFE00000000.1	na	214326	1599	Latilactobacillus sakei subsp. sakei	strain=ATCC 15521		latest	Contig	Major	Full	2019/03/18	ASM435447v1	Carlsberg Research Laboratory	GCA_004354475.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/354/475/GCF_004354475.1_ASM435447v1		assembly from type material	na
+GCF_010120835.1	PRJNA485481			na	2696432	2696432	Escherichia phage nieznany			latest	Complete Genome	Major	Full	2021/02/07	ASM1012083v1		GCA_010120835.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/120/835/GCF_010120835.1_ASM1012083v1			na
+GCF_904810345.1	PRJNA224116	SAMEA7336317		na	1806	1773	Mycobacterium tuberculosis variant microti	strain=Maus III	human	latest	Complete Genome	Major	Full	2021/01/27	MmicMaus3	IP	GCA_904810345.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/904/810/345/GCF_904810345.1_MmicMaus3			na
+GCF_002245175.1	PRJNA224116	SAMN03262752	LEMZ00000000.1	na	562	562	Escherichia coli	strain=272-3565		latest	Scaffold	Major	Full	2017/08/07	ASM224517v1	Broad Institute	GCA_002245175.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/245/175/GCF_002245175.1_ASM224517v1			na
+GCF_019703895.1	PRJNA224116	SAMD00254949		na	2779671	2779671	Streptomyces sp. EAS-AB2608	strain=NBRC 114648		latest	Complete Genome	Major	Full	2021/05/11	ASM1970389v1	Global Health Research Section, hhc Data Creation Center, Eisai Co., Ltd.	GCA_019703895.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/019/703/895/GCF_019703895.1_ASM1970389v1			na
+GCF_003062865.1	PRJNA224116	SAMN08644156	PZMT00000000.1	na	573	573	Klebsiella pneumoniae	strain=ITU3908		latest	Scaffold	Major	Full	2018/04/23	ASM306286v1	Robert Koch Institute	GCA_003062865.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/062/865/GCF_003062865.1_ASM306286v1			na
+GCF_000401975.1	PRJNA224116	SAMN02403947	ASHT00000000.1	na	1329363	630	Yersinia enterocolitica subsp. palearctica YE-P1	strain=YE-P1		latest	Contig	Major	Full	2013/05/28	YE-P1_1.0	Max von Pettenkofer-Institut	GCA_000401975.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/401/975/GCF_000401975.1_YE-P1_1.0			na
+GCF_001667425.1	PRJNA224116	SAMN04691946	LZII00000000.1	na	1834104	1834104	Mycobacterium sp. 852002-51613_SCH5001154	strain=852002-51613_SCH5001154		latest	Contig	Major	Full	2016/06/17	ASM166742v1	JCVI	GCA_001667425.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/667/425/GCF_001667425.1_ASM166742v1			na
+GCF_013369175.2	PRJNA224116	SAMN14503210	JABWOC000000000.2	na	2723303	2723303	Escherichia sp. 8.2195	strain=8.2195		latest	Contig	Major	Full	2020/08/13	ASM1336917v2	FDA	GCA_013369175.2	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/013/369/175/GCF_013369175.2_ASM1336917v2			na
+GCF_003252965.1	PRJNA224116	SAMN09011133	QEPV00000000.1	na	732	732	Aggregatibacter aphrophilus	strain=C2008001229		latest	Contig	Major	Full	2018/06/19	ASM325296v1	Centers for Disease Control and Prevention	GCA_003252965.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/252/965/GCF_003252965.1_ASM325296v1			na
+GCF_017960365.1	PRJNA224116	SAMN15098422	JABVYN000000000.1	na	380021	380021	Pseudomonas protegens	strain=PPRAR03		latest	Contig	Major	Full	2021/04/14	ASM1796036v1	ETH Zurich	GCA_017960365.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/017/960/365/GCF_017960365.1_ASM1796036v1			na
+GCF_900172265.1	PRJNA224116	SAMEA102345418	FWFK00000000.1	representative genome	1529041	1529041	Roseivivax jejudonensis	strain=CECT 8625		latest	Contig	Major	Full	2017/04/29	R.jejudonensis_CECT8625_Spades_Prokka	UVEG	GCA_900172265.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/172/265/GCF_900172265.1_R.jejudonensis_CECT8625_Spades_Prokka		assembly from type material	na
+GCF_000947895.1	PRJNA224116	SAMEA2794682	CDHH00000000.1	na	1765	1773	Mycobacterium tuberculosis variant bovis	strain=MB3		latest	Scaffold	Major	Full	2015/03/03	Assembly of the genome MB3	ERA7	GCA_000947895.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/947/895/GCF_000947895.1_Assembly_of_the_genome_MB3			na
+GCF_021369635.1	PRJNA224116	SAMN23428406	JAJNBZ000000000.1	representative genome	1173085	1173085	Paenibacillus profundus	strain=YoMME		latest	Scaffold	Major	Full	2022/01/05	ASM2136963v1	Faculty of Biology at Sofia University "St. Kliment Ohridski"	GCA_021369635.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/021/369/635/GCF_021369635.1_ASM2136963v1			na
diff --git a/tests/files/genomes/refseq/fungi/assembly_summary_historical.txt b/tests/files/genomes/refseq/fungi/assembly_summary_historical.txt
new file mode 100644
index 0000000..e49e2a2
--- /dev/null
+++ b/tests/files/genomes/refseq/fungi/assembly_summary_historical.txt
@@ -0,0 +1,22 @@
+#   See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file.
+# assembly_accession	bioproject	biosample	wgs_master	refseq_category	taxid	species_taxid	organism_name	infraspecific_name	isolate	version_status	assembly_level	release_type	genome_rep	seq_rel_date	asm_name	submitter	gbrs_paired_asm	paired_asm_comp	ftp_path	excluded_from_refseq	relation_to_type_material	asm_not_live_date
+GCF_000171015.1	PRJNA264112	SAMN02744066	ABDG00000000.2	representative genome	452589	63577	Trichoderma atroviride IMI 206040	strain=IMI 206040		latest	Contig	Major	Full	2011/11/29	TRIAT v2.0	DOE Joint Genome Institute	GCA_000171015.2	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/171/015/GCF_000171015.1_TRIAT_v2.0			na
+GCF_003184765.1	PRJNA479915	SAMN05660730	PSTE00000000.1	representative genome	1448322	487661	Aspergillus aculeatinus CBS 121060	strain=CBS 121060		latest	Scaffold	Major	Full	2018/06/04	Aspacu1	DOE Joint Genome Institute	GCA_003184765.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/184/765/GCF_003184765.1_Aspacu1		assembly from type material	na
+GCF_000182805.2	PRJNA51569	SAMEA3138314	CABT00000000.2	representative genome	771870	5147	Sordaria macrospora k-hell	strain=k-hell		latest	Scaffold	Major	Full	2012/03/13	ASM18280v2	Ruhr University Bochum, Department of General and Molecular Botany	GCA_000182805.2	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/182/805/GCF_000182805.2_ASM18280v2			na
+GCF_001792695.1	PRJNA395481	SAMN04942831	LYCR00000000.1	representative genome	109264	109264	Aspergillus bombycis	strain=NRRL 26010		latest	Contig	Major	Full	2016/10/19	ASM179269v1	USDA-ARS-SRRC	GCA_001792695.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/792/695/GCF_001792695.1_ASM179269v1		assembly from type material	na
+GCF_000223465.1	PRJNA225504	SAMN00715317	AEIM00000000.1	representative genome	590646	2315449	Yamadazyma tenuis ATCC 10573	strain=ATCC 10573		latest	Scaffold	Major	Full	2011/08/25	Candida tenuis v1.0	DOE Joint Genome Institute	GCA_000223465.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/223/465/GCF_000223465.1_Candida_tenuis_v1.0		assembly from type material	na
+GCF_011947395.1	PRJNA691333	SAMN14421089	JAATWM000000000.2	representative genome	1095194	1095194	Colletotrichum karsti	strain=CkLH20		latest	Scaffold	Major	Full	2020/12/08	ASM1194739v2	Central South University of Forestry and Technology	GCA_011947395.2	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/011/947/395/GCF_011947395.1_ASM1194739v2			na
+GCF_000585515.1	PRJNA245128	SAMN00974102	AMGW00000000.1	representative genome	1182544	470704	Cladophialophora yegresii CBS 114405	strain=CBS 114405		latest	Scaffold	Major	Full	2014/03/05	Clad_yegr_CBS_114405_V1	Broad Institute	GCA_000585515.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/585/515/GCF_000585515.1_Clad_yegr_CBS_114405_V1		assembly from type material	na
+GCF_001500285.1	PRJNA342682	SAMN04009710	LKNI00000000.1	representative genome	149040	149040	Mollisia scopiformis	strain=CBS 120377		latest	Scaffold	Major	Full	2016/01/07	Phisc1	DOE Joint Genome Institute	GCA_001500285.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/500/285/GCF_001500285.1_Phisc1			na
+GCF_000988165.1	PRJNA445857	SAMN02213592	JPQZ00000000.1	representative genome	40302	40302	Nosema ceranae	strain=PA08 1199		latest	Contig	Major	Full	2015/05/05	ASM98816v1	University of Ottawa	GCA_000988165.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/988/165/GCF_000988165.1_ASM98816v1			na
+GCF_012971845.1	PRJNA645153	SAMN07172427	QCYV00000000.1	representative genome	45133	45133	Lasiodiplodia theobromae	strain=AM2As		latest	Contig	Major	Full	2020/05/04	ASM1297184v1	Beltsville Agricultural Research Center	GCA_012971845.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/012/971/845/GCF_012971845.1_ASM1297184v1			na
+GCF_000403515.1	PRJNA264001	SAMD00002584	BAOW00000000.1	representative genome	1305764	327079	Pseudozyma hubeiensis SY62	strain=SY62		latest	Scaffold	Major	Full	2013/05/16	ASM40351v1	Kitami Institute of Technology	GCA_000403515.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/403/515/GCF_000403515.1_ASM40351v1			na
+GCF_000002655.1	PRJNA14003	SAMN00115746	AAHF00000000.1	representative genome	330879	746128	Aspergillus fumigatus Af293	strain=Af293		latest	Chromosome	Major	Full	2005/06/10	ASM265v1	J. Craig Venter Institute	GCA_000002655.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/002/655/GCF_000002655.1_ASM265v1			na
+GCF_000149245.1	PRJNA177334	SAMN03081441		na	235443	5207	Cryptococcus neoformans var. grubii H99	strain=H99		latest	Chromosome	Major	Full	2014/02/07	CNA3	Broad Institute	GCA_000149245.3	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/149/245/GCF_000149245.1_CNA3			na
+GCF_000300595.1	PRJNA242544	SAMN02981278	AEHB00000000.1	representative genome	650164	231932	Phanerochaete carnosa HHB-10118-sp	strain=HHB-10118-sp		latest	Scaffold	Major	Full	2012/10/16	Phanerochaete carnosa HHB-10118-Sp v1.0	DOE Joint Genome Institute	GCA_000300595.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/300/595/GCF_000300595.1_Phanerochaete_carnosa_HHB-10118-Sp_v1.0			na
+GCF_000226545.1	PRJNA29799			representative genome	515849	2587412	Podospora anserina S mat+			latest	Contig	Major	Full	2008/05/14	ASM22654v1	Genoscope - Centre National de Séquençage	GCA_000226545.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/226/545/GCF_000226545.1_ASM22654v1			na
+GCF_000961545.1	PRJNA319337	SAMN03199974	AXCR00000000.1	representative genome	1397361	29908	Sporothrix schenckii 1099-18	strain=1099-18		latest	Contig	Major	Full	2015/03/24	S_schenckii_v1	LNCC	GCA_000961545.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/961/545/GCF_000961545.1_S_schenckii_v1			na
+GCF_010093535.1	PRJNA625772	SAMN05446602	JAAEJD000000000.1	representative genome	673940	673940	Lindgomyces ingoldianus	strain=ATCC 200398		latest	Scaffold	Major	Full	2020/01/31	Linin1	DOE Joint Genome Institute	GCA_010093535.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/093/535/GCF_010093535.1_Linin1		assembly from type material	na
+GCF_001890905.1	PRJNA374040	SAMN00788628	MRCK00000000.1	representative genome	690307	5053	Aspergillus aculeatus ATCC 16872	strain=ATCC 16872		latest	Scaffold	Major	Full	2016/12/08	Aspac1	JGI	GCA_001890905.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/890/905/GCF_001890905.1_Aspac1		assembly from type material	na
+GCF_900519145.1	PRJNA727466	SAMEA4827382	ULHA00000000.1	representative genome	120017	120017	Ustilago hordei	strain=Uho2		latest	Contig	Major	Full	2021/02/23	Uho2_v1	Technische Universitat Munchen - WZW	GCA_900519145.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/519/145/GCF_900519145.1_Uho2_v1			na
+GCF_008704595.1	PRJNA629604	SAMN11490865	SWFT00000000.1	representative genome	5481	5481	Diutina rugosa	strain=CBS 613		latest	Scaffold	Major	Full	2019/09/26	ASM870459v1	Centre for Genomic Regulation	GCA_008704595.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/008/704/595/GCF_008704595.1_ASM870459v1		assembly from type material	na
diff --git a/tests/files/pub/taxonomy/new_taxdump/new_taxdump.tar.gz b/tests/files/pub/taxonomy/new_taxdump/new_taxdump.tar.gz
new file mode 100644
index 0000000..3f71f0b
Binary files /dev/null and b/tests/files/pub/taxonomy/new_taxdump/new_taxdump.tar.gz differ
diff --git a/tests/files/pub/taxonomy/new_taxdump/new_taxdump.tar.gz.md5 b/tests/files/pub/taxonomy/new_taxdump/new_taxdump.tar.gz.md5
new file mode 100644
index 0000000..4fc09cb
--- /dev/null
+++ b/tests/files/pub/taxonomy/new_taxdump/new_taxdump.tar.gz.md5
@@ -0,0 +1 @@
+a9b0b848349863ab9413d44400f99336  files/pub/taxonomy/new_taxdump/new_taxdump.tar.gz
diff --git a/tests/files/releases/release207/207.0/MD5SUM b/tests/files/releases/release207/207.0/MD5SUM
new file mode 100644
index 0000000..30fb37f
--- /dev/null
+++ b/tests/files/releases/release207/207.0/MD5SUM
@@ -0,0 +1,2 @@
+48afb9c5ecb4ee5ed7d4d3a275fe5157  files/releases/release207/207.0/ar53_taxonomy_r207.tsv.gz
+10077bd8881757161a5a8a3454a1f75a  files/releases/release207/207.0/bac120_taxonomy_r207.tsv.gz
diff --git a/tests/files/releases/release207/207.0/ar53_taxonomy_r207.tsv.gz b/tests/files/releases/release207/207.0/ar53_taxonomy_r207.tsv.gz
new file mode 100644
index 0000000..9c2cf4a
Binary files /dev/null and b/tests/files/releases/release207/207.0/ar53_taxonomy_r207.tsv.gz differ
diff --git a/tests/files/releases/release207/207.0/bac120_taxonomy_r207.tsv.gz b/tests/files/releases/release207/207.0/bac120_taxonomy_r207.tsv.gz
new file mode 100644
index 0000000..3e95d29
Binary files /dev/null and b/tests/files/releases/release207/207.0/bac120_taxonomy_r207.tsv.gz differ
diff --git a/tests/files/simulated/assembly_summary_gtdb.txt b/tests/files/simulated/assembly_summary_gtdb.txt
index 4601cc3..14a71a8 100644
--- a/tests/files/simulated/assembly_summary_gtdb.txt
+++ b/tests/files/simulated/assembly_summary_gtdb.txt
@@ -1,2 +1,2 @@
-GCA_000145985.1	PRJNA33361	SAMN00016987		na	583356	334771	Ignisphaera aggregans DSM 17230	strain=DSM 17230		latest	Complete Genome	Major	Full	2010/08/24	ASM14598v1	US DOE Joint Genome Institute (JGI-PGF)	GCF_000145985.1	identical	ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/145/985/GCA_000145985.1_ASM14598v1	missing rRNA genes	assembly from type material
-GCA_XXXXXXXXX.X	PRJNA202	SAMN02744041		na	414004	46770	Cenarchaeum symbiosum A			latest	Chromosome	Major	Full	2006/11/20	ASM20071v1	DOE Joint Genome Institute	GCF_000200715.1	identical	ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/200/715/GCA_000200715.1_ASM20071v1	derived from environmental source	
+GCA_000145985.1	PRJNA33361	SAMN00016987		na	583356	334771	Ignisphaera aggregans DSM 17230	strain=DSM 17230		latest	Complete Genome	Major	Full	2010/08/24	ASM14598v1	US DOE Joint Genome Institute (JGI-PGF)	GCF_000145985.1	identical	ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/145/985/GCA_000145985.1_ASM14598v1	missing rRNA genes	assembly from type material	
+GCA_XXXXXXXXX.X	PRJNA202	SAMN02744041		na	414004	46770	Cenarchaeum symbiosum A			latest	Chromosome	Major	Full	2006/11/20	ASM20071v1	DOE Joint Genome Institute	GCF_000200715.1	identical	ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/200/715/GCA_000200715.1_ASM20071v1		derived from environmental source	
diff --git a/tests/files/simulated/assembly_summary_invalid_cols.txt b/tests/files/simulated/assembly_summary_invalid_cols.txt
new file mode 100644
index 0000000..e757f78
--- /dev/null
+++ b/tests/files/simulated/assembly_summary_invalid_cols.txt
@@ -0,0 +1,4 @@
+#   See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file.
+# assembly_accession	bioproject	biosample	wgs_master	refseq_category	taxid	species_taxid	organism_name	infraspecific_name	isolate	version_status	assembly_level	release_type	genome_rep	seq_rel_date	asm_name	submitter	gbrs_paired_asm	paired_asm_comp	ftp_path	excluded_from_refseq	relation_to_type_material	asm_not_live_date
+GCF_000597905.1	PRJNA224116	na	1420013	573	Klebsiella pneumoniae 30684/NJST258_2	strain=30684/NJST258_2		latest	Complete Genome	Major	Full	2014/03/19	ASM59790v1	Igenbio Inc	GCA_000597905.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/597/905/GCF_000597905.1_ASM59790v1			na
+GCF_000597925.1	PRJNA224116	SAMN02951886	ATBG00000000.1	na	1343078	587753	Pseudomonas chlororaphis HT66	strain=HT66		latest	Contig	Major	Full	2014/03/19	ASM59792v1	Shanghai JiaoTong University	GCA_000597925.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/597/925/GCF_000597925.1_ASM59792v1			na
diff --git a/tests/files/simulated/assembly_summary_invalid_headermiddle.txt b/tests/files/simulated/assembly_summary_invalid_headermiddle.txt
new file mode 100644
index 0000000..75572bd
--- /dev/null
+++ b/tests/files/simulated/assembly_summary_invalid_headermiddle.txt
@@ -0,0 +1,4 @@
+GCF_003722155.1	PRJNA224116	SAMN10345419	RJJF00000000.1	na	51203	51203	Methanohalophilus euhalobius	strain=DSM 10369		latest	Contig	Major	Full	2018/11/12	ASM372215v1	King Abdullah University of Science and Technology (KAUST)	GCA_003722155.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/722/155/GCF_003722155.1_ASM372215v1			na
+GCF_004340645.1	PRJNA224116	SAMN08777283	SMMS00000000.1	representative genome	51203	51203	Methanohalophilus euhalobius	strain=WG1_MB		latest	Contig	Major	Full	2019/03/11	ASM434064v1	DOE Joint Genome Institute	GCA_004340645.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/340/645/GCF_0#   See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file.
+GCF_002287175.1	PRJNA224116	SAMN04229035	LMVM00000000.1	representative genome	2161	2161	Methanobacterium bryantii	strain=M.o.H.		latest	Contig	Major	Full	2017/09/06	ASM228717v1	University of California Santa Barbara	GCA_002287175.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/287/175/GCF_002287175.1_ASM228717v1		assembly from type material	na
+GCF_000762265.1	PRJNA224116	SAMN03085433		na	2162	2162	Methanobacterium formicicum	strain=BRM9		latest	Complete Genome	Major	Full	2014/10/02	ASM76226v1	PGgRc	GCA_000762265.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/762/265/GCF_000762265.1_ASM76226v1			na
diff --git a/tests/files/simulated/assembly_summary_invalid_justheader.txt b/tests/files/simulated/assembly_summary_invalid_justheader.txt
new file mode 100644
index 0000000..b51ef41
--- /dev/null
+++ b/tests/files/simulated/assembly_summary_invalid_justheader.txt
@@ -0,0 +1,2 @@
+#   See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file.
+# assembly_accession	bioproject	biosample	wgs_master	refseq_category	taxid	species_taxid	organism_name	infraspecific_name	isolate	version_status	assembly_level	release_type	genome_rep	seq_rel_date	asm_name	submitter	gbrs_paired_asm	paired_asm_comp	ftp_path	excluded_from_refseq	relation_to_type_material	asm_not_live_date
diff --git a/tests/files/simulated/assembly_summary_invalid_xCF.txt b/tests/files/simulated/assembly_summary_invalid_xCF.txt
new file mode 100644
index 0000000..2b38b26
--- /dev/null
+++ b/tests/files/simulated/assembly_summary_invalid_xCF.txt
@@ -0,0 +1,4 @@
+#   See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file.
+# assembly_accession	bioproject	biosample	wgs_master	refseq_category	taxid	species_taxid	organism_name	infraspecific_name	isolate	version_status	assembly_level	release_type	genome_rep	seq_rel_date	asm_name	submitter	gbrs_paired_asm	paired_asm_comp	ftp_path	excluded_from_refseq	relation_to_type_material	asm_not_live_date
+xCF_000597905.1	PRJNA224116	SAMN03081501		na	1420013	573	Klebsiella pneumoniae 30684/NJST258_2	strain=30684/NJST258_2		latest	Complete Genome	Major	Full	2014/03/19	ASM59790v1	Igenbio Inc	GCA_000597905.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/597/905/GCF_000597905.1_ASM59790v1			na
+GCF_000597925.1	PRJNA224116	SAMN02951886	ATBG00000000.1	na	1343078	587753	Pseudomonas chlororaphis HT66	strain=HT66		latest	Contig	Major	Full	2014/03/19	ASM59792v1	Shanghai JiaoTong University	GCA_000597925.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/597/925/GCF_000597925.1_ASM59792v1			na
diff --git a/tests/files/simulated/assembly_summary_na_url.txt b/tests/files/simulated/assembly_summary_na_url.txt
index 9acd09d..c587dfc 100755
--- a/tests/files/simulated/assembly_summary_na_url.txt
+++ b/tests/files/simulated/assembly_summary_na_url.txt
@@ -1,3 +1,3 @@
-GCF_000226095.1	PRJNA79339	SAMN00739435		representative genome	573729	78579	Thermothelomyces thermophilus ATCC 42464	strain=ATCC 42464		latest	Complete Genome	Major	Full	2011/09/16	ASM22609v1	DOE Joint Genome Institute	GCA_000226095.1	identical	na		
+GCF_000226095.1	PRJNA79339	SAMN00739435		representative genome	573729	78579	Thermothelomyces thermophilus ATCC 42464	strain=ATCC 42464		latest	Complete Genome	Major	Full	2011/09/16	ASM22609v1	DOE Joint Genome Institute	GCA_000226095.1	identical	na			na
 GCF_000947895.1	PRJNA224116	SAMEA2794682	CDHH00000000.1	na	1765	1773	Mycobacterium tuberculosis variant bovis	strain=MB3		latest	Scaffold	Major	Full	2015/03/03	Assembly of the genome MB3	ERA7	GCA_000947895.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/947/895/GCF_000947895.1_Assembly_of_the_genome_MB3			na
-GCF_021369635.1	PRJNA224116	SAMN23428406	JAJNBZ000000000.1	representative genome	1173085	1173085	Paenibacillus profundus	strain=YoMME		latest	Scaffold	Major	Full	2022/01/05	ASM2136963v1	Faculty of Biology at Sofia University "St. Kliment Ohridski"	GCA_021369635.1	identical	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/021/369/635/GCF_021369635.1_ASM2136963v1			na
+GCF_021369635.1	PRJNA224116	SAMN23428406	JAJNBZ000000000.1	representative genome	1173085	1173085	Paenibacillus profundus	strain=YoMME		latest	Scaffold	Major	Full	2022/01/05	ASM2136963v1	Faculty of Biology at Sofia University "St. Kliment Ohridski"	GCA_021369635.1	identicl	https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/021/369/635/GCF_021369635.1_ASM2136963v1			na
diff --git a/tests/integration_offline.bats b/tests/integration_offline.bats
index e7b9e7a..4de2a28 100644
--- a/tests/integration_offline.bats
+++ b/tests/integration_offline.bats
@@ -9,6 +9,10 @@ setup_file() {
     # Get tests dir
     DIR="$( cd "$( dirname "$BATS_TEST_FILENAME" )" >/dev/null 2>&1 && pwd )"
    
+
+    files_dir="$DIR/files/"
+    export files_dir
+
     # Export local_dir to use local files offline instead of ncbi ftp online when testing
     local_dir="$DIR/files/"
     export local_dir
@@ -20,6 +24,11 @@ setup_file() {
     export outprefix
 }
 
+@test "Run genome_updater.sh without args" {
+    run ./genome_updater.sh
+    assert_failure
+}
+
 @test "Run genome_updater.sh and show help" {
     run ./genome_updater.sh -h
     assert_success
@@ -28,62 +37,106 @@ setup_file() {
 @test "Run genome_updater.sh and show debug info" {
     run ./genome_updater.sh -Z
     assert_success
+    assert_output --partial "GNU bash" # Look for GNU --version info
 }
 
-@test "DB refseq" {
-    outdir=${outprefix}db-refseq/
-    label="test"
+@test "Database -d refseq" {
+    outdir=${outprefix}d-refseq/
+    label="refseq"
     run ./genome_updater.sh -d refseq -b ${label} -o ${outdir}
     sanity_check ${outdir} ${label}
-
-    # Check filenames
+    assert [ $(count_files ${outdir} ${label}) -gt 0 ] # contains files
     for file in $(ls_files ${outdir} ${label}); do
         [[ "$(basename $file)" = GCF* ]] # filename starts with GCF_
     done
 }
 
-@test "DB genbank" {
-    outdir=${outprefix}db-genbank/
-    label="test"
+@test "Database -d genbank" {
+    outdir=${outprefix}d-genbank/
+    label="genbank"
     run ./genome_updater.sh -d genbank -b ${label} -o ${outdir}
     sanity_check ${outdir} ${label}
-    
-    # Check filenames
+    assert [ $(count_files ${outdir} ${label}) -gt 0 ] # contains files
     for file in $(ls_files ${outdir} ${label}); do
         [[ "$(basename $file)" = GCA* ]] # filename starts with GCA_
     done
 }
 
-@test "DB refseq and genbank" {
-    outdir=${outprefix}db-refseq-genbank/
-    label="test"
+@test "Database -d refseq,genbank" {
+    outdir=${outprefix}d-refseq-genbank/
+    
+    label="refseq"
+    run ./genome_updater.sh -d refseq -b ${label} -o ${outdir}
+    sanity_check ${outdir} ${label}
+    files_refseq=$(count_files ${outdir} ${label})
+    assert [ ${files_refseq} -gt 0 ] # contains files
+    for file in $(ls_files ${outdir} ${label}); do
+        [[ "$(basename $file)" = GCF* ]] # filename starts with GCF_
+    done
+
+    label="genbank"
+    run ./genome_updater.sh -d genbank -b ${label} -o ${outdir}
+    sanity_check ${outdir} ${label}
+    files_genbank=$(count_files ${outdir} ${label})
+    assert [ ${files_genbank} -gt 0 ] # contains files
+    for file in $(ls_files ${outdir} ${label}); do
+        [[ "$(basename $file)" = GCA* ]] # filename starts with GCA_
+    done
+
+    label="refseq-genbank"
     run ./genome_updater.sh -d refseq,genbank -b ${label} -o ${outdir}
     sanity_check ${outdir} ${label}
+    assert [ $(count_files ${outdir} ${label}) -eq $((files_refseq+files_genbank)) ]
 }
 
-@test "Organism group archaea" {
-    outdir=${outprefix}og-archaea/
+@test "Organism group -g archaea" {
+    outdir=${outprefix}g-archaea/
     label="test"
-    run ./genome_updater.sh -d refseq -o archaea -b ${label} -o ${outdir}
+    run ./genome_updater.sh -d refseq -g archaea -b ${label} -o ${outdir}
     sanity_check ${outdir} ${label}
+    assert [ $(count_files ${outdir} ${label}) -gt 0 ] # contains files
 }
 
-@test "Organism group archaea and fungi" {
-    outdir=${outprefix}og-archaea-fungi/
+@test "Organism group -g fungi" {
+    outdir=${outprefix}g-fungi/
     label="test"
-    run ./genome_updater.sh -d refseq -o archaea,fungi -b ${label} -o ${outdir}
+    run ./genome_updater.sh -d refseq -g fungi -b ${label} -o ${outdir}
     sanity_check ${outdir} ${label}
+    assert [ $(count_files ${outdir} ${label}) -gt 0 ] # contains files
 }
 
-@test "Species taxids" {
-    outdir=${outprefix}species-taxids/
+@test "Organism group -g archaea,fungi" {
+    outdir=${outprefix}g-archaea-fungi/
+
+    label="archaea"
+    run ./genome_updater.sh -d refseq -g archaea -b ${label} -o ${outdir}
+    sanity_check ${outdir} ${label}
+    files_arc=$(count_files ${outdir} ${label})
+    assert [ ${files_arc} -gt 0 ] # contains files
+
+    label="fungi"
+    run ./genome_updater.sh -d refseq -g fungi -b ${label} -o ${outdir}
+    sanity_check ${outdir} ${label}
+    files_fun=$(count_files ${outdir} ${label})
+    assert [ ${files_fun} -gt 0 ] # contains files
+
+    label="archaea-fungi"
+    run ./genome_updater.sh -d refseq -g archaea,fungi -b ${label} -o ${outdir}
+    sanity_check ${outdir} ${label}
+    assert [ $(count_files ${outdir} ${label}) -eq $((files_arc+files_fun)) ]
+}
+
+@test "Taxids leaves ncbi" {
+    # taxids on lower levels need the complete taxonomy to work properly (tested online)
+
+    outdir=${outprefix}taxids-leaves-ncbi/
     label="test"
     # Get all possible taxids from base assembly_summary
     txids=( $(get_values_as ${local_dir}genomes/refseq/assembly_summary_refseq.txt 7 ) )
     #echo ${txids[@]} >&3
 
     # Use third
-    run ./genome_updater.sh -d refseq -S "${txids[2]}" -b ${label} -o ${outdir}
+    run ./genome_updater.sh -d refseq -T "${txids[2]}" -b ${label} -o ${outdir}
     sanity_check ${outdir} ${label}
 
     # Check if output contains only used taxids
@@ -95,6 +148,17 @@ setup_file() {
     assert_equal ${txids[2]} ${txids_ret[0]} #same taxid 
 }
 
+@test "Taxids leaves gtdb" {
+    # taxids on lower levels need the complete taxonomy to work properly (tested online)
+
+    outdir=${outprefix}taxids-leaves-gtdb/
+    label="test"
+    # Use fixed one
+    run ./genome_updater.sh -d refseq,genbank -T 's__MWBV01 sp002069705' -b ${label} -o ${outdir} -g archaea -M gtdb
+    sanity_check ${outdir} ${label}
+    assert [ $(count_files ${outdir} ${label}) -eq 1 ]
+}
+
 @test "Refseq category" {
     outdir=${outprefix}refseq-category/
     label="test"
@@ -164,15 +228,15 @@ setup_file() {
     done
 }
 
-@test "Top species" {
-    outdir=${outprefix}top-species/
+@test "Top 1 leaves ncbi" {
+    outdir=${outprefix}top-leaves-ncbi/
     label="test"
     # Keep only top 1 for selected species
-    run ./genome_updater.sh -d refseq,genbank -P 1 -b ${label} -o ${outdir}
+    run ./genome_updater.sh -d refseq,genbank -A 1 -b ${label} -o ${outdir}
     sanity_check ${outdir} ${label}
 
     # Get counts of species taxids on output
-    txids_ret=$(get_values_as ${outdir}assembly_summary.txt 7 )
+    txids_ret=$(get_values_as ${outdir}assembly_summary.txt 6 )
     ret_occ=( $( echo ${txids_ret}  | tr ' ' '\n' | sort | uniq -c | awk '{print $1}' ) )
    
     # Should have one assembly for each species taxid
@@ -181,23 +245,65 @@ setup_file() {
     done
 }
 
-@test "Top taxids" {
-    outdir=${outprefix}top-taxids/
+@test "Top 1 species ncbi" {
+    outdir=${outprefix}top-species-ncbi/
     label="test"
-    # Keep only top 1 for selected leaf
-    run ./genome_updater.sh -d refseq,genbank -A 1 -b ${label} -o ${outdir}
+    # Keep only top 1 for selected species
+    run ./genome_updater.sh -d refseq,genbank -A species:1 -b ${label} -o ${outdir}
     sanity_check ${outdir} ${label}
 
-    # Get counts of leaf taxids on output
-    txids_ret=$(get_values_as ${outdir}assembly_summary.txt 6 )
+    # Get counts of species taxids on output
+    txids_ret=$(get_values_as ${outdir}assembly_summary.txt 7 )
     ret_occ=( $( echo ${txids_ret}  | tr ' ' '\n' | sort | uniq -c | awk '{print $1}' ) )
    
-    # Should have one assembly for each leaf taxid
+    # Should have one assembly for each species taxid
     for occ in ${ret_occ[@]}; do
         assert_equal ${occ} 1
     done
 }
 
+@test "Top 1 superkingdom ncbi" {
+    outdir=${outprefix}top-superkingdom-ncbi/
+    label="test"
+    # Keep only top 1 for each superkingdom
+    run ./genome_updater.sh -d refseq -g archaea,fungi -A superkingdom:1 -b ${label} -o ${outdir}
+    sanity_check ${outdir} ${label}
+
+    # Check if output contains one file for archaea and one for fungi
+    assert [ $(count_files ${outdir} ${label}) -eq 2 ]
+}
+
+@test "Top gtdb" {
+    outdir=${outprefix}top-gtdb/
+    label_none="none"
+    # no top
+    run ./genome_updater.sh -M gtdb -d refseq,genbank -g archaea -b ${label_none} -o ${outdir}
+    sanity_check ${outdir} ${label_none}
+
+    # Keep only top 1 for species
+    label_species="top-species"
+    run ./genome_updater.sh -M gtdb -d refseq,genbank -g archaea -A species:1 -b ${label_species} -o ${outdir}
+    sanity_check ${outdir} ${label_species}
+    # Check that the filter reduces the number of files
+    assert [ $(count_files ${outdir} ${label_none}) -gt $(count_files ${outdir} ${label_species}) ]
+
+    # Keep only top 1 for genus
+    label_genus="top-genus"
+    run ./genome_updater.sh -M gtdb -d refseq,genbank -g archaea -A genus:1 -b ${label_genus} -o ${outdir}
+    sanity_check ${outdir} ${label_genus}
+    assert [ $(count_files ${outdir} ${label_species}) -gt $(count_files ${outdir} ${label_genus}) ]
+
+    # Keep only top 1 for phylum
+    label_phylum="top-phylum"
+    run ./genome_updater.sh -M gtdb -d refseq,genbank -g archaea -A phylum:1 -b ${label_phylum} -o ${outdir}
+    sanity_check ${outdir} ${label_phylum}
+    assert [ $(count_files ${outdir} ${label_genus}) -gt $(count_files ${outdir} ${label_phylum}) ]
+
+    # Check if not 0
+    assert [ $(count_files ${outdir} ${label_phylum}) -gt 0 ]
+}
+
+
 @test "Date start filter" {
     outdir=${outprefix}date-start-filter/
     
@@ -262,9 +368,8 @@ setup_file() {
     sanity_check ${outdir} ${label}
 
     # Check if report was printed and has all lines reported
-    report_file="${outdir}${label}/updated_assembly_accession.txt"
-    assert_file_exist "${report_file}"
-    assert_equal $(count_lines_file "${report_file}") $(count_lines_file ${outdir}assembly_summary.txt)
+    assert_file_exist ${outdir}${label}/*_assembly_accession.txt
+    assert_equal $(count_lines_file ${outdir}${label}/*_assembly_accession.txt) $(count_lines_file ${outdir}assembly_summary.txt)
 }
 
 @test "Report sequence accession" {
@@ -274,8 +379,7 @@ setup_file() {
     sanity_check ${outdir} ${label}
 
     # Check if report was printed
-    report_file="${outdir}${label}/updated_sequence_accession.txt"
-    assert_file_exist "${report_file}"
+    assert_file_exist ${outdir}${label}/*_sequence_accession.txt
 }
 
 @test "Report urls" {
@@ -297,7 +401,7 @@ setup_file() {
     outdir=${outprefix}external-assembly-summary/
     label="test"
     # Get assembly_summary from -e (not directly from url)
-    run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -e ${local_dir}genomes/refseq/assembly_summary_refseq.txt
+    run ./genome_updater.sh -b ${label} -o ${outdir} -e ${local_dir}genomes/refseq/assembly_summary_refseq.txt
     sanity_check ${outdir} ${label}
 }
 
@@ -312,25 +416,66 @@ setup_file() {
 
     # Second version with more entries (refseq,genbank)
     label2="v2"
-    run ./genome_updater.sh -d refseq -b ${label2} -o ${outdir} -d refseq,genbank
+    run ./genome_updater.sh -b ${label2} -o ${outdir} -d refseq,genbank
     sanity_check ${outdir} ${label2}
 
     # Third version with same entries (nothing to download)
     label3="v3"
-    run ./genome_updater.sh -d refseq -b ${label3} -o ${outdir} -d refseq,genbank
+    run ./genome_updater.sh -b ${label3} -o ${outdir} -d refseq,genbank
     sanity_check ${outdir} ${label3}
 
     # Check log for no updates
-    grep "0 updated, 0 deleted, 0 new entries" ${outdir}${label3}/*.log # >&3
+    grep "0 updated, 0 removed, 0 new entries" ${outdir}${label3}/*.log # >&3
     assert_success
 
     # Fourth version with the same as second but rolling back from first, re-download files
     label4="v4"
-    run ./genome_updater.sh -d refseq -b ${label4} -o ${outdir} -d refseq,genbank -B v1
+    run ./genome_updater.sh -b ${label4} -o ${outdir} -d refseq,genbank -B v1
+    sanity_check ${outdir} ${label4}
+
+    # Check log for updates
+    grep "0 updated, 0 removed, [1-9][0-9]* new entries" ${outdir}${label4}/*.log # >&3
+    assert_success
+}
+
+@test "Rollback label auto update" {
+    outdir=${outprefix}rollback-label-auto-update/
+    
+    # Base version with only refseq
+    label1="v1"
+    run ./genome_updater.sh -d refseq -b ${label1} -o ${outdir}
+    sanity_check ${outdir} ${label1}
+
+    # Second version with more entries (refseq,genbank)
+    label2="v2"
+    run ./genome_updater.sh -b ${label2} -o ${outdir} -d refseq,genbank
+    sanity_check ${outdir} ${label2}
+
+    # Third version with same entries (nothing to download)
+    label3="v3"
+    run ./genome_updater.sh -b ${label3} -o ${outdir}
+    sanity_check ${outdir} ${label3}
+
+    # Check log for no updates
+    grep "0 updated, 0 removed, 0 new entries" ${outdir}${label3}/*.log # >&3
+    assert_success
+
+    # Fourth version with the same as second but rolling back from first
+    label4="v4"
+    run ./genome_updater.sh -b ${label4} -o ${outdir} -B v1 -d refseq,genbank
     sanity_check ${outdir} ${label4}
 
     # Check log for updates
-    grep "0 updated, 0 deleted, [0-9]* new entries" ${outdir}${label4}/*.log # >&3
+    grep "0 updated, 0 removed, [1-9][0-9]* new entries" ${outdir}${label4}/*.log # >&3
+    assert_success
+
+    # Continue the update from v4 (without rolling back to v1) 
+    label5="v5"
+    run ./genome_updater.sh -b ${label5} -o ${outdir} -B ""
+    sanity_check ${outdir} ${label5}
+
+    # Check log for updates
+    grep "0 updated, 0 removed, 0 new entries" ${outdir}${label5}/*.log # >&3
     assert_success
 }
 
@@ -379,15 +524,6 @@ setup_file() {
     assert_output ""
 }
 
-@test "Using curl" {
-    outdir=${outprefix}using-curl/
-    label="test"
-    use_curl=1
-    export use_curl
-    run ./genome_updater.sh -d refseq -b ${label} -o ${outdir}
-    sanity_check ${outdir} ${label}
-}
-
 @test "Mode FIX" {
     outdir=${outprefix}mode-fix/
     label="test"
@@ -436,3 +572,68 @@ setup_file() {
     run ./genome_updater.sh -d refseq -g archaea,fungi -b ${label} -o ${outdir}
     sanity_check ${outdir} ${label}
 }
+
+@test "Mode auto UPDATE" {
+    outdir=${outprefix}mode-auto-update/
+    label="test"
+
+    # Dry-run NEW
+    run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -g archaea -k
+    assert_success
+    assert_dir_not_exist ${outdir}
+
+    # Real run NEW
+    run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -g archaea
+    sanity_check ${outdir} ${label}
+
+    # Dry-run UPDATE (use same parameters)
+    label="update"
+    run ./genome_updater.sh -o ${outdir} -b ${label} -k
+    assert_success
+
+    # Real run (nothing to update, but carry parameters)
+    run ./genome_updater.sh -o ${outdir} -b ${label}
+    sanity_check ${outdir} ${label}
+
+    # Dry-run UPDATE
+    label="update2"
+    run ./genome_updater.sh -o ${outdir} -b ${label} -g "" -d refseq,genbank -u -k
+    assert_success
+
+    # Real run UPDATE: remove org (get all), add database, add bool report
+    run ./genome_updater.sh -o ${outdir} -b ${label} -g "" -d refseq,genbank -u
+    sanity_check ${outdir} ${label}
+
+    assert_file_exist ${outdir}${label}/*_assembly_accession.txt
+
+    # Check log for updates
+    grep "0 updated, [1-9][0-9]* removed, [1-9][0-9]* new entries" ${outdir}${label}/*.log # >&3
+    assert_success
+}
+
+@test "Tax. Mode GTDB" {
+    outdir=${outprefix}tax-gtdb/
+    label="test"
+    run ./genome_updater.sh -d refseq,genbank -g archaea -b ${label} -o ${outdir} -M gtdb
+    sanity_check ${outdir} ${label}
+    
+    # Check log for filter with GTDB
+    grep "[1-9][0-9]* assemblies removed not in GTDB" ${outdir}${label}/*.log # >&3
+    assert_success
+}
+
+@test "Invalid assembly_summary.txt" {
+    outdir=${outprefix}invalid-as/
+    label="cols"
+    run ./genome_updater.sh -o ${outdir} -b ${label} -e ${files_dir}simulated/assembly_summary_invalid_cols.txt
+    assert_failure
+    label="headermiddle"
+    run ./genome_updater.sh -o ${outdir} -b ${label} -e ${files_dir}simulated/assembly_summary_invalid_headermiddle.txt
+    assert_failure
+    label="justheader"
+    run ./genome_updater.sh -o ${outdir} -b ${label} -e ${files_dir}simulated/assembly_summary_invalid_justheader.txt
+    assert_failure
+    label="xCF"
+    run ./genome_updater.sh -o ${outdir} -b ${label} -e ${files_dir}simulated/assembly_summary_invalid_xCF.txt
+    assert_failure
+}
diff --git a/tests/integration_online.bats b/tests/integration_online.bats
index e85df3f..bc7bce6 100644
--- a/tests/integration_online.bats
+++ b/tests/integration_online.bats
@@ -30,6 +30,7 @@ setup_file() {
     # Protozoa in refseq is the smallest available assembly_summary at the time of writing this test (01.2022)
     run ./genome_updater.sh -d refseq -g protozoa -b ${label} -t ${threads} -o ${outdir}
     sanity_check ${outdir} ${label}
+    assert [ $(count_files ${outdir} ${label}) -gt 0 ]
 
     # Check filenames
     for file in $(ls_files ${outdir} ${label}); do
@@ -37,17 +38,68 @@ setup_file() {
     done
 }
 
+@test "Taxids genus ncbi" {
+    outdir=${outprefix}taxids-genus-ncbi/
+    mkdir -p "${outdir}"
+
+    # Protozoa in refseq is the smallest available assembly_summary at the time of writing this test (01.2022)
+    # 5820 genus Plasmodium
+    label_genus="genus"
+    run ./genome_updater.sh -d refseq -g protozoa -T 5820 -b ${label_genus} -t ${threads} -o ${outdir}
+    sanity_check ${outdir} ${label_genus}
+
+    # 5794 phylum Apicomplexa
+    label_phylum="phylum"
+    run ./genome_updater.sh -d refseq -g protozoa -T 5794 -b ${label_phylum} -t ${threads} -o ${outdir}
+    sanity_check ${outdir} ${label_phylum}
+    
+    # More files filtering by phylum than genus
+    assert [ $(count_files ${outdir} ${label_phylum}) -gt $(count_files ${outdir} ${label_genus}) ]
+    assert [ $(count_files ${outdir} ${label_phylum}) -gt 0 ]
+
+}
+
+@test "Taxids genus gtdb" {
+    outdir=${outprefix}taxids-genus-gtdb/
+    # p__Undinarchaeota lineage
+    #d__Archaea; p__Undinarchaeota; c__Undinarchaeia; o__Undinarchaeales; f__Naiadarchaeaceae; g__Naiadarchaeum; s__Naiadarchaeum limnaeum 
+    #d__Archaea; p__Undinarchaeota; c__Undinarchaeia; o__Undinarchaeales; f__Undinarchaeaceae; g__Undinarchaeum; s__Undinarchaeum marinum
+    #d__Archaea; p__Undinarchaeota; c__Undinarchaeia; o__Undinarchaeales; f__UBA543; g__UBA543; s__UBA543 sp002502135 
+
+    label_genus="genus"
+    run ./genome_updater.sh -d genbank -g archaea -M gtdb -T "g__Naiadarchaeum,g__Undinarchaeum" -b ${label_genus} -t ${threads} -o ${outdir}
+    sanity_check ${outdir} ${label_genus}
+
+    label_phylum="phylum"
+    run ./genome_updater.sh -d genbank -g archaea -M gtdb -T "p__Undinarchaeota" -b ${label_phylum} -t ${threads} -o ${outdir}
+    sanity_check ${outdir} ${label_phylum}
+    
+    # More files filtering by phylum than genus
+    assert [ $(count_files ${outdir} ${label_phylum}) -gt $(count_files ${outdir} ${label_genus}) ]
+    assert [ $(count_files ${outdir} ${label_phylum}) -gt 0 ]
+
+}
+
+@test "Curl" {
+    outdir=${outprefix}curl/
+    label="test"
+
+    # Protozoa in refseq is the smallest available assembly_summary at the time of writing this test (01.2022)
+    run ./genome_updater.sh -d refseq -g protozoa -b ${label} -t ${threads} -o ${outdir} -L curl
+    sanity_check ${outdir} ${label}
+}
+
 @test "NA URL" {
     outdir=${outprefix}na-url/
     label="test"
-    run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_na_url.txt
+    run ./genome_updater.sh -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_na_url.txt
     sanity_check ${outdir} ${label}
 }
 
 @test "All invalid URLs" {
     outdir=${outprefix}all-invalid-url/
     label="test"
-    run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_all_invalid_url.txt
+    run ./genome_updater.sh -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_all_invalid_url.txt
     assert_success
     assert_equal $(count_files ${outdir} ${label}) 0
 }
@@ -55,20 +107,54 @@ setup_file() {
 @test "Some invalid URLs" {
     outdir=${outprefix}some-invalid-url/
     label="test"
-    run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt
+    run ./genome_updater.sh -b ${label} -o ${outdir} -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt
     assert_success
     assert_equal $(count_files ${outdir} ${label}) 2
 }
 
+@test "Conditional exit" {
+
+    outdir=${outprefix}conditional-exit/
+    label="n0"
+    # 2 out of 4 genomes will be downloaded
+    run ./genome_updater.sh -n 0 -R 1 -o ${outdir}${label}/ -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt
+    assert_success
+
+    label="n1"
+    run ./genome_updater.sh -n 1 -R 1 -o ${outdir}${label}/ -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt
+    assert_failure
+    
+    label="n2"
+    run ./genome_updater.sh -n 2 -R 1 -o ${outdir}${label}/ -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt
+    assert_failure
+
+    label="n3"
+    run ./genome_updater.sh -n 3 -R 1 -o ${outdir}${label}/ -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt
+    assert_success
+
+    label="n0.2"
+    run ./genome_updater.sh -n 0.2 -R 1 -o ${outdir}${label}/ -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt
+    assert_failure
+
+    label="n0.5"
+    run ./genome_updater.sh -n 0.5 -R 1 -o ${outdir}${label}/ -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt
+    assert_failure
+
+    label="n0.51"
+    run ./genome_updater.sh -n 0.51 -R 1 -o ${outdir}${label}/ -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt
+    assert_success
+
+    label="n0.99"
+    run ./genome_updater.sh -n 0.99 -R 1 -o ${outdir}${label}/ -t ${threads} -e ${files_dir}simulated/assembly_summary_some_invalid_url.txt
+    assert_success
+
+}
 
 @test "Multiple file types" {
     outdir=${outprefix}multiple-file-types/
     label="test"
 
-    # archaea has a relative small assembly_summary
-    # taxid 2180 small archaeal genome (as of 01.2022)
-    # Get one assembly for the species (3 file types)
-    run ./genome_updater.sh -d refseq -g archaea -S 2180 -P 1 -b ${label} -t ${threads} -o ${outdir} -f "assembly_report.txt,protein.faa.gz,genomic.fna.gz"
+    run ./genome_updater.sh -d refseq -g protozoa -D 20210101 -E 20220101 -b ${label} -t ${threads} -o ${outdir} -f "assembly_report.txt,protein.faa.gz,genomic.fna.gz"
     sanity_check ${outdir} ${label} 3
 }
 
@@ -77,11 +163,11 @@ setup_file() {
     label="test"
 
     # 5690 Trypanosoma genus - around 6 genomes, get only one per species (01.2022)
-    run ./genome_updater.sh -d refseq -g protozoa -T 5690 -P 1 -b ${label} -o ${outdir} -t ${threads} 
+    run ./genome_updater.sh -d refseq -g protozoa -T 5690 -A 1 -b ${label} -o ${outdir} -t ${threads} 
     sanity_check ${outdir} ${label}
 
-    # Get counts of species taxids on output
-    txids_ret=$(get_values_as ${outdir}assembly_summary.txt 7 )
+    # Get counts of taxids on output
+    txids_ret=$(get_values_as ${outdir}assembly_summary.txt 6 )
     ret_occ=( $( echo ${txids_ret}  | tr ' ' '\n' | sort | uniq -c | awk '{print $1}' ) )
    
     # Should have one assembly for each species taxid
@@ -94,8 +180,7 @@ setup_file() {
     outdir=${outprefix}md5-verbose-log/
     label="test"
 
-    # 5693 Trypanosoma cruzi
-    run ./genome_updater.sh -d refseq -g protozoa -S 5693 -P 1 -b ${label} -o ${outdir} -t ${threads} -m -V
+    run ./genome_updater.sh -d refseq -g protozoa -D 20210101 -E 20220101 -b ${label} -o ${outdir} -t ${threads} -m -V
     sanity_check ${outdir} ${label}
 
     # Check if MD5 is verified
@@ -108,18 +193,29 @@ setup_file() {
     label="test"
 
     # 5693 Trypanosoma cruzi
-    run ./genome_updater.sh -d refseq -e ${files_dir}simulated/assembly_summary_gtdb.txt -b ${label} -o ${outdir} -t ${threads} -z
+    run ./genome_updater.sh -e ${files_dir}simulated/assembly_summary_gtdb.txt -b ${label} -o ${outdir} -t ${threads} -M gtdb
     sanity_check ${outdir} ${label}
 
     # 1 out of 2 available on GTDB
     assert_equal $(count_files ${outdir} ${label}) 1
 }
 
-@test "Download taxdump" {
-    outdir=${outprefix}download-taxdump/
+@test "Download taxdump gtdb" {
+    outdir=${outprefix}download-taxdump-gtdb/
+    label="test"
+
+    run ./genome_updater.sh -d refseq -g archaea -D 20210101 -E 20220101 -b ${label} -o ${outdir} -t ${threads} -a -M gtdb
+    sanity_check ${outdir} ${label}
+
+    # Downloaded taxdump
+    assert_file_exist ${outdir}${label}/ar53_taxonomy_r207.tsv.gz
+}
+
+@test "Download taxdump ncbi" {
+    outdir=${outprefix}download-taxdump-ncbi/
     label="test"
 
-    run ./genome_updater.sh -d refseq -g protozoa -S 5693 -P 1 -b ${label} -o ${outdir} -t ${threads} -a
+    run ./genome_updater.sh -d refseq -g protozoa -D 20210101 -E 20220101 -b ${label} -o ${outdir} -t ${threads} -a -M ncbi
     sanity_check ${outdir} ${label}
 
     # Downloaded taxdump
diff --git a/tests/libs/bats b/tests/libs/bats
index 99d64eb..210acf3 160000
--- a/tests/libs/bats
+++ b/tests/libs/bats
@@ -1 +1 @@
-Subproject commit 99d64eb017abcd6a766dd0d354e625526da69cb3
+Subproject commit 210acf3a8ed318ddedad3137c15451739beba7d4
diff --git a/tests/libs/bats-support b/tests/libs/bats-support
index d140a65..24a72e1 160000
--- a/tests/libs/bats-support
+++ b/tests/libs/bats-support
@@ -1 +1 @@
-Subproject commit d140a65044b2d6810381935ae7f0c94c7023c8c3
+Subproject commit 24a72e14349690bcbf7c151b9d2d1cdd32d36eb1