From 5478e931af0e3028fc32b46e20be379b9443fd68 Mon Sep 17 00:00:00 2001 From: pirovc <4673375+pirovc@users.noreply.github.com> Date: Mon, 3 Jul 2023 13:50:21 +0200 Subject: [PATCH] genome_updater version 0.6.3 (#87) * assembly_summary now has 38 cols * remove md5sum check for gtdb taxonomy files, not matching latest * small fix --- genome_updater.sh | 40 ++++++++++------- .../genbank/archaea/assembly_summary.txt | 44 +++++++++---------- .../archaea/assembly_summary_historical.txt | 44 +++++++++---------- .../genbank/assembly_summary_genbank.txt | 44 +++++++++---------- .../assembly_summary_genbank_historical.txt | 44 +++++++++---------- .../genbank/fungi/assembly_summary.txt | 44 +++++++++---------- .../fungi/assembly_summary_historical.txt | 44 +++++++++---------- .../refseq/archaea/assembly_summary.txt | 44 +++++++++---------- .../archaea/assembly_summary_historical.txt | 44 +++++++++---------- .../refseq/assembly_summary_refseq.txt | 44 +++++++++---------- .../assembly_summary_refseq_historical.txt | 44 +++++++++---------- .../genomes/refseq/fungi/assembly_summary.txt | 44 +++++++++---------- .../fungi/assembly_summary_historical.txt | 44 +++++++++---------- tests/integration_offline.bats | 8 ++-- tests/integration_online.bats | 2 +- 15 files changed, 292 insertions(+), 286 deletions(-) diff --git a/genome_updater.sh b/genome_updater.sh index 686aaa3..d63ac96 100755 --- a/genome_updater.sh +++ b/genome_updater.sh @@ -25,7 +25,7 @@ IFS=$' ' # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
-version="0.6.2" +version="0.6.3" # Define ncbi_base_url or use local files (for testing) local_dir=${local_dir:-} @@ -45,8 +45,8 @@ export LC_NUMERIC="en_US.UTF-8" #activate aliases in the script shopt -s expand_aliases alias sort="sort --field-separator=$'\t'" -join_as_fields1="1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,1.10,1.11,1.12,1.13,1.14,1.15,1.16,1.17,1.18,1.19,1.20,1.21,1.22,1.23" -join_as_fields2="2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9,2.10,2.11,2.12,2.13,2.14,2.15,2.16,2.17,2.18,2.19,2.20,2.21,2.22,2.23" +join_as_fields1="1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,1.10,1.11,1.12,1.13,1.14,1.15,1.16,1.17,1.18,1.19,1.20,1.21,1.22,1.23,1.24,1.25,1.26,1.27,1.28,1.29,1.30,1.31,1.32,1.33,1.34,1.35,1.36,1.37,1.38" +join_as_fields2="2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9,2.10,2.11,2.12,2.13,2.14,2.15,2.16,2.17,2.18,2.19,2.20,2.21,2.22,2.23,2.24,2.25,2.26,2.27,2.28,2.29,2.30,2.31,2.32,2.33,2.34,2.35,2.36,2.37,2.38" download_url() # parameter: ${1} url, ${2} output file/directory (omit/empty to STDOUT) { @@ -70,22 +70,27 @@ download_url() # parameter: ${1} url, ${2} output file/directory (omit/empty to } export -f download_url #export it to be accessible to the parallel call -download_retry_md5(){ # parameter: ${1} url, ${2} output file, ${3} url MD5, ${4} re-tries +download_retry_md5(){ # parameter: ${1} url, ${2} output file, ${3} url MD5 (empty to skip), ${4} re-tries for (( att=1; att<=${4:-1}; att++ )); do if [ "${att}" -gt 1 ]; then echolog " - Failed to download ${url}. 
Trying again #${att}" "1" fi download_url "${1}" "${2}" - real_md5=$(download_url "${3}" | grep "${1##*/}" | cut -f1 -d' ') - if [ -z "${real_md5}" ]; then - continue; # did not find url file on md5 file (or empty), try again + # No md5 file to check + if [[ -z "${3}" ]]; then + return 0; else - file_md5=$(md5sum ${2} | cut -f1 -d' ') - if [ "${file_md5}" != "${real_md5}" ]; then - continue; # md5 didn't match, try again + real_md5=$(download_url "${3}" | grep "${1##*/}" | cut -f1 -d' ') + if [ -z "${real_md5}" ]; then + continue; # did not find url file on md5 file (or empty), try again else - return 0; # md5 matched, return success - fi + file_md5=$(md5sum ${2} | cut -f1 -d' ') + if [ "${file_md5}" != "${real_md5}" ]; then + continue; # md5 didn't match, try again + else + return 0; # md5 matched, return success + fi + fi fi done return 1; # failed to check md5 after all attempts @@ -150,7 +155,7 @@ check_assembly_summary() # parameter: ${1} assembly_summary file - return 0 true # Last char is empty (line break) if [ ! -z $(tail -c -1 "${1}") ]; then return 1; fi - # if contains header char parts of the header anywhere starting lines + # if contains header char parts of the header anywhere besides starting lines grep -m 1 "^#" "${1}" > /dev/null 2>&1 if [ $? -eq 0 ]; then return 1; fi @@ -162,9 +167,9 @@ check_assembly_summary() # parameter: ${1} assembly_summary file - return 0 true grep -m 1 " assembly_accession" "${1}" > /dev/null 2>&1 if [ $? -eq 0 ]; then return 1; fi - # if every line has 23 cols - awk 'BEGIN{FS=OFS="\t"}{print NF}' "${1}" | grep -v "23" > /dev/null 2>&1 - if [ $? 
-eq 0 ]; then return 1; fi + # if every line has same number of cols (besides headers) + ncols=$(grep -v "^#" "${1}" | awk 'BEGIN{FS=OFS="\t"}{print NF}' | uniq | wc -l) + if [[ ${ncols} -gt 1 ]]; then return 1; fi # if every line starts with GCF_ or GCA_ grep -v "^GC[FA]_" "${1}" > /dev/null 2>&1 @@ -258,7 +263,8 @@ filter_assembly_summary() # parameter: ${1} assembly_summary file, ${2} number o gtdb_tax=$(tmp_file "gtdb_tax.tmp") for url in "${gtdb_urls[@]}"; do tmp_tax=$(tmp_file "gtdb_tax.tmp.gz") - if ! download_retry_md5 "${url}" "${tmp_tax}" "${gtdb_base_url}MD5SUM.txt" "${retry_download_batch}"; then + #if ! download_retry_md5 "${url}" "${tmp_tax}" "${gtdb_base_url}MD5SUM.txt" "${retry_download_batch}"; then + if ! download_retry_md5 "${url}" "${tmp_tax}" "" "${retry_download_batch}"; then return 1; else # awk to remove prefix RS_ or GB_ diff --git a/tests/files/genomes/genbank/archaea/assembly_summary.txt b/tests/files/genomes/genbank/archaea/assembly_summary.txt index 58ba759..f6ff273 100644 --- a/tests/files/genomes/genbank/archaea/assembly_summary.txt +++ b/tests/files/genomes/genbank/archaea/assembly_summary.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
-# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCA_903930505.1 PRJEB38681 SAMEA6952057 CAIYYQ000000000.1 na 2026739 2026739 Euryarchaeota archaeon AlinenSedimentsCore2_bin-0840 latest Contig Major Full 2020/07/18 freshwater MAG --- AlinenSedimentsCore2_bin-0840 BILS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/930/505/GCA_903930505.1_freshwater_MAG_---_AlinenSedimentsCore2_bin-0840 derived from metagenome; genus undefined na -GCA_903858355.1 PRJEB38681 SAMEA6954579 CAIOIP000000000.1 na 2220064 2220064 uncultured Candidatus Micrarchaeota archaeon AlinenSedimentsD1_bin-0133 latest Contig Major Full 2020/07/16 freshwater MAG --- AlinenSedimentsD1_bin-0133 BILS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/858/355/GCA_903858355.1_freshwater_MAG_---_AlinenSedimentsD1_bin-0133 derived from environmental source; derived from metagenome na -GCA_016839815.1 PRJNA680430 SAMN16492231 JAEOTM000000000.1 na 2800102 2800102 Candidatus Hodarchaeota archaeon YT2_004 latest Contig Major Full 2021/02/09 ASM1683981v1 Shenzhen Univeristy na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/839/815/GCA_016839815.1_ASM1683981v1 derived from metagenome; genus undefined na -GCA_011389385.1 PRJNA480137 SAMN09639886 DTGE00000000.1 na 2026714 2026714 Candidatus Bathyarchaeota archaeon SpSt-755 latest Contig Major Full 2020/03/17 ASM1138938v1 The University of Hong Kong na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/389/385/GCA_011389385.1_ASM1138938v1 derived from metagenome; genus undefined na -GCA_017656495.1 PRJNA635695 SAMN15049706 JACDNS000000000.1 na 35749 35749 Thermococcus sp. 
GB_MAG1_027 latest Contig Major Full 2021/04/01 ASM1765649v1 Marine Biological Laboratory na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/656/495/GCA_017656495.1_ASM1765649v1 derived from metagenome na -GCA_018645535.1 PRJNA630981 SAMN14913871 JABGWN000000000.1 na 2026739 2026739 Euryarchaeota archaeon SI034_bin52 latest Contig Major Full 2021/06/02 ASM1864553v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/645/535/GCA_018645535.1_ASM1864553v1 derived from metagenome; genus undefined na -GCA_002499365.1 PRJNA348753 SAMN06027185 DALD00000000.1 na 1915872 1915872 Euryarchaeota archaeon UBA29 UBA29 latest Scaffold Major Full 2017/10/10 ASM249936v1 University of Queensland na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/499/365/GCA_002499365.1_ASM249936v1 derived from metagenome; genus undefined na -GCA_004525575.1 PRJNA511814 SAMN11127074 SPCB00000000.1 na 2053491 2053491 Candidatus Thorarchaeota archaeon das_tool.maxbin2.13 latest Contig Major Full 2019/03/30 ASM452557v1 Radboud University Njmegen na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/525/575/GCA_004525575.1_ASM452557v1 derived from metagenome; genus undefined na -GCA_011335015.1 PRJNA480137 SAMN09639889 DTGH00000000.1 na 2250274 2250274 Candidatus Micrarchaeota archaeon SpSt-758 latest Contig Major Full 2020/03/16 ASM1133501v1 The University of Hong Kong na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/335/015/GCA_011335015.1_ASM1133501v1 derived from metagenome; genus undefined na -GCA_002069705.1 PRJNA321808 SAMN05004159 MWBV00000000.1 na 1852841 1852841 Candidatus Diapherotrites archaeon ADurb.Bin253 ADurb.Bin253 latest Contig Major Full 2017/03/22 ASM206970v1 University of Illinois na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/069/705/GCA_002069705.1_ASM206970v1 derived from metagenome; genus undefined na -GCA_900316635.1 PRJEB21624 SAMEA104666887 ONDQ00000000.1 na 253161 253161 uncultured Methanobrevibacter sp. 
RUG201 latest Scaffold Major Full 2018/03/21 Rumen uncultured genome RUG201 THE ROSLIN INSTITUTE na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/900/316/635/GCA_900316635.1_Rumen_uncultured_genome_RUG201 derived from environmental source na -GCA_011388575.1 PRJNA480137 SAMN09638894 DRUB00000000.1 na 334771 334771 Ignisphaera aggregans SpSt-1 latest Contig Major Full 2020/03/17 ASM1138857v1 The University of Hong Kong na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/388/575/GCA_011388575.1_ASM1138857v1 derived from metagenome na -GCA_018304485.1 PRJNA288027 SAMN18341270 JAGVWB000000000.1 na 2026736 2026736 Candidatus Diapherotrites archaeon RIFCSPLOWO2_01_FULL_43_13 latest Scaffold Major Full 2021/05/07 ASM1830448v1 Banfield Lab, University of California, Berkeley na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/304/485/GCA_018304485.1_ASM1830448v1 derived from metagenome; genus undefined na -GCA_018676255.1 PRJNA630981 SAMN14914095 JABHFD000000000.1 na 2026739 2026739 Euryarchaeota archaeon SI037_bin172 latest Contig Major Full 2021/06/02 ASM1867625v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/676/255/GCA_018676255.1_ASM1867625v1 derived from metagenome; genus undefined na -GCA_016196285.1 PRJNA640378 SAMN15435488 JACPXY000000000.1 na 2026773 2026773 Candidatus Pacearchaeota archaeon NC_groundwater_849_Pr1_B-0.1um_42_10 latest Contig Major Full 2020/12/21 ASM1619628v1 Innovative Genomics Institute na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/196/285/GCA_016196285.1_ASM1619628v1 derived from metagenome; genus undefined na -GCA_002497565.1 PRJNA348753 SAMN06027207 DADS00000000.1 na 1915824 1915824 Euryarchaeota archaeon UBA179 UBA179 latest Scaffold Major Full 2017/10/10 ASM249756v1 University of Queensland na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/497/565/GCA_002497565.1_ASM249756v1 derived from metagenome; genus undefined na -GCA_902383905.1 PRJEB33885 SAMEA5851664 representative genome 
1406512 1406512 Candidatus Methanomassiliicoccus intestinalis MGYG-HGUT-02160 latest Complete Genome Major Full 2019/08/10 UHGG_MGYG-HGUT-02160 EMG GCF_902383905.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/902/383/905/GCA_902383905.1_UHGG_MGYG-HGUT-02160 na -GCA_018692575.1 PRJNA630981 SAMN14914238 JABHKQ000000000.1 na 2026803 2026803 Candidatus Woesearchaeota archaeon SI037S2_bin24 latest Contig Major Full 2021/06/02 ASM1869257v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/692/575/GCA_018692575.1_ASM1869257v1 derived from metagenome; genus undefined na -GCA_013390775.1 PRJNA640238 SAMN15312031 JACATB000000000.1 na 2511932 2511932 Marine Group I thaumarchaeote strain=D11 latest Scaffold Major Full 2020/07/06 ASM1339077v1 National Science Foundation of China GCF_013390775.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/390/775/GCA_013390775.1_ASM1339077v1 genus undefined na -GCA_002727275.1 PRJNA391943 SAMN07618837 PBWO00000000.1 na 2026739 2026739 Euryarchaeota archaeon RS814 latest Contig Major Full 2017/10/26 ASM272727v1 Tara Oceans Consortium na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/727/275/GCA_002727275.1_ASM272727v1 derived from metagenome; genus undefined na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
+#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCA_002069705.1 PRJNA321808 SAMN05004159 MWBV00000000.1 na 1852841 1852841 Candidatus Diapherotrites archaeon ADurb.Bin253 na ADurb.Bin253 latest Contig Major Full 2017/03/22 ASM206970v1 University of Illinois na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/069/705/GCA_002069705.1_ASM206970v1 derived from metagenome; genus undefined na na haploid archaea 877842 877842 29.0 0 95 95 University of Illinois Annotation submitted by University of Illinois 03/22/17 1003 972 31 na +GCA_002497565.1 PRJNA348753 SAMN06027207 DADS00000000.1 na 1915824 1915824 Euryarchaeota archaeon UBA179 na UBA179 latest Scaffold Major Full 2017/10/10 ASM249756v1 University of Queensland na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/497/565/GCA_002497565.1_ASM249756v1 derived from metagenome; genus undefined na na haploid archaea 1815858 1790997 45.5 0 109 109 na na na 0 0 0 28894102 +GCA_002499365.1 PRJNA348753 SAMN06027185 DALD00000000.1 na 1915872 1915872 Euryarchaeota archaeon UBA29 na UBA29 latest Scaffold Major Full 2017/10/10 ASM249936v1 University of Queensland na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/499/365/GCA_002499365.1_ASM249936v1 derived from metagenome; genus undefined na na haploid archaea 1460939 1381050 52.0 0 64 64 na na na 0 0 0 28894102 +GCA_002727275.1 PRJNA391943 SAMN07618837 PBWO00000000.1 na 2026739 2026739 Euryarchaeota archaeon na RS814 latest Contig Major Full 2017/10/26 ASM272727v1 
Tara Oceans Consortium na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/727/275/GCA_002727275.1_ASM272727v1 derived from metagenome; genus undefined na na haploid archaea 1382677 1382677 50.0 0 60 60 NCBI NCBI Prokaryotic Genome Annotation Pipeline 10/05/17 1252 1198 40 29337314 +GCA_004525575.1 PRJNA511814 SAMN11127074 SPCB00000000.1 na 2053491 2053491 Candidatus Thorarchaeota archaeon na das_tool.maxbin2.13 latest Contig Major Full 2019/03/30 ASM452557v1 Radboud University Njmegen na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/525/575/GCA_004525575.1_ASM452557v1 derived from metagenome; genus undefined na na haploid archaea 3054279 3054279 42.0 0 503 503 NCBI NCBI Prokaryotic Genome Annotation Pipeline 03/25/19 3222 3154 51 na +GCA_013390775.1 PRJNA640238 SAMN15312031 JACATB000000000.1 na 2511932 2511932 Marine Group I thaumarchaeote strain=D11 na latest Scaffold Major Full 2020/07/06 ASM1339077v1 National Science Foundation of China GCF_013390775.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/390/775/GCA_013390775.1_ASM1339077v1 contaminated; genus undefined na na haploid archaea 1279113 1270694 37.0 0 215 215 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 06/30/20 1571 1521 38 30592124 +GCA_016196285.1 PRJNA640378 SAMN15435488 JACPXY000000000.1 na 2026773 2026773 Candidatus Pacearchaeota archaeon na NC_groundwater_849_Pr1_B-0.1um_42_10 latest Contig Major Full 2020/12/21 ASM1619628v1 Innovative Genomics Institute na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/196/285/GCA_016196285.1_ASM1619628v1 derived from metagenome; genus undefined na na haploid archaea 863944 863944 42.0 0 72 72 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 08/21/20 1055 1015 37 na +GCA_016839815.1 PRJNA680430 SAMN16492231 JAEOTM000000000.1 na 2800102 2800102 Candidatus Hodarchaeota archaeon na YT2_004 latest Contig Major Full 2021/02/09 ASM1683981v1 Shenzhen Univeristy na na 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/839/815/GCA_016839815.1_ASM1683981v1 derived from metagenome; genus undefined na na haploid archaea 4244502 4244502 36.0 0 206 206 na na na 0 0 0 na +GCA_017656495.1 PRJNA635695 SAMN15049706 JACDNS000000000.1 na 35749 35749 Thermococcus sp. na GB_MAG1_027 latest Contig Major Full 2021/04/01 ASM1765649v1 Marine Biological Laboratory na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/656/495/GCA_017656495.1_ASM1765649v1 derived from metagenome na na haploid archaea 2048078 2048078 40.0 0 25 25 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 07/15/20 2252 2203 26 na +GCA_018304485.1 PRJNA288027 SAMN18341270 JAGVWB000000000.1 na 2026736 2026736 Candidatus Diapherotrites archaeon na RIFCSPLOWO2_01_FULL_43_13 latest Scaffold Major Full 2021/05/07 ASM1830448v1 Banfield Lab, University of California, Berkeley na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/304/485/GCA_018304485.1_ASM1830448v1 derived from metagenome; genus undefined na na haploid archaea 870445 870344 42.5 0 40 40 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 04/30/21 1067 1023 39 na +GCA_018645535.1 PRJNA630981 SAMN14913871 JABGWN000000000.1 na 2026739 2026739 Euryarchaeota archaeon na SI034_bin52 latest Contig Major Full 2021/06/02 ASM1864553v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/645/535/GCA_018645535.1_ASM1864553v1 derived from metagenome; genus undefined na na haploid archaea 1736086 1736086 44.5 0 299 299 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/19/20 1674 1629 41 33504941 +GCA_018676255.1 PRJNA630981 SAMN14914095 JABHFD000000000.1 na 2026739 2026739 Euryarchaeota archaeon na SI037_bin172 latest Contig Major Full 2021/06/02 ASM1867625v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/676/255/GCA_018676255.1_ASM1867625v1 derived from metagenome; genus undefined na na haploid archaea 1921001 1921001 48.5 0 158 158 NCBI NCBI 
Prokaryotic Genome Annotation Pipeline (PGAP) 05/19/20 1716 1671 38 33504941 +GCA_018692575.1 PRJNA630981 SAMN14914238 JABHKQ000000000.1 na 2026803 2026803 Candidatus Woesearchaeota archaeon na SI037S2_bin24 latest Contig Major Full 2021/06/02 ASM1869257v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/692/575/GCA_018692575.1_ASM1869257v1 derived from metagenome; genus undefined na na haploid archaea 581819 581819 31.0 0 32 32 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/19/20 761 715 41 33504941 +GCA_900316635.1 PRJEB21624 SAMEA104666887 ONDQ00000000.1 na 253161 253161 uncultured Methanobrevibacter sp. na RUG201 latest Scaffold Major Full 2018/03/21 Rumen uncultured genome RUG201 THE ROSLIN INSTITUTE na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/900/316/635/GCA_900316635.1_Rumen_uncultured_genome_RUG201 derived from environmental source na na haploid archaea 1920279 1919943 34.0 0 97 97 na na na 0 0 0 na +GCA_902383905.1 PRJEB33885 SAMEA5851664 na representative genome 1406512 1406512 Candidatus Methanomassiliicoccus intestinalis na MGYG-HGUT-02160 latest Complete Genome Major Full 2019/08/10 UHGG_MGYG-HGUT-02160 EMG GCF_902383905.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/902/383/905/GCA_902383905.1_UHGG_MGYG-HGUT-02160 na na na haploid archaea 1931651 1931651 41.0 1 1 1 na na na 0 0 0 na +GCA_903858355.1 PRJEB38681 SAMEA6954579 CAIOIP000000000.1 na 2220064 2220064 uncultured Candidatus Micrarchaeota archaeon na AlinenSedimentsD1_bin-0133 latest Contig Major Full 2020/07/16 freshwater MAG --- AlinenSedimentsD1_bin-0133 BILS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/858/355/GCA_903858355.1_freshwater_MAG_---_AlinenSedimentsD1_bin-0133 derived from environmental source; derived from metagenome; fragmented assembly; genus undefined na na haploid archaea 791701 791701 57.5 0 277 277 na na na 0 0 0 na +GCA_903930505.1 PRJEB38681 SAMEA6952057 CAIYYQ000000000.1 na 2026739 2026739 
Euryarchaeota archaeon na AlinenSedimentsCore2_bin-0840 latest Contig Major Full 2020/07/18 freshwater MAG --- AlinenSedimentsCore2_bin-0840 BILS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/930/505/GCA_903930505.1_freshwater_MAG_---_AlinenSedimentsCore2_bin-0840 derived from metagenome; genus undefined na na haploid archaea 2093575 2093575 40.0 0 441 441 na na na 0 0 0 na +GCA_011335015.1 PRJNA480137 SAMN09639889 DTGH00000000.1 na 2250274 2250274 Candidatus Micrarchaeota archaeon na SpSt-758 suppressed Contig Major Full 2020/03/16 ASM1133501v1 The University of Hong Kong na na na derived from metagenome; genus undefined na 2022/03/04 haploid archaea 993631 993631 44.5 0 32 32 NCBI NCBI Prokaryotic Genome Annotation Pipeline 02/27/19 1149 1099 45 31911466 +GCA_011388575.1 PRJNA480137 SAMN09638894 DRUB00000000.1 na 334771 334771 Ignisphaera aggregans na SpSt-1 suppressed Contig Major Full 2020/03/17 ASM1138857v1 The University of Hong Kong na na na derived from metagenome na 2022/03/04 haploid archaea 2087524 2087524 33.5 0 227 227 NCBI NCBI Prokaryotic Genome Annotation Pipeline 02/26/19 2237 2086 50 31911466 +GCA_011389385.1 PRJNA480137 SAMN09639886 DTGE00000000.1 na 2026714 2026714 Candidatus Bathyarchaeota archaeon na SpSt-755 suppressed Contig Major Full 2020/03/17 ASM1138938v1 The University of Hong Kong na na na derived from metagenome; genus undefined na 2022/03/04 haploid archaea 1245025 1245025 52.0 0 166 166 NCBI NCBI Prokaryotic Genome Annotation Pipeline 02/27/19 1297 1261 21 31911466 diff --git a/tests/files/genomes/genbank/archaea/assembly_summary_historical.txt b/tests/files/genomes/genbank/archaea/assembly_summary_historical.txt index 58ba759..f6ff273 100644 --- a/tests/files/genomes/genbank/archaea/assembly_summary_historical.txt +++ b/tests/files/genomes/genbank/archaea/assembly_summary_historical.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
-# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCA_903930505.1 PRJEB38681 SAMEA6952057 CAIYYQ000000000.1 na 2026739 2026739 Euryarchaeota archaeon AlinenSedimentsCore2_bin-0840 latest Contig Major Full 2020/07/18 freshwater MAG --- AlinenSedimentsCore2_bin-0840 BILS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/930/505/GCA_903930505.1_freshwater_MAG_---_AlinenSedimentsCore2_bin-0840 derived from metagenome; genus undefined na -GCA_903858355.1 PRJEB38681 SAMEA6954579 CAIOIP000000000.1 na 2220064 2220064 uncultured Candidatus Micrarchaeota archaeon AlinenSedimentsD1_bin-0133 latest Contig Major Full 2020/07/16 freshwater MAG --- AlinenSedimentsD1_bin-0133 BILS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/858/355/GCA_903858355.1_freshwater_MAG_---_AlinenSedimentsD1_bin-0133 derived from environmental source; derived from metagenome na -GCA_016839815.1 PRJNA680430 SAMN16492231 JAEOTM000000000.1 na 2800102 2800102 Candidatus Hodarchaeota archaeon YT2_004 latest Contig Major Full 2021/02/09 ASM1683981v1 Shenzhen Univeristy na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/839/815/GCA_016839815.1_ASM1683981v1 derived from metagenome; genus undefined na -GCA_011389385.1 PRJNA480137 SAMN09639886 DTGE00000000.1 na 2026714 2026714 Candidatus Bathyarchaeota archaeon SpSt-755 latest Contig Major Full 2020/03/17 ASM1138938v1 The University of Hong Kong na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/389/385/GCA_011389385.1_ASM1138938v1 derived from metagenome; genus undefined na -GCA_017656495.1 PRJNA635695 SAMN15049706 JACDNS000000000.1 na 35749 35749 Thermococcus sp. 
GB_MAG1_027 latest Contig Major Full 2021/04/01 ASM1765649v1 Marine Biological Laboratory na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/656/495/GCA_017656495.1_ASM1765649v1 derived from metagenome na -GCA_018645535.1 PRJNA630981 SAMN14913871 JABGWN000000000.1 na 2026739 2026739 Euryarchaeota archaeon SI034_bin52 latest Contig Major Full 2021/06/02 ASM1864553v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/645/535/GCA_018645535.1_ASM1864553v1 derived from metagenome; genus undefined na -GCA_002499365.1 PRJNA348753 SAMN06027185 DALD00000000.1 na 1915872 1915872 Euryarchaeota archaeon UBA29 UBA29 latest Scaffold Major Full 2017/10/10 ASM249936v1 University of Queensland na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/499/365/GCA_002499365.1_ASM249936v1 derived from metagenome; genus undefined na -GCA_004525575.1 PRJNA511814 SAMN11127074 SPCB00000000.1 na 2053491 2053491 Candidatus Thorarchaeota archaeon das_tool.maxbin2.13 latest Contig Major Full 2019/03/30 ASM452557v1 Radboud University Njmegen na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/525/575/GCA_004525575.1_ASM452557v1 derived from metagenome; genus undefined na -GCA_011335015.1 PRJNA480137 SAMN09639889 DTGH00000000.1 na 2250274 2250274 Candidatus Micrarchaeota archaeon SpSt-758 latest Contig Major Full 2020/03/16 ASM1133501v1 The University of Hong Kong na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/335/015/GCA_011335015.1_ASM1133501v1 derived from metagenome; genus undefined na -GCA_002069705.1 PRJNA321808 SAMN05004159 MWBV00000000.1 na 1852841 1852841 Candidatus Diapherotrites archaeon ADurb.Bin253 ADurb.Bin253 latest Contig Major Full 2017/03/22 ASM206970v1 University of Illinois na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/069/705/GCA_002069705.1_ASM206970v1 derived from metagenome; genus undefined na -GCA_900316635.1 PRJEB21624 SAMEA104666887 ONDQ00000000.1 na 253161 253161 uncultured Methanobrevibacter sp. 
RUG201 latest Scaffold Major Full 2018/03/21 Rumen uncultured genome RUG201 THE ROSLIN INSTITUTE na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/900/316/635/GCA_900316635.1_Rumen_uncultured_genome_RUG201 derived from environmental source na -GCA_011388575.1 PRJNA480137 SAMN09638894 DRUB00000000.1 na 334771 334771 Ignisphaera aggregans SpSt-1 latest Contig Major Full 2020/03/17 ASM1138857v1 The University of Hong Kong na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/388/575/GCA_011388575.1_ASM1138857v1 derived from metagenome na -GCA_018304485.1 PRJNA288027 SAMN18341270 JAGVWB000000000.1 na 2026736 2026736 Candidatus Diapherotrites archaeon RIFCSPLOWO2_01_FULL_43_13 latest Scaffold Major Full 2021/05/07 ASM1830448v1 Banfield Lab, University of California, Berkeley na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/304/485/GCA_018304485.1_ASM1830448v1 derived from metagenome; genus undefined na -GCA_018676255.1 PRJNA630981 SAMN14914095 JABHFD000000000.1 na 2026739 2026739 Euryarchaeota archaeon SI037_bin172 latest Contig Major Full 2021/06/02 ASM1867625v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/676/255/GCA_018676255.1_ASM1867625v1 derived from metagenome; genus undefined na -GCA_016196285.1 PRJNA640378 SAMN15435488 JACPXY000000000.1 na 2026773 2026773 Candidatus Pacearchaeota archaeon NC_groundwater_849_Pr1_B-0.1um_42_10 latest Contig Major Full 2020/12/21 ASM1619628v1 Innovative Genomics Institute na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/196/285/GCA_016196285.1_ASM1619628v1 derived from metagenome; genus undefined na -GCA_002497565.1 PRJNA348753 SAMN06027207 DADS00000000.1 na 1915824 1915824 Euryarchaeota archaeon UBA179 UBA179 latest Scaffold Major Full 2017/10/10 ASM249756v1 University of Queensland na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/497/565/GCA_002497565.1_ASM249756v1 derived from metagenome; genus undefined na -GCA_902383905.1 PRJEB33885 SAMEA5851664 representative genome 
1406512 1406512 Candidatus Methanomassiliicoccus intestinalis MGYG-HGUT-02160 latest Complete Genome Major Full 2019/08/10 UHGG_MGYG-HGUT-02160 EMG GCF_902383905.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/902/383/905/GCA_902383905.1_UHGG_MGYG-HGUT-02160 na -GCA_018692575.1 PRJNA630981 SAMN14914238 JABHKQ000000000.1 na 2026803 2026803 Candidatus Woesearchaeota archaeon SI037S2_bin24 latest Contig Major Full 2021/06/02 ASM1869257v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/692/575/GCA_018692575.1_ASM1869257v1 derived from metagenome; genus undefined na -GCA_013390775.1 PRJNA640238 SAMN15312031 JACATB000000000.1 na 2511932 2511932 Marine Group I thaumarchaeote strain=D11 latest Scaffold Major Full 2020/07/06 ASM1339077v1 National Science Foundation of China GCF_013390775.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/390/775/GCA_013390775.1_ASM1339077v1 genus undefined na -GCA_002727275.1 PRJNA391943 SAMN07618837 PBWO00000000.1 na 2026739 2026739 Euryarchaeota archaeon RS814 latest Contig Major Full 2017/10/26 ASM272727v1 Tara Oceans Consortium na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/727/275/GCA_002727275.1_ASM272727v1 derived from metagenome; genus undefined na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
+#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCA_002069705.1 PRJNA321808 SAMN05004159 MWBV00000000.1 na 1852841 1852841 Candidatus Diapherotrites archaeon ADurb.Bin253 na ADurb.Bin253 latest Contig Major Full 2017/03/22 ASM206970v1 University of Illinois na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/069/705/GCA_002069705.1_ASM206970v1 derived from metagenome; genus undefined na na haploid archaea 877842 877842 29.0 0 95 95 University of Illinois Annotation submitted by University of Illinois 03/22/17 1003 972 31 na +GCA_002497565.1 PRJNA348753 SAMN06027207 DADS00000000.1 na 1915824 1915824 Euryarchaeota archaeon UBA179 na UBA179 latest Scaffold Major Full 2017/10/10 ASM249756v1 University of Queensland na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/497/565/GCA_002497565.1_ASM249756v1 derived from metagenome; genus undefined na na haploid archaea 1815858 1790997 45.5 0 109 109 na na na 0 0 0 28894102 +GCA_002499365.1 PRJNA348753 SAMN06027185 DALD00000000.1 na 1915872 1915872 Euryarchaeota archaeon UBA29 na UBA29 latest Scaffold Major Full 2017/10/10 ASM249936v1 University of Queensland na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/499/365/GCA_002499365.1_ASM249936v1 derived from metagenome; genus undefined na na haploid archaea 1460939 1381050 52.0 0 64 64 na na na 0 0 0 28894102 +GCA_002727275.1 PRJNA391943 SAMN07618837 PBWO00000000.1 na 2026739 2026739 Euryarchaeota archaeon na RS814 latest Contig Major Full 2017/10/26 ASM272727v1 
Tara Oceans Consortium na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/727/275/GCA_002727275.1_ASM272727v1 derived from metagenome; genus undefined na na haploid archaea 1382677 1382677 50.0 0 60 60 NCBI NCBI Prokaryotic Genome Annotation Pipeline 10/05/17 1252 1198 40 29337314 +GCA_004525575.1 PRJNA511814 SAMN11127074 SPCB00000000.1 na 2053491 2053491 Candidatus Thorarchaeota archaeon na das_tool.maxbin2.13 latest Contig Major Full 2019/03/30 ASM452557v1 Radboud University Njmegen na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/525/575/GCA_004525575.1_ASM452557v1 derived from metagenome; genus undefined na na haploid archaea 3054279 3054279 42.0 0 503 503 NCBI NCBI Prokaryotic Genome Annotation Pipeline 03/25/19 3222 3154 51 na +GCA_013390775.1 PRJNA640238 SAMN15312031 JACATB000000000.1 na 2511932 2511932 Marine Group I thaumarchaeote strain=D11 na latest Scaffold Major Full 2020/07/06 ASM1339077v1 National Science Foundation of China GCF_013390775.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/390/775/GCA_013390775.1_ASM1339077v1 contaminated; genus undefined na na haploid archaea 1279113 1270694 37.0 0 215 215 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 06/30/20 1571 1521 38 30592124 +GCA_016196285.1 PRJNA640378 SAMN15435488 JACPXY000000000.1 na 2026773 2026773 Candidatus Pacearchaeota archaeon na NC_groundwater_849_Pr1_B-0.1um_42_10 latest Contig Major Full 2020/12/21 ASM1619628v1 Innovative Genomics Institute na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/196/285/GCA_016196285.1_ASM1619628v1 derived from metagenome; genus undefined na na haploid archaea 863944 863944 42.0 0 72 72 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 08/21/20 1055 1015 37 na +GCA_016839815.1 PRJNA680430 SAMN16492231 JAEOTM000000000.1 na 2800102 2800102 Candidatus Hodarchaeota archaeon na YT2_004 latest Contig Major Full 2021/02/09 ASM1683981v1 Shenzhen Univeristy na na 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/839/815/GCA_016839815.1_ASM1683981v1 derived from metagenome; genus undefined na na haploid archaea 4244502 4244502 36.0 0 206 206 na na na 0 0 0 na +GCA_017656495.1 PRJNA635695 SAMN15049706 JACDNS000000000.1 na 35749 35749 Thermococcus sp. na GB_MAG1_027 latest Contig Major Full 2021/04/01 ASM1765649v1 Marine Biological Laboratory na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/656/495/GCA_017656495.1_ASM1765649v1 derived from metagenome na na haploid archaea 2048078 2048078 40.0 0 25 25 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 07/15/20 2252 2203 26 na +GCA_018304485.1 PRJNA288027 SAMN18341270 JAGVWB000000000.1 na 2026736 2026736 Candidatus Diapherotrites archaeon na RIFCSPLOWO2_01_FULL_43_13 latest Scaffold Major Full 2021/05/07 ASM1830448v1 Banfield Lab, University of California, Berkeley na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/304/485/GCA_018304485.1_ASM1830448v1 derived from metagenome; genus undefined na na haploid archaea 870445 870344 42.5 0 40 40 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 04/30/21 1067 1023 39 na +GCA_018645535.1 PRJNA630981 SAMN14913871 JABGWN000000000.1 na 2026739 2026739 Euryarchaeota archaeon na SI034_bin52 latest Contig Major Full 2021/06/02 ASM1864553v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/645/535/GCA_018645535.1_ASM1864553v1 derived from metagenome; genus undefined na na haploid archaea 1736086 1736086 44.5 0 299 299 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/19/20 1674 1629 41 33504941 +GCA_018676255.1 PRJNA630981 SAMN14914095 JABHFD000000000.1 na 2026739 2026739 Euryarchaeota archaeon na SI037_bin172 latest Contig Major Full 2021/06/02 ASM1867625v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/676/255/GCA_018676255.1_ASM1867625v1 derived from metagenome; genus undefined na na haploid archaea 1921001 1921001 48.5 0 158 158 NCBI NCBI 
Prokaryotic Genome Annotation Pipeline (PGAP) 05/19/20 1716 1671 38 33504941 +GCA_018692575.1 PRJNA630981 SAMN14914238 JABHKQ000000000.1 na 2026803 2026803 Candidatus Woesearchaeota archaeon na SI037S2_bin24 latest Contig Major Full 2021/06/02 ASM1869257v1 The University of Melbourne na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/692/575/GCA_018692575.1_ASM1869257v1 derived from metagenome; genus undefined na na haploid archaea 581819 581819 31.0 0 32 32 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/19/20 761 715 41 33504941 +GCA_900316635.1 PRJEB21624 SAMEA104666887 ONDQ00000000.1 na 253161 253161 uncultured Methanobrevibacter sp. na RUG201 latest Scaffold Major Full 2018/03/21 Rumen uncultured genome RUG201 THE ROSLIN INSTITUTE na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/900/316/635/GCA_900316635.1_Rumen_uncultured_genome_RUG201 derived from environmental source na na haploid archaea 1920279 1919943 34.0 0 97 97 na na na 0 0 0 na +GCA_902383905.1 PRJEB33885 SAMEA5851664 na representative genome 1406512 1406512 Candidatus Methanomassiliicoccus intestinalis na MGYG-HGUT-02160 latest Complete Genome Major Full 2019/08/10 UHGG_MGYG-HGUT-02160 EMG GCF_902383905.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/902/383/905/GCA_902383905.1_UHGG_MGYG-HGUT-02160 na na na haploid archaea 1931651 1931651 41.0 1 1 1 na na na 0 0 0 na +GCA_903858355.1 PRJEB38681 SAMEA6954579 CAIOIP000000000.1 na 2220064 2220064 uncultured Candidatus Micrarchaeota archaeon na AlinenSedimentsD1_bin-0133 latest Contig Major Full 2020/07/16 freshwater MAG --- AlinenSedimentsD1_bin-0133 BILS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/858/355/GCA_903858355.1_freshwater_MAG_---_AlinenSedimentsD1_bin-0133 derived from environmental source; derived from metagenome; fragmented assembly; genus undefined na na haploid archaea 791701 791701 57.5 0 277 277 na na na 0 0 0 na +GCA_903930505.1 PRJEB38681 SAMEA6952057 CAIYYQ000000000.1 na 2026739 2026739 
Euryarchaeota archaeon na AlinenSedimentsCore2_bin-0840 latest Contig Major Full 2020/07/18 freshwater MAG --- AlinenSedimentsCore2_bin-0840 BILS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/930/505/GCA_903930505.1_freshwater_MAG_---_AlinenSedimentsCore2_bin-0840 derived from metagenome; genus undefined na na haploid archaea 2093575 2093575 40.0 0 441 441 na na na 0 0 0 na +GCA_011335015.1 PRJNA480137 SAMN09639889 DTGH00000000.1 na 2250274 2250274 Candidatus Micrarchaeota archaeon na SpSt-758 suppressed Contig Major Full 2020/03/16 ASM1133501v1 The University of Hong Kong na na na derived from metagenome; genus undefined na 2022/03/04 haploid archaea 993631 993631 44.5 0 32 32 NCBI NCBI Prokaryotic Genome Annotation Pipeline 02/27/19 1149 1099 45 31911466 +GCA_011388575.1 PRJNA480137 SAMN09638894 DRUB00000000.1 na 334771 334771 Ignisphaera aggregans na SpSt-1 suppressed Contig Major Full 2020/03/17 ASM1138857v1 The University of Hong Kong na na na derived from metagenome na 2022/03/04 haploid archaea 2087524 2087524 33.5 0 227 227 NCBI NCBI Prokaryotic Genome Annotation Pipeline 02/26/19 2237 2086 50 31911466 +GCA_011389385.1 PRJNA480137 SAMN09639886 DTGE00000000.1 na 2026714 2026714 Candidatus Bathyarchaeota archaeon na SpSt-755 suppressed Contig Major Full 2020/03/17 ASM1138938v1 The University of Hong Kong na na na derived from metagenome; genus undefined na 2022/03/04 haploid archaea 1245025 1245025 52.0 0 166 166 NCBI NCBI Prokaryotic Genome Annotation Pipeline 02/27/19 1297 1261 21 31911466 diff --git a/tests/files/genomes/genbank/assembly_summary_genbank.txt b/tests/files/genomes/genbank/assembly_summary_genbank.txt index 9677d66..3fbe553 100644 --- a/tests/files/genomes/genbank/assembly_summary_genbank.txt +++ b/tests/files/genomes/genbank/assembly_summary_genbank.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
-# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCA_002566855.1 PRJNA400804 SAMN07598389 NUZM00000000.1 na 1396 1396 Bacillus cereus strain=AFS074515 latest Scaffold Major Full 2017/10/17 ASM256685v1 UNC Chapel Hill GCF_002566855.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/566/855/GCA_002566855.1_ASM256685v1 na -GCA_902635445.1 PRJEB33281 SAMEA6073950 CACPNU000000000.1 na 198431 198431 uncultured prokaryote latest Contig Major Full 2019/11/05 AG-915-F08 WOODS HOLE OCEANOGRAPHIC INSTITUTION na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/902/635/445/GCA_902635445.1_AG-915-F08 derived from environmental source; derived from metagenome na -GCA_017159575.1 PRJNA287430 SAMN17764286 AAZEKK000000000.1 na 197 197 Campylobacter jejuni strain=FSIS12137393 latest Contig Major Full 2021/03/03 PDT000946857.1 USDA FSIS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/159/575/GCA_017159575.1_PDT000946857.1 from large multi-isolate project na -GCA_005728625.1 PRJNA280335 SAMN10715290 AADQWW000000000.1 na 28901 28901 Salmonella enterica strain=ADRDL-2252 latest Contig Major Full 2019/05/23 PDT000448312.1 US Food and Drug Administration na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/005/728/625/GCA_005728625.1_PDT000448312.1 from large multi-isolate project na -GCA_013911495.1 PRJNA638822 SAMN15215249 JACETB000000000.1 na 1131 1131 Synechococcus sp. 
MCMED-G31 latest Contig Major Full 2020/07/29 ASM1391149v1 Evolutionary Genomics Group na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/911/495/GCA_013911495.1_ASM1391149v1 derived from metagenome na -GCA_004008395.1 na 2499034 2499034 Mycobacterium phage Cici latest Complete Genome Major Full 2019/01/08 ASM400839v1 na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/008/395/GCA_004008395.1_ASM400839v1 na -GCA_021355205.1 na 2894335 2894335 Burkholderia phage BgManors32 latest Complete Genome Major Full 2021/11/22 ASM2135520v1 na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/355/205/GCA_021355205.1_ASM2135520v1 na -GCA_003635585.1 PRJNA374603 SAMN06329599 MVSU00000000.1 na 210 210 Helicobacter pylori strain=HPAS14 latest Contig Major Full 2018/10/12 ASM363558v1 University of Western Australia GCF_003635585.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/635/585/GCA_003635585.1_ASM363558v1 na -GCA_012763735.1 PRJNA277984 SAMN04510396 AATCVN000000000.1 na 562 562 Escherichia coli strain=CDPHFDLB-F1602032-026A latest Contig Major Full 2020/04/23 PDT000113200.3 US Food and Drug Administration na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/763/735/GCA_012763735.1_PDT000113200.3 from large multi-isolate project na -GCA_013619715.1 PRJNA615626 SAMN14453445 JACEKU000000000.1 na 287 287 Pseudomonas aeruginosa strain=LiP14 latest Contig Major Full 2020/07/24 ASM1361971v1 University of Oxford GCF_013619715.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/619/715/GCA_013619715.1_ASM1361971v1 na -GCA_008787855.1 PRJNA292661 SAMN12842867 AALEUD000000000.1 na 28901 28901 Salmonella enterica strain=CVM N19S0343 latest Contig Major Full 2019/10/01 PDT000594120.1 FDA na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/008/787/855/GCA_008787855.1_PDT000594120.1 from large multi-isolate project na -GCA_903218915.1 PRJEB35770 SAMEA6813852 CAEZVL000000000.1 na 449393 449393 freshwater metagenome latest Contig Major Full 2020/06/05 
UFOp-RE-23may17-586 BIOLOGY CENTRE ASCR, V.V.I., INSTITUTE OF HYDROBIO na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/218/915/GCA_903218915.1_UFOp-RE-23may17-586 derived from environmental source; metagenome na -GCA_008201245.1 PRJNA248792 SAMN03479222 AAJWIJ000000000.1 na 90371 28901 Salmonella enterica subsp. enterica serovar Typhimurium strain=7397 latest Contig Major Full 2019/09/02 PDT000058697.2 Public Health England na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/008/201/245/GCA_008201245.1_PDT000058697.2 from large multi-isolate project na -GCA_011078725.1 PRJNA248792 SAMN03168749 AAPFHW000000000.1 na 90371 28901 Salmonella enterica subsp. enterica serovar Typhimurium strain=H120980533 latest Contig Major Full 2020/03/09 PDT000042974.4 Public Health England na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/078/725/GCA_011078725.1_PDT000042974.4 from large multi-isolate project na -GCA_013549135.1 PRJNA230403 SAMN15522001 AATZYI000000000.1 na 28901 28901 Salmonella enterica strain=PNUSAS152956 latest Contig Major Full 2020/07/23 PDT000787515.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/549/135/GCA_013549135.1_PDT000787515.1 from large multi-isolate project na -GCA_018937815.1 PRJNA218110 SAMN19697485 ABAWPX000000000.1 na 562 562 Escherichia coli strain=PNUSAE074529 latest Contig Major Full 2021/06/17 PDT001069867.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/937/815/GCA_018937815.1_PDT001069867.1 from large multi-isolate project na -GCA_005603115.1 PRJNA230403 SAMN11552442 AADIAU000000000.1 na 28901 28901 Salmonella enterica strain=PNUSAS073825 latest Contig Major Full 2019/05/21 PDT000496874.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/005/603/115/GCA_005603115.1_PDT000496874.1 from large multi-isolate project na -GCA_019997905.1 PRJNA685966 SAMN21249929 na 283734 283734 Staphylococcus pseudintermedius strain=HSP149 latest Complete Genome Major Full 2021/09/15 ASM1999790v1 Universitat Autonoma de 
Barcelona na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/019/997/905/GCA_019997905.1_ASM1999790v1 from large multi-isolate project na -GCA_011897165.1 PRJNA218110 SAMN12361411 AARDFA000000000.1 na 562 562 Escherichia coli strain=PNUSAE027109 latest Contig Major Full 2020/04/02 PDT000549212.2 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/897/165/GCA_011897165.1_PDT000549212.2 from large multi-isolate project na -GCA_015893745.1 PRJNA514245 SAMN15566993 DACSEB000000000.1 na 575 575 Raoultella planticola MISC077 latest Contig Major Full 2020/12/09 PDT000883933.1 National Center for Biotechnology Information na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/015/893/745/GCA_015893745.1_PDT000883933.1 from large multi-isolate project na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. +#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCA_002566855.1 PRJNA400804 SAMN07598389 NUZM00000000.1 na 1396 1396 Bacillus cereus strain=AFS074515 na latest Scaffold Major Full 2017/10/17 ASM256685v1 UNC Chapel Hill GCF_002566855.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/566/855/GCA_002566855.1_ASM256685v1 na na na haploid bacteria 5958823 5958450 34.5 0 100 100 NCBI NCBI Prokaryotic Genome Annotation Pipeline 09/17/17 6299 5914 73 na +GCA_003635585.1 PRJNA374603 SAMN06329599 MVSU00000000.1 na 210 210 Helicobacter pylori strain=HPAS14 na latest Contig Major Full 2018/10/12 
ASM363558v1 University of Western Australia GCF_003635585.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/635/585/GCA_003635585.1_ASM363558v1 na na na haploid bacteria 1577919 1577919 39.0 0 36 36 na na na 0 0 0 na +GCA_004008395.1 na na na na 2499034 2499034 Mycobacterium phage Cici na na latest Complete Genome Major Full 2019/01/08 ASM400839v1 na na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/008/395/GCA_004008395.1_ASM400839v1 na na na haploid viral 51287 51287 63.5 1 1 1 na INSDC Submitter 09/17/20 86 86 0 na +GCA_005603115.1 PRJNA230403 SAMN11552442 AADIAU000000000.1 na 28901 28901 Salmonella enterica strain=PNUSAS073825 na latest Contig Major Full 2019/05/21 PDT000496874.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/005/603/115/GCA_005603115.1_PDT000496874.1 from large multi-isolate project na na haploid bacteria 4688692 4688692 52.0 0 38 38 NCBI NCBI Prokaryotic Genome Annotation Pipeline 05/01/19 4640 4440 104 na +GCA_005728625.1 PRJNA280335 SAMN10715290 AADQWW000000000.1 na 28901 28901 Salmonella enterica strain=ADRDL-2252 na latest Contig Major Full 2019/05/23 PDT000448312.1 US Food and Drug Administration na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/005/728/625/GCA_005728625.1_PDT000448312.1 from large multi-isolate project na na haploid bacteria 4919847 4919847 52.0 0 208 208 NCBI NCBI Prokaryotic Genome Annotation Pipeline 01/23/19 5023 4812 98 na +GCA_008201245.1 PRJNA248792 SAMN03479222 AAJWIJ000000000.1 na 90371 28901 Salmonella enterica subsp. 
enterica serovar Typhimurium strain=7397 na latest Contig Major Full 2019/09/02 PDT000058697.2 Public Health England na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/008/201/245/GCA_008201245.1_PDT000058697.2 from large multi-isolate project na na haploid bacteria 4987018 4987018 52.0 0 167 167 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 08/26/19 5026 4863 62 na +GCA_008787855.1 PRJNA292661 SAMN12842867 AALEUD000000000.1 na 192955 28901 Salmonella enterica subsp. enterica serovar Kentucky strain=CVM N19S0343 na latest Contig Major Full 2019/10/01 PDT000594120.1 FDA na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/008/787/855/GCA_008787855.1_PDT000594120.1 from large multi-isolate project na na haploid bacteria 4968037 4968037 51.5 0 64 64 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 09/28/19 4935 4675 106 na +GCA_011078725.1 PRJNA248792 SAMN03168749 AAPFHW000000000.1 na 90371 28901 Salmonella enterica subsp. enterica serovar Typhimurium strain=H120980533 na latest Contig Major Full 2020/03/09 PDT000042974.4 Public Health England na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/078/725/GCA_011078725.1_PDT000042974.4 from large multi-isolate project na na haploid bacteria 4937268 4937268 52.0 0 112 112 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 08/20/19 4913 4750 70 na +GCA_011897165.1 PRJNA218110 SAMN12361411 AARDFA000000000.1 na 562 562 Escherichia coli strain=PNUSAE027109 na latest Contig Major Full 2020/04/02 PDT000549212.2 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/897/165/GCA_011897165.1_PDT000549212.2 from large multi-isolate project na na haploid bacteria 5406246 5406246 50.0 0 161 161 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 02/20/20 5428 5153 100 na +GCA_012763735.1 PRJNA277984 SAMN04510396 AATCVN000000000.1 na 562 562 Escherichia coli strain=CDPHFDLB-F1602032-026A na latest Contig Major Full 2020/04/23 PDT000113200.3 US Food and Drug Administration na na 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/763/735/GCA_012763735.1_PDT000113200.3 from large multi-isolate project na na haploid bacteria 5293312 5293312 50.0 0 346 346 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 02/29/20 5363 5106 100 na +GCA_013549135.1 PRJNA230403 SAMN15522001 AATZYI000000000.1 na 28901 28901 Salmonella enterica strain=PNUSAS152956 na latest Contig Major Full 2020/07/23 PDT000787515.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/549/135/GCA_013549135.1_PDT000787515.1 from large multi-isolate project na na haploid bacteria 4663961 4663961 52.0 0 29 29 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 07/14/20 4486 4310 99 na +GCA_013619715.1 PRJNA615626 SAMN14453445 JACEKU000000000.1 na 287 287 Pseudomonas aeruginosa strain=LiP14 na latest Contig Major Full 2020/07/24 ASM1361971v1 University of Oxford GCF_013619715.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/619/715/GCA_013619715.1_ASM1361971v1 na na na haploid bacteria 6453882 6453882 66.0 0 36 36 na na na 0 0 0 na +GCA_013911495.1 PRJNA638822 SAMN15215249 JACETB000000000.1 na 1131 1131 Synechococcus sp. 
na MCMED-G31 latest Contig Major Full 2020/07/29 ASM1391149v1 Evolutionary Genomics Group na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/911/495/GCA_013911495.1_ASM1391149v1 derived from metagenome na na haploid bacteria 1342507 1342507 61.5 0 102 102 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 07/24/20 1566 1534 28 na +GCA_015893745.1 PRJNA514245 SAMN15566993 DACSEB000000000.1 na 575 575 Raoultella planticola na MISC077 latest Contig Major Full 2020/12/09 PDT000883933.1 National Center for Biotechnology Information na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/015/893/745/GCA_015893745.1_PDT000883933.1 from large multi-isolate project na na haploid bacteria 5579674 5579674 55.5 0 73 73 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 11/10/20 5214 5054 79 30286803 +GCA_017159575.1 PRJNA287430 SAMN17764286 AAZEKK000000000.1 na 197 197 Campylobacter jejuni strain=FSIS12137393 na latest Contig Major Full 2021/03/03 PDT000946857.1 USDA FSIS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/159/575/GCA_017159575.1_PDT000946857.1 from large multi-isolate project na na haploid bacteria 1711580 1711580 30.0 0 24 24 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 02/03/21 1815 1717 40 na +GCA_018937815.1 PRJNA218110 SAMN19697485 ABAWPX000000000.1 na 562 562 Escherichia coli strain=PNUSAE074529 na latest Contig Major Full 2021/06/17 PDT001069867.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/937/815/GCA_018937815.1_PDT001069867.1 from large multi-isolate project na na haploid bacteria 5368752 5368752 50.5 0 204 204 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 06/15/21 5468 5198 95 na +GCA_021355205.1 na na na na 2894335 2894335 Burkholderia phage BgManors32 na na latest Complete Genome Major Full 2021/11/17 ASM2135520v1 na GCF_021355205.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/355/205/GCA_021355205.1_ASM2135520v1 na ICTV species exemplar na haploid viral 59762 59762 65.5 1 1 1 na na na 
na na na na +GCA_902635445.1 PRJEB33281 SAMEA6073950 CACPNU000000000.1 na 198431 198431 uncultured prokaryote na na latest Contig Major Full 2019/11/05 AG-915-F08 WOODS HOLE OCEANOGRAPHIC INSTITUTION na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/902/635/445/GCA_902635445.1_AG-915-F08 derived from environmental source; derived from metagenome; genus undefined na na haploid other 92318 92318 29.5 0 5 5 na na na 0 0 0 na +GCA_903218915.1 PRJEB35770 SAMEA6813852 CAEZVL000000000.1 na 449393 449393 freshwater metagenome na na latest Contig Major Full 2020/06/05 UFOp-RE-23may17-586 BIOLOGY CENTRE ASCR, V.V.I., INSTITUTE OF HYDROBIO na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/218/915/GCA_903218915.1_UFOp-RE-23may17-586 derived from environmental source; genus undefined; metagenome na na haploid metagenomes 1184075 1184075 52.0 0 217 217 BIOLOGY CENTRE ASCR, V.V.I., INSTITUTE OF HYDROBIO Annotation submitted by BIOLOGY CENTRE ASCR, V.V.I., INSTITUTE OF HYDROBIO 06/05/20 1183 1163 20 na +GCA_019997905.1 PRJNA685966 SAMN21249929 na na 283734 283734 Staphylococcus pseudintermedius strain=HSP149 na replaced Complete Genome Major Full 2021/09/15 ASM1999790v1 Universitat Autonoma de Barcelona na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/019/997/905/GCA_019997905.1_ASM1999790v1 from large multi-isolate project na 2022/07/01 haploid bacteria 2592238 2592238 37.5 1 1 1 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 09/08/2021 12:19:04 2465 1929 81 na diff --git a/tests/files/genomes/genbank/assembly_summary_genbank_historical.txt b/tests/files/genomes/genbank/assembly_summary_genbank_historical.txt index 9677d66..3fbe553 100644 --- a/tests/files/genomes/genbank/assembly_summary_genbank_historical.txt +++ b/tests/files/genomes/genbank/assembly_summary_genbank_historical.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
-# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCA_002566855.1 PRJNA400804 SAMN07598389 NUZM00000000.1 na 1396 1396 Bacillus cereus strain=AFS074515 latest Scaffold Major Full 2017/10/17 ASM256685v1 UNC Chapel Hill GCF_002566855.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/566/855/GCA_002566855.1_ASM256685v1 na -GCA_902635445.1 PRJEB33281 SAMEA6073950 CACPNU000000000.1 na 198431 198431 uncultured prokaryote latest Contig Major Full 2019/11/05 AG-915-F08 WOODS HOLE OCEANOGRAPHIC INSTITUTION na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/902/635/445/GCA_902635445.1_AG-915-F08 derived from environmental source; derived from metagenome na -GCA_017159575.1 PRJNA287430 SAMN17764286 AAZEKK000000000.1 na 197 197 Campylobacter jejuni strain=FSIS12137393 latest Contig Major Full 2021/03/03 PDT000946857.1 USDA FSIS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/159/575/GCA_017159575.1_PDT000946857.1 from large multi-isolate project na -GCA_005728625.1 PRJNA280335 SAMN10715290 AADQWW000000000.1 na 28901 28901 Salmonella enterica strain=ADRDL-2252 latest Contig Major Full 2019/05/23 PDT000448312.1 US Food and Drug Administration na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/005/728/625/GCA_005728625.1_PDT000448312.1 from large multi-isolate project na -GCA_013911495.1 PRJNA638822 SAMN15215249 JACETB000000000.1 na 1131 1131 Synechococcus sp. 
MCMED-G31 latest Contig Major Full 2020/07/29 ASM1391149v1 Evolutionary Genomics Group na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/911/495/GCA_013911495.1_ASM1391149v1 derived from metagenome na -GCA_004008395.1 na 2499034 2499034 Mycobacterium phage Cici latest Complete Genome Major Full 2019/01/08 ASM400839v1 na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/008/395/GCA_004008395.1_ASM400839v1 na -GCA_021355205.1 na 2894335 2894335 Burkholderia phage BgManors32 latest Complete Genome Major Full 2021/11/22 ASM2135520v1 na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/355/205/GCA_021355205.1_ASM2135520v1 na -GCA_003635585.1 PRJNA374603 SAMN06329599 MVSU00000000.1 na 210 210 Helicobacter pylori strain=HPAS14 latest Contig Major Full 2018/10/12 ASM363558v1 University of Western Australia GCF_003635585.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/635/585/GCA_003635585.1_ASM363558v1 na -GCA_012763735.1 PRJNA277984 SAMN04510396 AATCVN000000000.1 na 562 562 Escherichia coli strain=CDPHFDLB-F1602032-026A latest Contig Major Full 2020/04/23 PDT000113200.3 US Food and Drug Administration na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/763/735/GCA_012763735.1_PDT000113200.3 from large multi-isolate project na -GCA_013619715.1 PRJNA615626 SAMN14453445 JACEKU000000000.1 na 287 287 Pseudomonas aeruginosa strain=LiP14 latest Contig Major Full 2020/07/24 ASM1361971v1 University of Oxford GCF_013619715.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/619/715/GCA_013619715.1_ASM1361971v1 na -GCA_008787855.1 PRJNA292661 SAMN12842867 AALEUD000000000.1 na 28901 28901 Salmonella enterica strain=CVM N19S0343 latest Contig Major Full 2019/10/01 PDT000594120.1 FDA na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/008/787/855/GCA_008787855.1_PDT000594120.1 from large multi-isolate project na -GCA_903218915.1 PRJEB35770 SAMEA6813852 CAEZVL000000000.1 na 449393 449393 freshwater metagenome latest Contig Major Full 2020/06/05 
UFOp-RE-23may17-586 BIOLOGY CENTRE ASCR, V.V.I., INSTITUTE OF HYDROBIO na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/218/915/GCA_903218915.1_UFOp-RE-23may17-586 derived from environmental source; metagenome na -GCA_008201245.1 PRJNA248792 SAMN03479222 AAJWIJ000000000.1 na 90371 28901 Salmonella enterica subsp. enterica serovar Typhimurium strain=7397 latest Contig Major Full 2019/09/02 PDT000058697.2 Public Health England na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/008/201/245/GCA_008201245.1_PDT000058697.2 from large multi-isolate project na -GCA_011078725.1 PRJNA248792 SAMN03168749 AAPFHW000000000.1 na 90371 28901 Salmonella enterica subsp. enterica serovar Typhimurium strain=H120980533 latest Contig Major Full 2020/03/09 PDT000042974.4 Public Health England na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/078/725/GCA_011078725.1_PDT000042974.4 from large multi-isolate project na -GCA_013549135.1 PRJNA230403 SAMN15522001 AATZYI000000000.1 na 28901 28901 Salmonella enterica strain=PNUSAS152956 latest Contig Major Full 2020/07/23 PDT000787515.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/549/135/GCA_013549135.1_PDT000787515.1 from large multi-isolate project na -GCA_018937815.1 PRJNA218110 SAMN19697485 ABAWPX000000000.1 na 562 562 Escherichia coli strain=PNUSAE074529 latest Contig Major Full 2021/06/17 PDT001069867.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/937/815/GCA_018937815.1_PDT001069867.1 from large multi-isolate project na -GCA_005603115.1 PRJNA230403 SAMN11552442 AADIAU000000000.1 na 28901 28901 Salmonella enterica strain=PNUSAS073825 latest Contig Major Full 2019/05/21 PDT000496874.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/005/603/115/GCA_005603115.1_PDT000496874.1 from large multi-isolate project na -GCA_019997905.1 PRJNA685966 SAMN21249929 na 283734 283734 Staphylococcus pseudintermedius strain=HSP149 latest Complete Genome Major Full 2021/09/15 ASM1999790v1 Universitat Autonoma de 
Barcelona na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/019/997/905/GCA_019997905.1_ASM1999790v1 from large multi-isolate project na -GCA_011897165.1 PRJNA218110 SAMN12361411 AARDFA000000000.1 na 562 562 Escherichia coli strain=PNUSAE027109 latest Contig Major Full 2020/04/02 PDT000549212.2 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/897/165/GCA_011897165.1_PDT000549212.2 from large multi-isolate project na -GCA_015893745.1 PRJNA514245 SAMN15566993 DACSEB000000000.1 na 575 575 Raoultella planticola MISC077 latest Contig Major Full 2020/12/09 PDT000883933.1 National Center for Biotechnology Information na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/015/893/745/GCA_015893745.1_PDT000883933.1 from large multi-isolate project na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. +#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCA_002566855.1 PRJNA400804 SAMN07598389 NUZM00000000.1 na 1396 1396 Bacillus cereus strain=AFS074515 na latest Scaffold Major Full 2017/10/17 ASM256685v1 UNC Chapel Hill GCF_002566855.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/566/855/GCA_002566855.1_ASM256685v1 na na na haploid bacteria 5958823 5958450 34.5 0 100 100 NCBI NCBI Prokaryotic Genome Annotation Pipeline 09/17/17 6299 5914 73 na +GCA_003635585.1 PRJNA374603 SAMN06329599 MVSU00000000.1 na 210 210 Helicobacter pylori strain=HPAS14 na latest Contig Major Full 2018/10/12 
ASM363558v1 University of Western Australia GCF_003635585.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/635/585/GCA_003635585.1_ASM363558v1 na na na haploid bacteria 1577919 1577919 39.0 0 36 36 na na na 0 0 0 na +GCA_004008395.1 na na na na 2499034 2499034 Mycobacterium phage Cici na na latest Complete Genome Major Full 2019/01/08 ASM400839v1 na na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/008/395/GCA_004008395.1_ASM400839v1 na na na haploid viral 51287 51287 63.5 1 1 1 na INSDC Submitter 09/17/20 86 86 0 na +GCA_005603115.1 PRJNA230403 SAMN11552442 AADIAU000000000.1 na 28901 28901 Salmonella enterica strain=PNUSAS073825 na latest Contig Major Full 2019/05/21 PDT000496874.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/005/603/115/GCA_005603115.1_PDT000496874.1 from large multi-isolate project na na haploid bacteria 4688692 4688692 52.0 0 38 38 NCBI NCBI Prokaryotic Genome Annotation Pipeline 05/01/19 4640 4440 104 na +GCA_005728625.1 PRJNA280335 SAMN10715290 AADQWW000000000.1 na 28901 28901 Salmonella enterica strain=ADRDL-2252 na latest Contig Major Full 2019/05/23 PDT000448312.1 US Food and Drug Administration na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/005/728/625/GCA_005728625.1_PDT000448312.1 from large multi-isolate project na na haploid bacteria 4919847 4919847 52.0 0 208 208 NCBI NCBI Prokaryotic Genome Annotation Pipeline 01/23/19 5023 4812 98 na +GCA_008201245.1 PRJNA248792 SAMN03479222 AAJWIJ000000000.1 na 90371 28901 Salmonella enterica subsp. 
enterica serovar Typhimurium strain=7397 na latest Contig Major Full 2019/09/02 PDT000058697.2 Public Health England na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/008/201/245/GCA_008201245.1_PDT000058697.2 from large multi-isolate project na na haploid bacteria 4987018 4987018 52.0 0 167 167 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 08/26/19 5026 4863 62 na +GCA_008787855.1 PRJNA292661 SAMN12842867 AALEUD000000000.1 na 192955 28901 Salmonella enterica subsp. enterica serovar Kentucky strain=CVM N19S0343 na latest Contig Major Full 2019/10/01 PDT000594120.1 FDA na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/008/787/855/GCA_008787855.1_PDT000594120.1 from large multi-isolate project na na haploid bacteria 4968037 4968037 51.5 0 64 64 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 09/28/19 4935 4675 106 na +GCA_011078725.1 PRJNA248792 SAMN03168749 AAPFHW000000000.1 na 90371 28901 Salmonella enterica subsp. enterica serovar Typhimurium strain=H120980533 na latest Contig Major Full 2020/03/09 PDT000042974.4 Public Health England na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/078/725/GCA_011078725.1_PDT000042974.4 from large multi-isolate project na na haploid bacteria 4937268 4937268 52.0 0 112 112 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 08/20/19 4913 4750 70 na +GCA_011897165.1 PRJNA218110 SAMN12361411 AARDFA000000000.1 na 562 562 Escherichia coli strain=PNUSAE027109 na latest Contig Major Full 2020/04/02 PDT000549212.2 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/897/165/GCA_011897165.1_PDT000549212.2 from large multi-isolate project na na haploid bacteria 5406246 5406246 50.0 0 161 161 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 02/20/20 5428 5153 100 na +GCA_012763735.1 PRJNA277984 SAMN04510396 AATCVN000000000.1 na 562 562 Escherichia coli strain=CDPHFDLB-F1602032-026A na latest Contig Major Full 2020/04/23 PDT000113200.3 US Food and Drug Administration na na 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/763/735/GCA_012763735.1_PDT000113200.3 from large multi-isolate project na na haploid bacteria 5293312 5293312 50.0 0 346 346 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 02/29/20 5363 5106 100 na +GCA_013549135.1 PRJNA230403 SAMN15522001 AATZYI000000000.1 na 28901 28901 Salmonella enterica strain=PNUSAS152956 na latest Contig Major Full 2020/07/23 PDT000787515.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/549/135/GCA_013549135.1_PDT000787515.1 from large multi-isolate project na na haploid bacteria 4663961 4663961 52.0 0 29 29 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 07/14/20 4486 4310 99 na +GCA_013619715.1 PRJNA615626 SAMN14453445 JACEKU000000000.1 na 287 287 Pseudomonas aeruginosa strain=LiP14 na latest Contig Major Full 2020/07/24 ASM1361971v1 University of Oxford GCF_013619715.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/619/715/GCA_013619715.1_ASM1361971v1 na na na haploid bacteria 6453882 6453882 66.0 0 36 36 na na na 0 0 0 na +GCA_013911495.1 PRJNA638822 SAMN15215249 JACETB000000000.1 na 1131 1131 Synechococcus sp. 
na MCMED-G31 latest Contig Major Full 2020/07/29 ASM1391149v1 Evolutionary Genomics Group na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/911/495/GCA_013911495.1_ASM1391149v1 derived from metagenome na na haploid bacteria 1342507 1342507 61.5 0 102 102 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 07/24/20 1566 1534 28 na +GCA_015893745.1 PRJNA514245 SAMN15566993 DACSEB000000000.1 na 575 575 Raoultella planticola na MISC077 latest Contig Major Full 2020/12/09 PDT000883933.1 National Center for Biotechnology Information na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/015/893/745/GCA_015893745.1_PDT000883933.1 from large multi-isolate project na na haploid bacteria 5579674 5579674 55.5 0 73 73 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 11/10/20 5214 5054 79 30286803 +GCA_017159575.1 PRJNA287430 SAMN17764286 AAZEKK000000000.1 na 197 197 Campylobacter jejuni strain=FSIS12137393 na latest Contig Major Full 2021/03/03 PDT000946857.1 USDA FSIS na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/159/575/GCA_017159575.1_PDT000946857.1 from large multi-isolate project na na haploid bacteria 1711580 1711580 30.0 0 24 24 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 02/03/21 1815 1717 40 na +GCA_018937815.1 PRJNA218110 SAMN19697485 ABAWPX000000000.1 na 562 562 Escherichia coli strain=PNUSAE074529 na latest Contig Major Full 2021/06/17 PDT001069867.1 CDC na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/937/815/GCA_018937815.1_PDT001069867.1 from large multi-isolate project na na haploid bacteria 5368752 5368752 50.5 0 204 204 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 06/15/21 5468 5198 95 na +GCA_021355205.1 na na na na 2894335 2894335 Burkholderia phage BgManors32 na na latest Complete Genome Major Full 2021/11/17 ASM2135520v1 na GCF_021355205.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/355/205/GCA_021355205.1_ASM2135520v1 na ICTV species exemplar na haploid viral 59762 59762 65.5 1 1 1 na na na 
na na na na +GCA_902635445.1 PRJEB33281 SAMEA6073950 CACPNU000000000.1 na 198431 198431 uncultured prokaryote na na latest Contig Major Full 2019/11/05 AG-915-F08 WOODS HOLE OCEANOGRAPHIC INSTITUTION na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/902/635/445/GCA_902635445.1_AG-915-F08 derived from environmental source; derived from metagenome; genus undefined na na haploid other 92318 92318 29.5 0 5 5 na na na 0 0 0 na +GCA_903218915.1 PRJEB35770 SAMEA6813852 CAEZVL000000000.1 na 449393 449393 freshwater metagenome na na latest Contig Major Full 2020/06/05 UFOp-RE-23may17-586 BIOLOGY CENTRE ASCR, V.V.I., INSTITUTE OF HYDROBIO na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/903/218/915/GCA_903218915.1_UFOp-RE-23may17-586 derived from environmental source; genus undefined; metagenome na na haploid metagenomes 1184075 1184075 52.0 0 217 217 BIOLOGY CENTRE ASCR, V.V.I., INSTITUTE OF HYDROBIO Annotation submitted by BIOLOGY CENTRE ASCR, V.V.I., INSTITUTE OF HYDROBIO 06/05/20 1183 1163 20 na +GCA_019997905.1 PRJNA685966 SAMN21249929 na na 283734 283734 Staphylococcus pseudintermedius strain=HSP149 na replaced Complete Genome Major Full 2021/09/15 ASM1999790v1 Universitat Autonoma de Barcelona na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/019/997/905/GCA_019997905.1_ASM1999790v1 from large multi-isolate project na 2022/07/01 haploid bacteria 2592238 2592238 37.5 1 1 1 NCBI NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 09/08/2021 12:19:04 2465 1929 81 na diff --git a/tests/files/genomes/genbank/fungi/assembly_summary.txt b/tests/files/genomes/genbank/fungi/assembly_summary.txt index e6f940c..989e2e1 100644 --- a/tests/files/genomes/genbank/fungi/assembly_summary.txt +++ b/tests/files/genomes/genbank/fungi/assembly_summary.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
-# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCA_003708985.2 PRJNA429441 SAMN08343249 PPPC00000000.2 representative genome 271357 271357 [Candida] gorgasii strain=NRRL Y-27707 latest Scaffold Major Full 2018/11/20 ASM370898v2 UW-Madison na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/708/985/GCA_003708985.2_ASM370898v2 assembly from type material na -GCA_001599295.1 PRJDB3621 SAMD00028341 BCGN00000000.1 representative genome 54094 54094 Sporopachydermia quercuum strain=JCM 9486 latest Scaffold Major Full 2016/03/01 JCM_9486_assembly_v001 RIKEN Center for Life Science Technologies, Division of Genomic Technologies na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/599/295/GCA_001599295.1_JCM_9486_assembly_v001 na -GCA_001636725.1 PRJNA72737 SAMN04908328 AZHB00000000.1 representative genome 1081104 114497 Cordyceps fumosorosea ARSEF 2679 strain=ARSEF 2679 latest Scaffold Major Full 2016/05/04 ISF 1.0 Shanghai Institutes for Biological Sciences, CAS GCF_001636725.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/636/725/GCA_001636725.1_ISF_1.0 na -GCA_000467735.1 PRJNA81799 SAMN02981409 AJFL00000000.1 representative genome 1136865 37885 Rhytidhysteron rufulum CBS 306.38 strain=CBS 306.38 latest Contig Major Full 2013/09/16 ASM46773v1 Assembling the Fungal Tree of Life (AFTOL) na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/467/735/GCA_000467735.1_ASM46773v1 na -GCA_001950535.1 PRJDB3737 SAMD00028438 BCKA00000000.1 representative genome 5077 5077 Penicillium citrinum strain=JCM 22607 latest Scaffold Major Full 2016/12/09 JCM_22607_assembly_v001 RIKEN Center for Life Science Technologies, Division of Genomic Technologies na na 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/950/535/GCA_001950535.1_JCM_22607_assembly_v001 na -GCA_003277105.1 PRJNA396809 SAMN07436824 NPYI00000000.1 na 4932 4932 Saccharomyces cerevisiae strain=HN7 latest Chromosome Major Full 2018/11/28 ASM327710v1 Institute Of Microbiology Chinese Academy of Sciences na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/277/105/GCA_003277105.1_ASM327710v1 na -GCA_009803805.1 PRJNA487060 SAMN09910564 RAMV00000000.1 na 29879 29879 Neurospora discreta ecotype=NMWA, /strain=PS4BIDRA449 latest Scaffold Major Full 2019/12/27 ASM980380v1 University of California, Berkeley na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/009/803/805/GCA_009803805.1_ASM980380v1 na -GCA_019207905.1 PRJNA706707 SAMN18128823 JAHLVQ000000000.1 na 460523 460523 Ogataea polymorpha Y-2423 latest Scaffold Major Full 2021/07/13 ASM1920790v1 Colorado College na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/019/207/905/GCA_019207905.1_ASM1920790v1 na -GCA_013282825.1 PRJNA602542 SAMN13878901 JAACJH000000000.1 na 156630 156630 Alternaria arborescens strain=NRRL 20593 latest Scaffold Major Full 2020/06/06 ASM1328282v1 US Department of Agriculture, Agriculture Research Service na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/282/825/GCA_013282825.1_ASM1328282v1 na -GCA_012656115.1 PRJNA592352 SAMN13422809 JAAAQC000000000.1 na 746128 746128 Aspergillus fumigatus strain=CNM-CM8686 latest Scaffold Major Full 2020/04/22 ASM1265611v1 UNICAMP na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/656/115/GCA_012656115.1_ASM1265611v1 na -GCA_905105095.1 PRJEB27419 SAMEA4753515 CAJHKB000000000.1 na 318829 318829 Pyricularia oryzae latest Scaffold Major Full 2020/11/22 Assembly of M.oryzae isolate BF48 genome UNIVERSITY OF EXETER na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/105/095/GCA_905105095.1_Assembly_of_M.oryzae_isolate_BF48_genome na -GCA_004917135.1 PRJNA488010 SAMN10031622 QZAJ00000000.1 na 5580 5580 Aureobasidium pullulans strain=EXF-11318 
latest Contig Major Full 2019/04/26 ASM491713v1 Biotechnical Faculty, University of Ljubljana na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/917/135/GCA_004917135.1_ASM491713v1 na -GCA_007556565.1 PRJNA534185 SAMN11479276 SWCR00000000.1 representative genome 40997 40997 Elsinoe fawcettii DAR-70024 latest Scaffold Major Full 2019/07/26 ASM755656v1 Yeungnam University na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/007/556/565/GCA_007556565.1_ASM755656v1 na -GCA_011022315.1 PRJNA522669 SAMN10948597 representative genome 27292 27292 Saccharomyces pastorianus strain=CBS 1483 latest Chromosome Major Full 2020/02/26 ASM1102231v1 Delft University of Technology na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/022/315/GCA_011022315.1_ASM1102231v1 na -GCA_017867755.1 PRJNA680387 SAMN16879102 JAEDSJ000000000.1 na 4932 4932 Saccharomyces cerevisiae strain=SAN33 latest Scaffold Major Full 2021/04/08 ASM1786775v1 Institute of Microbiology na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/867/755/GCA_017867755.1_ASM1786775v1 na -GCA_001680595.1 PRJNA289542 SAMN03857101 MAEE00000000.1 na 232081 232081 Fusarium tucumaniae strain=NRRL 31781 latest Contig Major Full 2016/07/06 ASM168059v1 Iowa State University na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/680/595/GCA_001680595.1_ASM168059v1 na -GCA_018345925.1 PRJNA677929 SAMN16774514 JADPOE000000000.1 na 5518 5518 Fusarium graminearum strain=042826 latest Scaffold Major Full 2021/05/12 ASM1834592v1 University of Warmia and Mazury in Olsztyn na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/345/925/GCA_018345925.1_ASM1834592v1 na -GCA_003705455.2 PRJNA429441 SAMN08343424 PPIN00000000.2 representative genome 54552 54552 Pichia occidentalis strain=NRRL Y-7552 latest Scaffold Major Full 2018/11/20 ASM370545v2 UW-Madison na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/705/455/GCA_003705455.2_ASM370545v2 assembly from type material na -GCA_000827315.1 PRJNA61203 SAMN00738176 JMDN00000000.1 
representative genome 765440 80663 Piloderma croceum F 1598 strain=F 1598 latest Scaffold Major Full 2015/01/30 Piloderma croceum F 1598 v1.0 DOE Joint Genome Institute na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/827/315/GCA_000827315.1_Piloderma_croceum_F_1598_v1.0 na -GCA_905066965.2 PRJEB40915 SAMEA7473260 CAJHIF000000000.2 na 318829 318829 Pyricularia oryzae AG059 latest Contig Major Full 2020/12/16 AG059_contigs_polished THE SAINSBURY LABORATORY na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/066/965/GCA_905066965.2_AG059_contigs_polished na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. +#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCA_000467735.1 PRJNA81799 SAMN02981409 AJFL00000000.1 representative genome 1136865 37885 Rhytidhysteron rufulum CBS 306.38 strain=CBS 306.38 na latest Contig Major Full 2013/09/16 ASM46773v1 Assembling the Fungal Tree of Life (AFTOL) na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/467/735/GCA_000467735.1_ASM46773v1 na na na haploid fungi 39856753 39856753 47.5 0 3976 3976 na na na 0 0 0 na +GCA_000827315.1 PRJNA61203 SAMN00738176 JMDN00000000.1 representative genome 765440 80663 Piloderma croceum F 1598 strain=F 1598 na latest Scaffold Major Full 2015/01/30 Piloderma croceum F 1598 v1.0 DOE Joint Genome Institute na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/827/315/GCA_000827315.1_Piloderma_croceum_F_1598_v1.0 na na na 
haploid fungi 59326866 52312419 46.0 0 715 4469 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 01/30/15 21607 21519 74 25706625 +GCA_001599295.1 PRJDB3621 SAMD00028341 BCGN00000000.1 representative genome 54094 54094 Sporopachydermia quercuum strain=JCM 9486 na latest Scaffold Major Full 2016/03/01 JCM_9486_assembly_v001 RIKEN Center for Life Science Technologies, Division of Genomic Technologies na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/599/295/GCA_001599295.1_JCM_9486_assembly_v001 na na na haploid fungi 16411463 16381402 36.0 0 15 15 na na na 0 0 0 30415838 +GCA_001636725.1 PRJNA72737 SAMN04908328 AZHB00000000.1 representative genome 1081104 114497 Cordyceps fumosorosea ARSEF 2679 strain=ARSEF 2679 na latest Scaffold Major Full 2016/05/04 ISF 1.0 Shanghai Institutes for Biological Sciences, CAS GCF_001636725.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/636/725/GCA_001636725.1_ISF_1.0 na na na haploid fungi 33485962 33399778 53.5 0 430 430 Shanghai Institutes for Biological Sciences, CAS Annotation submitted by Shanghai Institutes for Biological Sciences, CAS 05/04/16 10061 10061 0 27071652 +GCA_001680595.1 PRJNA289542 SAMN03857101 MAEE00000000.1 na 232081 232081 Fusarium tucumaniae strain=NRRL 31781 na latest Contig Major Full 2016/07/06 ASM168059v1 Iowa State University na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/680/595/GCA_001680595.1_ASM168059v1 na na na haploid fungi 49676379 49676379 49.5 0 22979 22979 na na na 0 0 0 na +GCA_001950535.1 PRJDB3737 SAMD00028438 BCKA00000000.1 na 5077 5077 Penicillium citrinum strain=JCM 22607 na latest Scaffold Major Full 2016/12/09 JCM_22607_assembly_v001 RIKEN Center for Life Science Technologies, Division of Genomic Technologies na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/950/535/GCA_001950535.1_JCM_22607_assembly_v001 na na na haploid fungi 33998198 33656517 46.0 0 50 50 na na na 0 0 0 na +GCA_003277105.1 PRJNA396809 SAMN07436824 
NPYI00000000.1 na 4932 4932 Saccharomyces cerevisiae strain=HN7 na latest Chromosome Major Full 2018/11/28 ASM327710v1 Institute Of Microbiology Chinese Academy of Sciences na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/277/105/GCA_003277105.1_ASM327710v1 na na na haploid fungi 11674929 11572329 38.0 16 56 56 na na na 0 0 0 na +GCA_003705455.2 PRJNA429441 SAMN08343424 PPIN00000000.2 na 54552 54552 Pichia occidentalis strain=NRRL Y-7552 na latest Scaffold Major Full 2018/11/20 ASM370545v2 UW-Madison na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/705/455/GCA_003705455.2_ASM370545v2 na assembly from type material na haploid fungi 12024331 12017260 40.0 0 720 720 na na na 0 0 0 30415838 +GCA_003708985.2 PRJNA429441 SAMN08343249 PPPC00000000.2 representative genome 271357 271357 [Candida] gorgasii strain=NRRL Y-27707 na latest Scaffold Major Full 2018/11/20 ASM370898v2 UW-Madison na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/708/985/GCA_003708985.2_ASM370898v2 na assembly from type material na haploid fungi 11351459 11351155 36.0 0 95 95 na na na 0 0 0 30415838 +GCA_004917135.1 PRJNA488010 SAMN10031622 QZAJ00000000.1 na 5580 5580 Aureobasidium pullulans strain=EXF-11318 na latest Contig Major Full 2019/04/26 ASM491713v1 Biotechnical Faculty, University of Ljubljana na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/917/135/GCA_004917135.1_ASM491713v1 na na na haploid fungi 28116540 28116540 50.5 0 1490 1490 Biotechnical Faculty, University of Ljubljana Annotation submitted by Biotechnical Faculty, University of Ljubljana 04/26/19 10706 10706 0 na +GCA_007556565.1 PRJNA534185 SAMN11479276 SWCR00000000.1 representative genome 40997 40997 Elsinoe fawcettii na DAR-70024 latest Scaffold Major Full 2019/07/26 ASM755656v1 Yeungnam University na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/007/556/565/GCA_007556565.1_ASM755656v1 na na na haploid fungi 26325864 26260432 52.0 0 53 53 na na na 0 0 0 na +GCA_009803805.1 PRJNA487060 SAMN09910564 
RAMV00000000.1 na 29879 29879 Neurospora discreta strain=PS4BIDRA449, /ecotype=NMWA na latest Scaffold Major Full 2019/12/27 ASM980380v1 University of California, Berkeley na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/009/803/805/GCA_009803805.1_ASM980380v1 na na na haploid fungi 37242299 37238495 49.0 0 778 778 na na na 0 0 0 na +GCA_011022315.1 PRJNA522669 SAMN10948597 na representative genome 27292 27292 Saccharomyces pastorianus strain=CBS 1483 na latest Chromosome Major Full 2020/02/26 ASM1102231v1 Delft University of Technology na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/022/315/GCA_011022315.1_ASM1102231v1 na na na haploid fungi 22966002 22963002 39.0 31 31 31 Delft University of Technology Annotation submitted by Delft University of Technology 02/26/20 11312 10760 552 31791228 +GCA_012656115.1 PRJNA592352 SAMN13422809 JAAAQC000000000.1 na 746128 746128 Aspergillus fumigatus strain=CNM-CM8686 na latest Scaffold Major Full 2020/04/22 ASM1265611v1 UNICAMP na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/656/115/GCA_012656115.1_ASM1265611v1 na na na haploid fungi 28139937 28139480 49.5 0 1480 1480 UNICAMP Annotation submitted by UNICAMP 04/22/20 8938 8938 0 na +GCA_013282825.1 PRJNA602542 SAMN13878901 JAACJH000000000.1 na 156630 156630 Alternaria arborescens strain=NRRL 20593 na latest Scaffold Major Full 2020/06/06 ASM1328282v1 US Department of Agriculture, Agriculture Research Service na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/282/825/GCA_013282825.1_ASM1328282v1 na na na haploid fungi 33589656 33587128 51.0 0 701 701 na na na 0 0 0 na +GCA_017867755.1 PRJNA680387 SAMN16879102 JAEDSJ000000000.1 na 4932 4932 Saccharomyces cerevisiae strain=SAN33 na latest Scaffold Major Full 2021/04/08 ASM1786775v1 Institute of Microbiology na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/867/755/GCA_017867755.1_ASM1786775v1 na na na haploid fungi 11636434 11622311 38.0 0 602 602 na na na 0 0 0 33679656 +GCA_018345925.1 PRJNA677929 
SAMN16774514 JADPOE000000000.1 na 5518 5518 Fusarium graminearum strain=042826 na latest Scaffold Major Full 2021/05/12 ASM1834592v1 University of Warmia and Mazury in Olsztyn na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/345/925/GCA_018345925.1_ASM1834592v1 na na na haploid fungi 36060139 36060079 48.0 0 28 28 na na na 0 0 0 na +GCA_019207905.1 PRJNA706707 SAMN18128823 JAHLVQ000000000.1 na 460523 460523 Ogataea polymorpha na Y-2423 latest Scaffold Major Full 2021/07/13 ASM1920790v1 Colorado College na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/019/207/905/GCA_019207905.1_ASM1920790v1 na na na haploid fungi 8889312 8889172 47.5 0 64 64 Colorado College Annotation submitted by Colorado College 07/13/21 5436 5434 0 na +GCA_905066965.2 PRJEB40915 SAMEA7473260 CAJHIF000000000.2 na 318829 318829 Pyricularia oryzae na AG059 latest Contig Major Full 2020/12/16 AG059_contigs_polished THE SAINSBURY LABORATORY na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/066/965/GCA_905066965.2_AG059_contigs_polished na na na haploid fungi 47743121 47743121 50.5 0 37 37 na na na 0 0 0 na +GCA_905105095.1 PRJEB27419 SAMEA4753515 CAJHKB000000000.1 na 318829 318829 Pyricularia oryzae na na latest Scaffold Major Full 2020/11/22 Assembly of M.oryzae isolate BF48 genome UNIVERSITY OF EXETER na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/105/095/GCA_905105095.1_Assembly_of_M.oryzae_isolate_BF48_genome na na na haploid fungi 40015456 39526353 51.0 0 4592 4592 na na na 0 0 0 na diff --git a/tests/files/genomes/genbank/fungi/assembly_summary_historical.txt b/tests/files/genomes/genbank/fungi/assembly_summary_historical.txt index e6f940c..989e2e1 100644 --- a/tests/files/genomes/genbank/fungi/assembly_summary_historical.txt +++ b/tests/files/genomes/genbank/fungi/assembly_summary_historical.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
-# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCA_003708985.2 PRJNA429441 SAMN08343249 PPPC00000000.2 representative genome 271357 271357 [Candida] gorgasii strain=NRRL Y-27707 latest Scaffold Major Full 2018/11/20 ASM370898v2 UW-Madison na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/708/985/GCA_003708985.2_ASM370898v2 assembly from type material na -GCA_001599295.1 PRJDB3621 SAMD00028341 BCGN00000000.1 representative genome 54094 54094 Sporopachydermia quercuum strain=JCM 9486 latest Scaffold Major Full 2016/03/01 JCM_9486_assembly_v001 RIKEN Center for Life Science Technologies, Division of Genomic Technologies na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/599/295/GCA_001599295.1_JCM_9486_assembly_v001 na -GCA_001636725.1 PRJNA72737 SAMN04908328 AZHB00000000.1 representative genome 1081104 114497 Cordyceps fumosorosea ARSEF 2679 strain=ARSEF 2679 latest Scaffold Major Full 2016/05/04 ISF 1.0 Shanghai Institutes for Biological Sciences, CAS GCF_001636725.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/636/725/GCA_001636725.1_ISF_1.0 na -GCA_000467735.1 PRJNA81799 SAMN02981409 AJFL00000000.1 representative genome 1136865 37885 Rhytidhysteron rufulum CBS 306.38 strain=CBS 306.38 latest Contig Major Full 2013/09/16 ASM46773v1 Assembling the Fungal Tree of Life (AFTOL) na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/467/735/GCA_000467735.1_ASM46773v1 na -GCA_001950535.1 PRJDB3737 SAMD00028438 BCKA00000000.1 representative genome 5077 5077 Penicillium citrinum strain=JCM 22607 latest Scaffold Major Full 2016/12/09 JCM_22607_assembly_v001 RIKEN Center for Life Science Technologies, Division of Genomic Technologies na na 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/950/535/GCA_001950535.1_JCM_22607_assembly_v001 na -GCA_003277105.1 PRJNA396809 SAMN07436824 NPYI00000000.1 na 4932 4932 Saccharomyces cerevisiae strain=HN7 latest Chromosome Major Full 2018/11/28 ASM327710v1 Institute Of Microbiology Chinese Academy of Sciences na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/277/105/GCA_003277105.1_ASM327710v1 na -GCA_009803805.1 PRJNA487060 SAMN09910564 RAMV00000000.1 na 29879 29879 Neurospora discreta ecotype=NMWA, /strain=PS4BIDRA449 latest Scaffold Major Full 2019/12/27 ASM980380v1 University of California, Berkeley na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/009/803/805/GCA_009803805.1_ASM980380v1 na -GCA_019207905.1 PRJNA706707 SAMN18128823 JAHLVQ000000000.1 na 460523 460523 Ogataea polymorpha Y-2423 latest Scaffold Major Full 2021/07/13 ASM1920790v1 Colorado College na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/019/207/905/GCA_019207905.1_ASM1920790v1 na -GCA_013282825.1 PRJNA602542 SAMN13878901 JAACJH000000000.1 na 156630 156630 Alternaria arborescens strain=NRRL 20593 latest Scaffold Major Full 2020/06/06 ASM1328282v1 US Department of Agriculture, Agriculture Research Service na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/282/825/GCA_013282825.1_ASM1328282v1 na -GCA_012656115.1 PRJNA592352 SAMN13422809 JAAAQC000000000.1 na 746128 746128 Aspergillus fumigatus strain=CNM-CM8686 latest Scaffold Major Full 2020/04/22 ASM1265611v1 UNICAMP na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/656/115/GCA_012656115.1_ASM1265611v1 na -GCA_905105095.1 PRJEB27419 SAMEA4753515 CAJHKB000000000.1 na 318829 318829 Pyricularia oryzae latest Scaffold Major Full 2020/11/22 Assembly of M.oryzae isolate BF48 genome UNIVERSITY OF EXETER na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/105/095/GCA_905105095.1_Assembly_of_M.oryzae_isolate_BF48_genome na -GCA_004917135.1 PRJNA488010 SAMN10031622 QZAJ00000000.1 na 5580 5580 Aureobasidium pullulans strain=EXF-11318 
latest Contig Major Full 2019/04/26 ASM491713v1 Biotechnical Faculty, University of Ljubljana na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/917/135/GCA_004917135.1_ASM491713v1 na -GCA_007556565.1 PRJNA534185 SAMN11479276 SWCR00000000.1 representative genome 40997 40997 Elsinoe fawcettii DAR-70024 latest Scaffold Major Full 2019/07/26 ASM755656v1 Yeungnam University na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/007/556/565/GCA_007556565.1_ASM755656v1 na -GCA_011022315.1 PRJNA522669 SAMN10948597 representative genome 27292 27292 Saccharomyces pastorianus strain=CBS 1483 latest Chromosome Major Full 2020/02/26 ASM1102231v1 Delft University of Technology na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/022/315/GCA_011022315.1_ASM1102231v1 na -GCA_017867755.1 PRJNA680387 SAMN16879102 JAEDSJ000000000.1 na 4932 4932 Saccharomyces cerevisiae strain=SAN33 latest Scaffold Major Full 2021/04/08 ASM1786775v1 Institute of Microbiology na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/867/755/GCA_017867755.1_ASM1786775v1 na -GCA_001680595.1 PRJNA289542 SAMN03857101 MAEE00000000.1 na 232081 232081 Fusarium tucumaniae strain=NRRL 31781 latest Contig Major Full 2016/07/06 ASM168059v1 Iowa State University na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/680/595/GCA_001680595.1_ASM168059v1 na -GCA_018345925.1 PRJNA677929 SAMN16774514 JADPOE000000000.1 na 5518 5518 Fusarium graminearum strain=042826 latest Scaffold Major Full 2021/05/12 ASM1834592v1 University of Warmia and Mazury in Olsztyn na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/345/925/GCA_018345925.1_ASM1834592v1 na -GCA_003705455.2 PRJNA429441 SAMN08343424 PPIN00000000.2 representative genome 54552 54552 Pichia occidentalis strain=NRRL Y-7552 latest Scaffold Major Full 2018/11/20 ASM370545v2 UW-Madison na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/705/455/GCA_003705455.2_ASM370545v2 assembly from type material na -GCA_000827315.1 PRJNA61203 SAMN00738176 JMDN00000000.1 
representative genome 765440 80663 Piloderma croceum F 1598 strain=F 1598 latest Scaffold Major Full 2015/01/30 Piloderma croceum F 1598 v1.0 DOE Joint Genome Institute na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/827/315/GCA_000827315.1_Piloderma_croceum_F_1598_v1.0 na -GCA_905066965.2 PRJEB40915 SAMEA7473260 CAJHIF000000000.2 na 318829 318829 Pyricularia oryzae AG059 latest Contig Major Full 2020/12/16 AG059_contigs_polished THE SAINSBURY LABORATORY na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/066/965/GCA_905066965.2_AG059_contigs_polished na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. +#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCA_000467735.1 PRJNA81799 SAMN02981409 AJFL00000000.1 representative genome 1136865 37885 Rhytidhysteron rufulum CBS 306.38 strain=CBS 306.38 na latest Contig Major Full 2013/09/16 ASM46773v1 Assembling the Fungal Tree of Life (AFTOL) na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/467/735/GCA_000467735.1_ASM46773v1 na na na haploid fungi 39856753 39856753 47.5 0 3976 3976 na na na 0 0 0 na +GCA_000827315.1 PRJNA61203 SAMN00738176 JMDN00000000.1 representative genome 765440 80663 Piloderma croceum F 1598 strain=F 1598 na latest Scaffold Major Full 2015/01/30 Piloderma croceum F 1598 v1.0 DOE Joint Genome Institute na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/827/315/GCA_000827315.1_Piloderma_croceum_F_1598_v1.0 na na na 
haploid fungi 59326866 52312419 46.0 0 715 4469 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 01/30/15 21607 21519 74 25706625 +GCA_001599295.1 PRJDB3621 SAMD00028341 BCGN00000000.1 representative genome 54094 54094 Sporopachydermia quercuum strain=JCM 9486 na latest Scaffold Major Full 2016/03/01 JCM_9486_assembly_v001 RIKEN Center for Life Science Technologies, Division of Genomic Technologies na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/599/295/GCA_001599295.1_JCM_9486_assembly_v001 na na na haploid fungi 16411463 16381402 36.0 0 15 15 na na na 0 0 0 30415838 +GCA_001636725.1 PRJNA72737 SAMN04908328 AZHB00000000.1 representative genome 1081104 114497 Cordyceps fumosorosea ARSEF 2679 strain=ARSEF 2679 na latest Scaffold Major Full 2016/05/04 ISF 1.0 Shanghai Institutes for Biological Sciences, CAS GCF_001636725.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/636/725/GCA_001636725.1_ISF_1.0 na na na haploid fungi 33485962 33399778 53.5 0 430 430 Shanghai Institutes for Biological Sciences, CAS Annotation submitted by Shanghai Institutes for Biological Sciences, CAS 05/04/16 10061 10061 0 27071652 +GCA_001680595.1 PRJNA289542 SAMN03857101 MAEE00000000.1 na 232081 232081 Fusarium tucumaniae strain=NRRL 31781 na latest Contig Major Full 2016/07/06 ASM168059v1 Iowa State University na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/680/595/GCA_001680595.1_ASM168059v1 na na na haploid fungi 49676379 49676379 49.5 0 22979 22979 na na na 0 0 0 na +GCA_001950535.1 PRJDB3737 SAMD00028438 BCKA00000000.1 na 5077 5077 Penicillium citrinum strain=JCM 22607 na latest Scaffold Major Full 2016/12/09 JCM_22607_assembly_v001 RIKEN Center for Life Science Technologies, Division of Genomic Technologies na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/950/535/GCA_001950535.1_JCM_22607_assembly_v001 na na na haploid fungi 33998198 33656517 46.0 0 50 50 na na na 0 0 0 na +GCA_003277105.1 PRJNA396809 SAMN07436824 
NPYI00000000.1 na 4932 4932 Saccharomyces cerevisiae strain=HN7 na latest Chromosome Major Full 2018/11/28 ASM327710v1 Institute Of Microbiology Chinese Academy of Sciences na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/277/105/GCA_003277105.1_ASM327710v1 na na na haploid fungi 11674929 11572329 38.0 16 56 56 na na na 0 0 0 na +GCA_003705455.2 PRJNA429441 SAMN08343424 PPIN00000000.2 na 54552 54552 Pichia occidentalis strain=NRRL Y-7552 na latest Scaffold Major Full 2018/11/20 ASM370545v2 UW-Madison na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/705/455/GCA_003705455.2_ASM370545v2 na assembly from type material na haploid fungi 12024331 12017260 40.0 0 720 720 na na na 0 0 0 30415838 +GCA_003708985.2 PRJNA429441 SAMN08343249 PPPC00000000.2 representative genome 271357 271357 [Candida] gorgasii strain=NRRL Y-27707 na latest Scaffold Major Full 2018/11/20 ASM370898v2 UW-Madison na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/708/985/GCA_003708985.2_ASM370898v2 na assembly from type material na haploid fungi 11351459 11351155 36.0 0 95 95 na na na 0 0 0 30415838 +GCA_004917135.1 PRJNA488010 SAMN10031622 QZAJ00000000.1 na 5580 5580 Aureobasidium pullulans strain=EXF-11318 na latest Contig Major Full 2019/04/26 ASM491713v1 Biotechnical Faculty, University of Ljubljana na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/917/135/GCA_004917135.1_ASM491713v1 na na na haploid fungi 28116540 28116540 50.5 0 1490 1490 Biotechnical Faculty, University of Ljubljana Annotation submitted by Biotechnical Faculty, University of Ljubljana 04/26/19 10706 10706 0 na +GCA_007556565.1 PRJNA534185 SAMN11479276 SWCR00000000.1 representative genome 40997 40997 Elsinoe fawcettii na DAR-70024 latest Scaffold Major Full 2019/07/26 ASM755656v1 Yeungnam University na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/007/556/565/GCA_007556565.1_ASM755656v1 na na na haploid fungi 26325864 26260432 52.0 0 53 53 na na na 0 0 0 na +GCA_009803805.1 PRJNA487060 SAMN09910564 
RAMV00000000.1 na 29879 29879 Neurospora discreta strain=PS4BIDRA449, /ecotype=NMWA na latest Scaffold Major Full 2019/12/27 ASM980380v1 University of California, Berkeley na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/009/803/805/GCA_009803805.1_ASM980380v1 na na na haploid fungi 37242299 37238495 49.0 0 778 778 na na na 0 0 0 na +GCA_011022315.1 PRJNA522669 SAMN10948597 na representative genome 27292 27292 Saccharomyces pastorianus strain=CBS 1483 na latest Chromosome Major Full 2020/02/26 ASM1102231v1 Delft University of Technology na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/011/022/315/GCA_011022315.1_ASM1102231v1 na na na haploid fungi 22966002 22963002 39.0 31 31 31 Delft University of Technology Annotation submitted by Delft University of Technology 02/26/20 11312 10760 552 31791228 +GCA_012656115.1 PRJNA592352 SAMN13422809 JAAAQC000000000.1 na 746128 746128 Aspergillus fumigatus strain=CNM-CM8686 na latest Scaffold Major Full 2020/04/22 ASM1265611v1 UNICAMP na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/656/115/GCA_012656115.1_ASM1265611v1 na na na haploid fungi 28139937 28139480 49.5 0 1480 1480 UNICAMP Annotation submitted by UNICAMP 04/22/20 8938 8938 0 na +GCA_013282825.1 PRJNA602542 SAMN13878901 JAACJH000000000.1 na 156630 156630 Alternaria arborescens strain=NRRL 20593 na latest Scaffold Major Full 2020/06/06 ASM1328282v1 US Department of Agriculture, Agriculture Research Service na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/282/825/GCA_013282825.1_ASM1328282v1 na na na haploid fungi 33589656 33587128 51.0 0 701 701 na na na 0 0 0 na +GCA_017867755.1 PRJNA680387 SAMN16879102 JAEDSJ000000000.1 na 4932 4932 Saccharomyces cerevisiae strain=SAN33 na latest Scaffold Major Full 2021/04/08 ASM1786775v1 Institute of Microbiology na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/017/867/755/GCA_017867755.1_ASM1786775v1 na na na haploid fungi 11636434 11622311 38.0 0 602 602 na na na 0 0 0 33679656 +GCA_018345925.1 PRJNA677929 
SAMN16774514 JADPOE000000000.1 na 5518 5518 Fusarium graminearum strain=042826 na latest Scaffold Major Full 2021/05/12 ASM1834592v1 University of Warmia and Mazury in Olsztyn na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/345/925/GCA_018345925.1_ASM1834592v1 na na na haploid fungi 36060139 36060079 48.0 0 28 28 na na na 0 0 0 na +GCA_019207905.1 PRJNA706707 SAMN18128823 JAHLVQ000000000.1 na 460523 460523 Ogataea polymorpha na Y-2423 latest Scaffold Major Full 2021/07/13 ASM1920790v1 Colorado College na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/019/207/905/GCA_019207905.1_ASM1920790v1 na na na haploid fungi 8889312 8889172 47.5 0 64 64 Colorado College Annotation submitted by Colorado College 07/13/21 5436 5434 0 na +GCA_905066965.2 PRJEB40915 SAMEA7473260 CAJHIF000000000.2 na 318829 318829 Pyricularia oryzae na AG059 latest Contig Major Full 2020/12/16 AG059_contigs_polished THE SAINSBURY LABORATORY na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/066/965/GCA_905066965.2_AG059_contigs_polished na na na haploid fungi 47743121 47743121 50.5 0 37 37 na na na 0 0 0 na +GCA_905105095.1 PRJEB27419 SAMEA4753515 CAJHKB000000000.1 na 318829 318829 Pyricularia oryzae na na latest Scaffold Major Full 2020/11/22 Assembly of M.oryzae isolate BF48 genome UNIVERSITY OF EXETER na na https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/105/095/GCA_905105095.1_Assembly_of_M.oryzae_isolate_BF48_genome na na na haploid fungi 40015456 39526353 51.0 0 4592 4592 na na na 0 0 0 na diff --git a/tests/files/genomes/refseq/archaea/assembly_summary.txt b/tests/files/genomes/refseq/archaea/assembly_summary.txt index 7019e3d..008591f 100644 --- a/tests/files/genomes/refseq/archaea/assembly_summary.txt +++ b/tests/files/genomes/refseq/archaea/assembly_summary.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
-# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCF_004137855.1 PRJNA224116 SAMN08804086 QBKB00000000.1 representative genome 2138083 2138083 Methanohalophilus profundi strain=SLHTYRO latest Scaffold Major Full 2019/02/05 ASM413785v1 UBO GCA_004137855.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/137/855/GCF_004137855.1_ASM413785v1 assembly from type material na -GCF_009184545.1 PRJNA224116 SAMN09291540 QJOW00000000.1 representative genome 2212850 2212850 Halosegnis rubeus strain=F17-44 latest Scaffold Major Full 2019/10/19 ASM918454v1 University of Sevilla, Spain GCA_009184545.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/184/545/GCF_009184545.1_ASM918454v1 assembly from type material na -GCF_009674625.1 PRJNA224116 SAMN13255728 WKJQ00000000.1 representative genome 2666143 2666143 Haloferax marinum strain=MBLA0078 latest Contig Major Full 2019/11/19 ASM967462v1 Incheon National University GCA_009674625.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/674/625/GCF_009674625.1_ASM967462v1 assembly from type material na -GCF_002494345.1 PRJNA224116 SAMN07714153 NXNI00000000.1 representative genome 373386 373386 Natrinema ejinorense strain=JCM 13890 latest Contig Major Full 2017/10/03 ASM249434v1 World Institute of Kimchi GCA_002494345.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/494/345/GCF_002494345.1_ASM249434v1 assembly from type material na -GCF_000513855.1 PRJNA224116 SAMN02597199 AZUU00000000.1 na 1150674 94694 Desulfurococcus amylolyticus Z-533 strain=Z-533 latest Scaffold Major Full 2014/01/07 ASM51385v1 DOE Joint Genome Institute GCA_000513855.1 identical 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/513/855/GCF_000513855.1_ASM51385v1 assembly from type material na -GCF_900116205.1 PRJNA224116 SAMN04488556 FOZS00000000.1 representative genome 619731 619731 Halostagnicola kamekurae strain=DSM 22427 latest Contig Major Full 2016/11/02 IMG-taxon 2639762563 annotated assembly DOE - JOINT GENOME INSTITUTE GCA_900116205.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/116/205/GCF_900116205.1_IMG-taxon_2639762563_annotated_assembly assembly from type material na -GCF_002215305.1 PRJNA224116 SAMN05822533 MKFG00000000.1 na 2247 2247 Halorubrum lacusprofundi strain=DL18 latest Contig Major Full 2017/07/06 ASM221530v1 University of NSW GCA_002215305.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/215/305/GCF_002215305.1_ASM221530v1 na -GCF_014202515.1 PRJNA224116 SAMN14908392 JACHGX000000000.1 na 2242 2242 Halobacterium salinarum strain=DSM 669 latest Contig Major Full 2020/08/14 ASM1420251v1 DOE Joint Genome Institute GCA_014202515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/014/202/515/GCF_014202515.1_ASM1420251v1 assembly from synonym type material na -GCF_002761295.1 PRJNA224116 SAMN05908879 representative genome 39664 39664 Methanohalophilus portucalensis strain=FDF-1T latest Chromosome Major Full 2017/11/07 ASM276129v1 Macumba GCA_002761295.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/761/295/GCF_002761295.1_ASM276129v1 assembly from type material na -GCF_000187225.1 PRJNA224116 SAMN02470763 AEMG00000000.1 na 797209 367189 Haladaptatus paucihalophilus DX253 strain=DX253 latest Contig Major Full 2011/01/31 ASM18722v1 Oklahoma State University GCA_000187225.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/187/225/GCF_000187225.1_ASM18722v1 assembly from type material na -GCF_005435225.1 PRJNA224116 SAMN10910413 SGXX00000000.1 na 1855858 1855858 Halorubrum sp. 
ASP121 strain=ASP121 latest Contig Major Full 2019/05/16 ASM543522v1 University of Connecticut GCA_005435225.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/435/225/GCF_005435225.1_ASM543522v1 na -GCF_001560685.1 PRJNA224116 SAMN04305175 LPSN00000000.1 na 2285 2285 Sulfolobus acidocaldarius strain=NG05B_CO5_08 latest Contig Major Full 2016/02/11 NG05B_CO5_08 University of Illinois at Urbana-Champaign GCA_001560685.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/560/685/GCF_001560685.1_NG05B_CO5_08 na -GCF_900215575.1 PRJNA224116 SAMN06269185 OBEJ00000000.1 representative genome 558529 558529 Natronoarchaeum philippinense strain=DSM 27208 latest Contig Major Full 2017/09/28 IMG-taxon 2728369221 annotated assembly DOE - JOINT GENOME INSTITUTE GCA_900215575.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/215/575/GCF_900215575.1_IMG-taxon_2728369221_annotated_assembly assembly from type material na -GCF_011319465.1 PRJNA224116 SAMN09786340 RCMB00000000.1 na 2341020 2341020 Candidatus Nitrosotalea sp. 
TS strain=TS latest Contig Major Full 2020/03/16 ASM1131946v1 Chinese Academy of Sciences GCA_011319465.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/011/319/465/GCF_011319465.1_ASM1131946v1 na -GCF_005406325.1 PRJNA224116 SAMN11356524 representative genome 523841 2252 Haloferax mediterranei ATCC 33500 strain=ATCC 33500 latest Complete Genome Major Full 2019/05/16 ASM540632v1 University of Maryland GCA_005406325.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/406/325/GCF_005406325.1_ASM540632v1 assembly from type material na -GCF_005222525.1 PRJNA224116 SAMN09071532 representative genome 47304 47304 Metallosphaera prunae strain=Ron 12/II latest Complete Genome Major Full 2019/05/07 ASM522252v1 North Carolina State University GCA_005222525.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/222/525/GCF_005222525.1_ASM522252v1 assembly from type material na -GCF_000022465.1 PRJNA224116 SAMN02598422 na 439386 43080 Sulfolobus islandicus Y.G.57.14 strain=Y.G.57.14 latest Complete Genome Major Full 2009/04/29 ASM2246v1 US DOE Joint Genome Institute GCA_000022465.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/022/465/GCF_000022465.1_ASM2246v1 na -GCF_000400975.1 PRJNA224116 SAMD00036650 BANO00000000.1 na 1261545 489138 Halarchaeum acidiphilum MH1-52-1 strain=MH1-52-1 latest Contig Major Full 2013/05/16 ASM40097v1 Japan Agency for Marine-Earth Science and Technology GCA_000400975.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/400/975/GCF_000400975.1_ASM40097v1 assembly from type material na -GCF_000245175.1 PRJNA224116 SAMN02471819 AHJO00000000.1 na 1132501 43080 Sulfolobus islandicus M.16.23 strain=M.16.23 latest Chromosome Major Full 2012/01/25 ASM24517v2 University of Illinois GCA_000245175.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/245/175/GCF_000245175.1_ASM24517v2 na -GCF_000517445.1 PRJNA224116 SAMN03081513 representative genome 582419 582419 Thermococcus paralvinellae strain=ES1 latest 
Complete Genome Major Full 2014/01/10 ASM51744v1 Kyung Hee University GCA_000517445.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/517/445/GCF_000517445.1_ASM51744v1 assembly from type material na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. +#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCF_000022465.1 PRJNA224116 SAMN02598422 na na 439386 43080 Sulfolobus islandicus Y.G.57.14 strain=Y.G.57.14 na latest Complete Genome Major Full 2009/04/29 ASM2246v1 US DOE Joint Genome Institute GCA_000022465.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/022/465/GCF_000022465.1_ASM2246v1 na na na haploid archaea 2702058 2702058 35.0 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/09/23 3029 2767 53 19435847 +GCF_000187225.1 PRJNA224116 SAMN02470763 AEMG00000000.1 na 797209 367189 Haladaptatus paucihalophilus DX253 strain=DX253 na latest Contig Major Full 2011/01/31 ASM18722v1 Oklahoma State University GCA_000187225.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/187/225/GCF_000187225.1_ASM18722v1 na assembly from type material na haploid archaea 4284805 4284805 61.5 0 32 32 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/12/22 4383 4296 57 24048226 +GCF_000245175.1 PRJNA224116 SAMN02471819 AHJO00000000.1 na 1132501 43080 Sulfolobus islandicus M.16.23 strain=M.16.23 na latest Chromosome Major Full 2012/01/25 ASM24517v2 
University of Illinois GCA_000245175.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/245/175/GCF_000245175.1_ASM24517v2 na na na haploid archaea 2601131 2601131 35.0 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 10/28/19 2873 2619 51 22363207 +GCF_000400975.1 PRJNA224116 SAMD00036650 BANO00000000.1 na 1261545 489138 Halarchaeum acidiphilum MH1-52-1 strain=MH1-52-1 na latest Contig Major Full 2013/05/16 ASM40097v1 Japan Agency for Marine-Earth Science and Technology GCA_000400975.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/400/975/GCF_000400975.1_ASM40097v1 na assembly from type material na haploid archaea 2431759 2431759 66.5 0 537 537 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/07/22 2888 2397 45 na +GCF_000513855.1 PRJNA224116 SAMN02597199 AZUU00000000.1 na 1150674 94694 Desulfurococcus amylolyticus Z-533 strain=Z-533 na latest Scaffold Major Full 2014/01/07 ASM51385v1 DOE Joint Genome Institute GCA_000513855.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/513/855/GCF_000513855.1_ASM51385v1 na assembly from type material na haploid archaea 1307099 1306999 45.0 0 1 2 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/09/23 1436 1374 52 na +GCF_000517445.1 PRJNA224116 SAMN03081513 na representative genome 582419 582419 Thermococcus paralvinellae strain=ES1 na latest Complete Genome Major Full 2014/01/10 ASM51744v1 Kyung Hee University GCA_000517445.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/517/445/GCF_000517445.1_ASM51744v1 na assembly from type material na haploid archaea 1957742 1957742 40.0 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 11/10/22 2180 2103 52 25082851 +GCF_001560685.1 PRJNA224116 SAMN04305175 LPSN00000000.1 na 2285 2285 Sulfolobus acidocaldarius strain=NG05B_CO5_08 na latest Contig Major Full 2016/02/11 NG05B_CO5_08 University of Illinois at Urbana-Champaign GCA_001560685.1 identical 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/560/685/GCF_001560685.1_NG05B_CO5_08 na na na haploid archaea 2178872 2178872 36.5 0 8 8 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/09/23 2350 2248 51 na +GCF_002215305.1 PRJNA224116 SAMN05822533 MKFG00000000.1 na 2247 2247 Halorubrum lacusprofundi strain=DL18 na latest Contig Major Full 2017/07/06 ASM221530v1 University of NSW GCA_002215305.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/215/305/GCF_002215305.1_ASM221530v1 na na na haploid archaea 3633750 3633750 64.0 0 193 193 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/22/23 3721 3501 64 na +GCF_002494345.1 PRJNA224116 SAMN07714153 NXNI00000000.1 representative genome 373386 373386 Natrinema ejinorense strain=JCM 13890 na latest Contig Major Full 2017/10/03 ASM249434v1 World Institute of Kimchi GCA_002494345.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/494/345/GCF_002494345.1_ASM249434v1 na assembly from type material na haploid archaea 4476324 4476324 63.5 0 3 3 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/04/23 4456 4312 61 na +GCF_002761295.1 PRJNA224116 SAMN05908879 na representative genome 39664 39664 Methanohalophilus portucalensis strain=FDF-1T na latest Chromosome Major Full 2017/11/07 ASM276129v1 Macumba GCA_002761295.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/761/295/GCF_002761295.1_ASM276129v1 na assembly from type material na haploid archaea 2084975 2084275 41.5 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/27/22 2161 2080 58 na +GCF_004137855.1 PRJNA224116 SAMN08804086 QBKB00000000.1 representative genome 2138083 2138083 Methanohalophilus profundi strain=SLHTYRO na latest Scaffold Major Full 2019/02/05 ASM413785v1 UBO GCA_004137855.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/137/855/GCF_004137855.1_ASM413785v1 na assembly from type material na haploid archaea 1830088 1829200 42.5 0 4 4 NCBI RefSeq 
NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 10/23/22 2077 1843 51 na +GCF_005222525.1 PRJNA224116 SAMN09071532 na representative genome 47304 47304 Metallosphaera prunae strain=Ron 12/II na latest Complete Genome Major Full 2019/05/07 ASM522252v1 North Carolina State University GCA_005222525.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/222/525/GCF_005222525.1_ASM522252v1 na assembly from type material na haploid archaea 2202576 2202576 46.0 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 10/26/22 2413 2345 51 na +GCF_005406325.1 PRJNA224116 SAMN11356524 na representative genome 523841 2252 Haloferax mediterranei ATCC 33500 strain=ATCC 33500 na latest Complete Genome Major Full 2019/05/16 ASM540632v1 University of Maryland GCA_005406325.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/406/325/GCF_005406325.1_ASM540632v1 na assembly from type material na haploid archaea 3907473 3907473 60.0 4 4 4 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 04/13/22 3937 3802 65 31270201 +GCF_005435225.1 PRJNA224116 SAMN10910413 SGXX00000000.1 na 1855858 1855858 Halorubrum sp. 
ASP121 strain=ASP121 na latest Contig Major Full 2019/05/16 ASM543522v1 University of Connecticut GCA_005435225.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/435/225/GCF_005435225.1_ASM543522v1 na na na haploid archaea 3378175 3378175 68.0 0 147 147 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/13/23 3551 3394 54 30893937 +GCF_009184545.1 PRJNA224116 SAMN09291540 QJOW00000000.1 representative genome 2212850 2212850 Halosegnis rubeus strain=F17-44 na latest Scaffold Major Full 2019/10/19 ASM918454v1 University of Sevilla, Spain GCA_009184545.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/184/545/GCF_009184545.1_ASM918454v1 na assembly from type material na haploid archaea 2660599 2660501 64.0 2 12 12 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/14/22 2862 2766 51 na +GCF_009674625.1 PRJNA224116 SAMN13255728 WKJQ00000000.1 representative genome 2666143 2666143 Haloferax marinum strain=MBLA0078 na latest Contig Major Full 2019/11/19 ASM967462v1 Incheon National University GCA_009674625.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/674/625/GCF_009674625.1_ASM967462v1 na assembly from type material na haploid archaea 3480702 3480702 61.0 0 3 3 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/01/22 3517 3322 64 na +GCF_011319465.1 PRJNA224116 SAMN09786340 RCMB00000000.1 na 2341020 2341020 Candidatus Nitrosotalea sp. 
TS strain=TS na latest Contig Major Full 2020/03/16 ASM1131946v1 Chinese Academy of Sciences GCA_011319465.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/011/319/465/GCF_011319465.1_ASM1131946v1 na na na haploid archaea 1540195 1540195 40.0 0 24 24 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 07/27/22 2140 1823 44 31462715 +GCF_014202515.1 PRJNA224116 SAMN14908392 JACHGX000000000.1 na 2242 2242 Halobacterium salinarum strain=DSM 669 na latest Contig Major Full 2020/08/14 ASM1420251v1 DOE Joint Genome Institute GCA_014202515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/014/202/515/GCF_014202515.1_ASM1420251v1 na assembly from synonym type material na haploid archaea 2368961 2368961 66.5 0 37 37 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 09/15/22 2511 2349 52 na +GCF_900116205.1 PRJNA224116 SAMN04488556 FOZS00000000.1 representative genome 619731 619731 Halostagnicola kamekurae strain=DSM 22427 na latest Contig Major Full 2016/11/02 IMG-taxon 2639762563 annotated assembly DOE - JOINT GENOME INSTITUTE GCA_900116205.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/116/205/GCF_900116205.1_IMG-taxon_2639762563_annotated_assembly na assembly from type material na haploid archaea 4108147 4108147 61.0 0 16 16 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/04/23 4141 3930 58 na +GCF_900215575.1 PRJNA224116 SAMN06269185 OBEJ00000000.1 representative genome 558529 558529 Natronoarchaeum philippinense strain=DSM 27208 na latest Contig Major Full 2017/09/28 IMG-taxon 2728369221 annotated assembly DOE - JOINT GENOME INSTITUTE GCA_900215575.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/215/575/GCF_900215575.1_IMG-taxon_2728369221_annotated_assembly na assembly from type material na haploid archaea 3164179 3164179 65.0 0 11 11 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 09/04/22 3324 3239 54 na diff --git 
a/tests/files/genomes/refseq/archaea/assembly_summary_historical.txt b/tests/files/genomes/refseq/archaea/assembly_summary_historical.txt index 7019e3d..008591f 100644 --- a/tests/files/genomes/refseq/archaea/assembly_summary_historical.txt +++ b/tests/files/genomes/refseq/archaea/assembly_summary_historical.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. -# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCF_004137855.1 PRJNA224116 SAMN08804086 QBKB00000000.1 representative genome 2138083 2138083 Methanohalophilus profundi strain=SLHTYRO latest Scaffold Major Full 2019/02/05 ASM413785v1 UBO GCA_004137855.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/137/855/GCF_004137855.1_ASM413785v1 assembly from type material na -GCF_009184545.1 PRJNA224116 SAMN09291540 QJOW00000000.1 representative genome 2212850 2212850 Halosegnis rubeus strain=F17-44 latest Scaffold Major Full 2019/10/19 ASM918454v1 University of Sevilla, Spain GCA_009184545.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/184/545/GCF_009184545.1_ASM918454v1 assembly from type material na -GCF_009674625.1 PRJNA224116 SAMN13255728 WKJQ00000000.1 representative genome 2666143 2666143 Haloferax marinum strain=MBLA0078 latest Contig Major Full 2019/11/19 ASM967462v1 Incheon National University GCA_009674625.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/674/625/GCF_009674625.1_ASM967462v1 assembly from type material na -GCF_002494345.1 PRJNA224116 SAMN07714153 NXNI00000000.1 representative genome 373386 373386 Natrinema ejinorense strain=JCM 13890 latest Contig Major Full 2017/10/03 ASM249434v1 World Institute of 
Kimchi GCA_002494345.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/494/345/GCF_002494345.1_ASM249434v1 assembly from type material na -GCF_000513855.1 PRJNA224116 SAMN02597199 AZUU00000000.1 na 1150674 94694 Desulfurococcus amylolyticus Z-533 strain=Z-533 latest Scaffold Major Full 2014/01/07 ASM51385v1 DOE Joint Genome Institute GCA_000513855.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/513/855/GCF_000513855.1_ASM51385v1 assembly from type material na -GCF_900116205.1 PRJNA224116 SAMN04488556 FOZS00000000.1 representative genome 619731 619731 Halostagnicola kamekurae strain=DSM 22427 latest Contig Major Full 2016/11/02 IMG-taxon 2639762563 annotated assembly DOE - JOINT GENOME INSTITUTE GCA_900116205.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/116/205/GCF_900116205.1_IMG-taxon_2639762563_annotated_assembly assembly from type material na -GCF_002215305.1 PRJNA224116 SAMN05822533 MKFG00000000.1 na 2247 2247 Halorubrum lacusprofundi strain=DL18 latest Contig Major Full 2017/07/06 ASM221530v1 University of NSW GCA_002215305.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/215/305/GCF_002215305.1_ASM221530v1 na -GCF_014202515.1 PRJNA224116 SAMN14908392 JACHGX000000000.1 na 2242 2242 Halobacterium salinarum strain=DSM 669 latest Contig Major Full 2020/08/14 ASM1420251v1 DOE Joint Genome Institute GCA_014202515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/014/202/515/GCF_014202515.1_ASM1420251v1 assembly from synonym type material na -GCF_002761295.1 PRJNA224116 SAMN05908879 representative genome 39664 39664 Methanohalophilus portucalensis strain=FDF-1T latest Chromosome Major Full 2017/11/07 ASM276129v1 Macumba GCA_002761295.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/761/295/GCF_002761295.1_ASM276129v1 assembly from type material na -GCF_000187225.1 PRJNA224116 SAMN02470763 AEMG00000000.1 na 797209 367189 Haladaptatus paucihalophilus DX253 strain=DX253 latest Contig Major Full 
2011/01/31 ASM18722v1 Oklahoma State University GCA_000187225.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/187/225/GCF_000187225.1_ASM18722v1 assembly from type material na -GCF_005435225.1 PRJNA224116 SAMN10910413 SGXX00000000.1 na 1855858 1855858 Halorubrum sp. ASP121 strain=ASP121 latest Contig Major Full 2019/05/16 ASM543522v1 University of Connecticut GCA_005435225.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/435/225/GCF_005435225.1_ASM543522v1 na -GCF_001560685.1 PRJNA224116 SAMN04305175 LPSN00000000.1 na 2285 2285 Sulfolobus acidocaldarius strain=NG05B_CO5_08 latest Contig Major Full 2016/02/11 NG05B_CO5_08 University of Illinois at Urbana-Champaign GCA_001560685.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/560/685/GCF_001560685.1_NG05B_CO5_08 na -GCF_900215575.1 PRJNA224116 SAMN06269185 OBEJ00000000.1 representative genome 558529 558529 Natronoarchaeum philippinense strain=DSM 27208 latest Contig Major Full 2017/09/28 IMG-taxon 2728369221 annotated assembly DOE - JOINT GENOME INSTITUTE GCA_900215575.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/215/575/GCF_900215575.1_IMG-taxon_2728369221_annotated_assembly assembly from type material na -GCF_011319465.1 PRJNA224116 SAMN09786340 RCMB00000000.1 na 2341020 2341020 Candidatus Nitrosotalea sp. 
TS strain=TS latest Contig Major Full 2020/03/16 ASM1131946v1 Chinese Academy of Sciences GCA_011319465.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/011/319/465/GCF_011319465.1_ASM1131946v1 na -GCF_005406325.1 PRJNA224116 SAMN11356524 representative genome 523841 2252 Haloferax mediterranei ATCC 33500 strain=ATCC 33500 latest Complete Genome Major Full 2019/05/16 ASM540632v1 University of Maryland GCA_005406325.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/406/325/GCF_005406325.1_ASM540632v1 assembly from type material na -GCF_005222525.1 PRJNA224116 SAMN09071532 representative genome 47304 47304 Metallosphaera prunae strain=Ron 12/II latest Complete Genome Major Full 2019/05/07 ASM522252v1 North Carolina State University GCA_005222525.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/222/525/GCF_005222525.1_ASM522252v1 assembly from type material na -GCF_000022465.1 PRJNA224116 SAMN02598422 na 439386 43080 Sulfolobus islandicus Y.G.57.14 strain=Y.G.57.14 latest Complete Genome Major Full 2009/04/29 ASM2246v1 US DOE Joint Genome Institute GCA_000022465.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/022/465/GCF_000022465.1_ASM2246v1 na -GCF_000400975.1 PRJNA224116 SAMD00036650 BANO00000000.1 na 1261545 489138 Halarchaeum acidiphilum MH1-52-1 strain=MH1-52-1 latest Contig Major Full 2013/05/16 ASM40097v1 Japan Agency for Marine-Earth Science and Technology GCA_000400975.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/400/975/GCF_000400975.1_ASM40097v1 assembly from type material na -GCF_000245175.1 PRJNA224116 SAMN02471819 AHJO00000000.1 na 1132501 43080 Sulfolobus islandicus M.16.23 strain=M.16.23 latest Chromosome Major Full 2012/01/25 ASM24517v2 University of Illinois GCA_000245175.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/245/175/GCF_000245175.1_ASM24517v2 na -GCF_000517445.1 PRJNA224116 SAMN03081513 representative genome 582419 582419 Thermococcus paralvinellae strain=ES1 latest 
Complete Genome Major Full 2014/01/10 ASM51744v1 Kyung Hee University GCA_000517445.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/517/445/GCF_000517445.1_ASM51744v1 assembly from type material na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. +#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCF_000022465.1 PRJNA224116 SAMN02598422 na na 439386 43080 Sulfolobus islandicus Y.G.57.14 strain=Y.G.57.14 na latest Complete Genome Major Full 2009/04/29 ASM2246v1 US DOE Joint Genome Institute GCA_000022465.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/022/465/GCF_000022465.1_ASM2246v1 na na na haploid archaea 2702058 2702058 35.0 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/09/23 3029 2767 53 19435847 +GCF_000187225.1 PRJNA224116 SAMN02470763 AEMG00000000.1 na 797209 367189 Haladaptatus paucihalophilus DX253 strain=DX253 na latest Contig Major Full 2011/01/31 ASM18722v1 Oklahoma State University GCA_000187225.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/187/225/GCF_000187225.1_ASM18722v1 na assembly from type material na haploid archaea 4284805 4284805 61.5 0 32 32 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/12/22 4383 4296 57 24048226 +GCF_000245175.1 PRJNA224116 SAMN02471819 AHJO00000000.1 na 1132501 43080 Sulfolobus islandicus M.16.23 strain=M.16.23 na latest Chromosome Major Full 2012/01/25 ASM24517v2 
University of Illinois GCA_000245175.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/245/175/GCF_000245175.1_ASM24517v2 na na na haploid archaea 2601131 2601131 35.0 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 10/28/19 2873 2619 51 22363207 +GCF_000400975.1 PRJNA224116 SAMD00036650 BANO00000000.1 na 1261545 489138 Halarchaeum acidiphilum MH1-52-1 strain=MH1-52-1 na latest Contig Major Full 2013/05/16 ASM40097v1 Japan Agency for Marine-Earth Science and Technology GCA_000400975.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/400/975/GCF_000400975.1_ASM40097v1 na assembly from type material na haploid archaea 2431759 2431759 66.5 0 537 537 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/07/22 2888 2397 45 na +GCF_000513855.1 PRJNA224116 SAMN02597199 AZUU00000000.1 na 1150674 94694 Desulfurococcus amylolyticus Z-533 strain=Z-533 na latest Scaffold Major Full 2014/01/07 ASM51385v1 DOE Joint Genome Institute GCA_000513855.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/513/855/GCF_000513855.1_ASM51385v1 na assembly from type material na haploid archaea 1307099 1306999 45.0 0 1 2 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/09/23 1436 1374 52 na +GCF_000517445.1 PRJNA224116 SAMN03081513 na representative genome 582419 582419 Thermococcus paralvinellae strain=ES1 na latest Complete Genome Major Full 2014/01/10 ASM51744v1 Kyung Hee University GCA_000517445.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/517/445/GCF_000517445.1_ASM51744v1 na assembly from type material na haploid archaea 1957742 1957742 40.0 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 11/10/22 2180 2103 52 25082851 +GCF_001560685.1 PRJNA224116 SAMN04305175 LPSN00000000.1 na 2285 2285 Sulfolobus acidocaldarius strain=NG05B_CO5_08 na latest Contig Major Full 2016/02/11 NG05B_CO5_08 University of Illinois at Urbana-Champaign GCA_001560685.1 identical 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/560/685/GCF_001560685.1_NG05B_CO5_08 na na na haploid archaea 2178872 2178872 36.5 0 8 8 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/09/23 2350 2248 51 na +GCF_002215305.1 PRJNA224116 SAMN05822533 MKFG00000000.1 na 2247 2247 Halorubrum lacusprofundi strain=DL18 na latest Contig Major Full 2017/07/06 ASM221530v1 University of NSW GCA_002215305.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/215/305/GCF_002215305.1_ASM221530v1 na na na haploid archaea 3633750 3633750 64.0 0 193 193 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/22/23 3721 3501 64 na +GCF_002494345.1 PRJNA224116 SAMN07714153 NXNI00000000.1 representative genome 373386 373386 Natrinema ejinorense strain=JCM 13890 na latest Contig Major Full 2017/10/03 ASM249434v1 World Institute of Kimchi GCA_002494345.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/494/345/GCF_002494345.1_ASM249434v1 na assembly from type material na haploid archaea 4476324 4476324 63.5 0 3 3 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/04/23 4456 4312 61 na +GCF_002761295.1 PRJNA224116 SAMN05908879 na representative genome 39664 39664 Methanohalophilus portucalensis strain=FDF-1T na latest Chromosome Major Full 2017/11/07 ASM276129v1 Macumba GCA_002761295.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/761/295/GCF_002761295.1_ASM276129v1 na assembly from type material na haploid archaea 2084975 2084275 41.5 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/27/22 2161 2080 58 na +GCF_004137855.1 PRJNA224116 SAMN08804086 QBKB00000000.1 representative genome 2138083 2138083 Methanohalophilus profundi strain=SLHTYRO na latest Scaffold Major Full 2019/02/05 ASM413785v1 UBO GCA_004137855.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/137/855/GCF_004137855.1_ASM413785v1 na assembly from type material na haploid archaea 1830088 1829200 42.5 0 4 4 NCBI RefSeq 
NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 10/23/22 2077 1843 51 na +GCF_005222525.1 PRJNA224116 SAMN09071532 na representative genome 47304 47304 Metallosphaera prunae strain=Ron 12/II na latest Complete Genome Major Full 2019/05/07 ASM522252v1 North Carolina State University GCA_005222525.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/222/525/GCF_005222525.1_ASM522252v1 na assembly from type material na haploid archaea 2202576 2202576 46.0 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 10/26/22 2413 2345 51 na +GCF_005406325.1 PRJNA224116 SAMN11356524 na representative genome 523841 2252 Haloferax mediterranei ATCC 33500 strain=ATCC 33500 na latest Complete Genome Major Full 2019/05/16 ASM540632v1 University of Maryland GCA_005406325.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/406/325/GCF_005406325.1_ASM540632v1 na assembly from type material na haploid archaea 3907473 3907473 60.0 4 4 4 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 04/13/22 3937 3802 65 31270201 +GCF_005435225.1 PRJNA224116 SAMN10910413 SGXX00000000.1 na 1855858 1855858 Halorubrum sp. 
ASP121 strain=ASP121 na latest Contig Major Full 2019/05/16 ASM543522v1 University of Connecticut GCA_005435225.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/005/435/225/GCF_005435225.1_ASM543522v1 na na na haploid archaea 3378175 3378175 68.0 0 147 147 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/13/23 3551 3394 54 30893937 +GCF_009184545.1 PRJNA224116 SAMN09291540 QJOW00000000.1 representative genome 2212850 2212850 Halosegnis rubeus strain=F17-44 na latest Scaffold Major Full 2019/10/19 ASM918454v1 University of Sevilla, Spain GCA_009184545.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/184/545/GCF_009184545.1_ASM918454v1 na assembly from type material na haploid archaea 2660599 2660501 64.0 2 12 12 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/14/22 2862 2766 51 na +GCF_009674625.1 PRJNA224116 SAMN13255728 WKJQ00000000.1 representative genome 2666143 2666143 Haloferax marinum strain=MBLA0078 na latest Contig Major Full 2019/11/19 ASM967462v1 Incheon National University GCA_009674625.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/674/625/GCF_009674625.1_ASM967462v1 na assembly from type material na haploid archaea 3480702 3480702 61.0 0 3 3 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/01/22 3517 3322 64 na +GCF_011319465.1 PRJNA224116 SAMN09786340 RCMB00000000.1 na 2341020 2341020 Candidatus Nitrosotalea sp. 
TS strain=TS na latest Contig Major Full 2020/03/16 ASM1131946v1 Chinese Academy of Sciences GCA_011319465.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/011/319/465/GCF_011319465.1_ASM1131946v1 na na na haploid archaea 1540195 1540195 40.0 0 24 24 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 07/27/22 2140 1823 44 31462715 +GCF_014202515.1 PRJNA224116 SAMN14908392 JACHGX000000000.1 na 2242 2242 Halobacterium salinarum strain=DSM 669 na latest Contig Major Full 2020/08/14 ASM1420251v1 DOE Joint Genome Institute GCA_014202515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/014/202/515/GCF_014202515.1_ASM1420251v1 na assembly from synonym type material na haploid archaea 2368961 2368961 66.5 0 37 37 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 09/15/22 2511 2349 52 na +GCF_900116205.1 PRJNA224116 SAMN04488556 FOZS00000000.1 representative genome 619731 619731 Halostagnicola kamekurae strain=DSM 22427 na latest Contig Major Full 2016/11/02 IMG-taxon 2639762563 annotated assembly DOE - JOINT GENOME INSTITUTE GCA_900116205.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/116/205/GCF_900116205.1_IMG-taxon_2639762563_annotated_assembly na assembly from type material na haploid archaea 4108147 4108147 61.0 0 16 16 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/04/23 4141 3930 58 na +GCF_900215575.1 PRJNA224116 SAMN06269185 OBEJ00000000.1 representative genome 558529 558529 Natronoarchaeum philippinense strain=DSM 27208 na latest Contig Major Full 2017/09/28 IMG-taxon 2728369221 annotated assembly DOE - JOINT GENOME INSTITUTE GCA_900215575.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/215/575/GCF_900215575.1_IMG-taxon_2728369221_annotated_assembly na assembly from type material na haploid archaea 3164179 3164179 65.0 0 11 11 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 09/04/22 3324 3239 54 na diff --git 
a/tests/files/genomes/refseq/assembly_summary_refseq.txt b/tests/files/genomes/refseq/assembly_summary_refseq.txt index efc88d0..e624c80 100644 --- a/tests/files/genomes/refseq/assembly_summary_refseq.txt +++ b/tests/files/genomes/refseq/assembly_summary_refseq.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. -# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCF_001261215.1 PRJNA224116 SAMEA954728 CXAQ00000000.1 na 624 624 Shigella sonnei strain=Sh74369_401064 latest Scaffold Major Full 2015/07/25 5008_7#11 SC GCA_001261215.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/261/215/GCF_001261215.1_5008_7_11 na -GCF_002273625.1 PRJNA224116 SAMN03893265 LKWZ00000000.1 na 1280 1280 Staphylococcus aureus strain=ISU 930 latest Scaffold Major Full 2017/08/28 ISU-930_v1.0 USDA-ARS GCA_002273625.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/273/625/GCF_002273625.1_ISU-930_v1.0 na -GCF_000765925.2 PRJNA224116 SAMN03075569 JRPJ00000000.2 na 37372 37372 Helicobacter bilis strain=ATCC 49320 latest Contig Major Full 2019/05/22 ASM76592v2 Massachusetts Institute of Technology GCA_000765925.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/765/925/GCF_000765925.2_ASM76592v2 na -GCF_000560105.1 PRJNA224116 SAMN02383660 JDIQ00000000.1 na 1410740 1280 Staphylococcus aureus T66282 strain=T66282 latest Scaffold Major Full 2014/02/06 Stap_aure_T66282_V1 Broad Institute GCA_000560105.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/560/105/GCF_000560105.1_Stap_aure_T66282_V1 na -GCF_000806405.1 PRJNA224116 SAMN03222688 JUKG00000000.1 na 1639 1639 Listeria monocytogenes strain=BHU3 
latest Contig Major Full 2014/12/22 ASM80640v1 Banaras Hindu University GCA_000806405.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/806/405/GCF_000806405.1_ASM80640v1 na -GCF_010667555.1 PRJNA224116 SAMN12785273 VYSE00000000.1 na 1689 1689 Bifidobacterium dentium strain=BRDF 23 latest Contig Major Full 2020/02/14 ASM1066755v1 University of Bologna GCA_010667555.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/667/555/GCF_010667555.1_ASM1066755v1 na -GCF_004354475.1 PRJNA224116 SAMN08555025 PUFE00000000.1 na 214326 1599 Latilactobacillus sakei subsp. sakei strain=ATCC 15521 latest Contig Major Full 2019/03/18 ASM435447v1 Carlsberg Research Laboratory GCA_004354475.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/354/475/GCF_004354475.1_ASM435447v1 assembly from type material na -GCF_010120835.1 PRJNA485481 na 2696432 2696432 Escherichia phage nieznany latest Complete Genome Major Full 2021/02/07 ASM1012083v1 GCA_010120835.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/120/835/GCF_010120835.1_ASM1012083v1 na -GCF_904810345.1 PRJNA224116 SAMEA7336317 na 1806 1773 Mycobacterium tuberculosis variant microti strain=Maus III human latest Complete Genome Major Full 2021/01/27 MmicMaus3 IP GCA_904810345.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/904/810/345/GCF_904810345.1_MmicMaus3 na -GCF_002245175.1 PRJNA224116 SAMN03262752 LEMZ00000000.1 na 562 562 Escherichia coli strain=272-3565 latest Scaffold Major Full 2017/08/07 ASM224517v1 Broad Institute GCA_002245175.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/245/175/GCF_002245175.1_ASM224517v1 na -GCF_019703895.1 PRJNA224116 SAMD00254949 na 2779671 2779671 Streptomyces sp. EAS-AB2608 strain=NBRC 114648 latest Complete Genome Major Full 2021/05/11 ASM1970389v1 Global Health Research Section, hhc Data Creation Center, Eisai Co., Ltd. 
GCA_019703895.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/019/703/895/GCF_019703895.1_ASM1970389v1 na -GCF_003062865.1 PRJNA224116 SAMN08644156 PZMT00000000.1 na 573 573 Klebsiella pneumoniae strain=ITU3908 latest Scaffold Major Full 2018/04/23 ASM306286v1 Robert Koch Institute GCA_003062865.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/062/865/GCF_003062865.1_ASM306286v1 na -GCF_000401975.1 PRJNA224116 SAMN02403947 ASHT00000000.1 na 1329363 630 Yersinia enterocolitica subsp. palearctica YE-P1 strain=YE-P1 latest Contig Major Full 2013/05/28 YE-P1_1.0 Max von Pettenkofer-Institut GCA_000401975.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/401/975/GCF_000401975.1_YE-P1_1.0 na -GCF_001667425.1 PRJNA224116 SAMN04691946 LZII00000000.1 na 1834104 1834104 Mycobacterium sp. 852002-51613_SCH5001154 strain=852002-51613_SCH5001154 latest Contig Major Full 2016/06/17 ASM166742v1 JCVI GCA_001667425.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/667/425/GCF_001667425.1_ASM166742v1 na -GCF_013369175.2 PRJNA224116 SAMN14503210 JABWOC000000000.2 na 2723303 2723303 Escherichia sp. 
8.2195 strain=8.2195 latest Contig Major Full 2020/08/13 ASM1336917v2 FDA GCA_013369175.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/013/369/175/GCF_013369175.2_ASM1336917v2 na -GCF_003252965.1 PRJNA224116 SAMN09011133 QEPV00000000.1 na 732 732 Aggregatibacter aphrophilus strain=C2008001229 latest Contig Major Full 2018/06/19 ASM325296v1 Centers for Disease Control and Prevention GCA_003252965.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/252/965/GCF_003252965.1_ASM325296v1 na -GCF_017960365.1 PRJNA224116 SAMN15098422 JABVYN000000000.1 na 380021 380021 Pseudomonas protegens strain=PPRAR03 latest Contig Major Full 2021/04/14 ASM1796036v1 ETH Zurich GCA_017960365.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/017/960/365/GCF_017960365.1_ASM1796036v1 na -GCF_900172265.1 PRJNA224116 SAMEA102345418 FWFK00000000.1 representative genome 1529041 1529041 Roseivivax jejudonensis strain=CECT 8625 latest Contig Major Full 2017/04/29 R.jejudonensis_CECT8625_Spades_Prokka UVEG GCA_900172265.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/172/265/GCF_900172265.1_R.jejudonensis_CECT8625_Spades_Prokka assembly from type material na -GCF_000947895.1 PRJNA224116 SAMEA2794682 CDHH00000000.1 na 1765 1773 Mycobacterium tuberculosis variant bovis strain=MB3 latest Scaffold Major Full 2015/03/03 Assembly of the genome MB3 ERA7 GCA_000947895.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/947/895/GCF_000947895.1_Assembly_of_the_genome_MB3 na -GCF_021369635.1 PRJNA224116 SAMN23428406 JAJNBZ000000000.1 representative genome 1173085 1173085 Paenibacillus profundus strain=YoMME latest Scaffold Major Full 2022/01/05 ASM2136963v1 Faculty of Biology at Sofia University "St. Kliment Ohridski" GCA_021369635.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/021/369/635/GCF_021369635.1_ASM2136963v1 na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
+#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCF_000401975.1 PRJNA224116 SAMN02403947 ASHT00000000.1 na 1329363 630 Yersinia enterocolitica subsp. palearctica YE-P1 strain=YE-P1 na latest Contig Major Full 2013/05/28 YE-P1_1.0 Max von Pettenkofer-Institut GCA_000401975.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/401/975/GCF_000401975.1_YE-P1_1.0 na na na haploid bacteria 4550830 4550830 46.5 0 86 86 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 02/23/23 4232 3889 103 23846271 +GCF_000560105.1 PRJNA224116 SAMN02383660 JDIQ00000000.1 na 1410740 1280 Staphylococcus aureus T66282 strain=T66282 na latest Scaffold Major Full 2014/02/06 Stap_aure_T66282_V1 Broad Institute GCA_000560105.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/560/105/GCF_000560105.1_Stap_aure_T66282_V1 na na na haploid bacteria 2870235 2870135 32.5 0 21 22 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 10/11/22 2905 2746 77 na +GCF_000765925.2 PRJNA224116 SAMN03075569 JRPJ00000000.2 na 37372 37372 Helicobacter bilis strain=ATCC 49320 na latest Contig Major Full 2019/05/22 ASM76592v2 Massachusetts Institute of Technology GCA_000765925.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/765/925/GCF_000765925.2_ASM76592v2 na na na haploid bacteria 2454342 2454342 34.5 0 149 149 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/09/23 2407 2283 42 25428971 +GCF_000806405.1 PRJNA224116 SAMN03222688 
JUKG00000000.1 na 1639 1639 Listeria monocytogenes strain=BHU3 na latest Contig Major Full 2014/12/22 ASM80640v1 Banaras Hindu University GCA_000806405.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/806/405/GCF_000806405.1_ASM80640v1 na na na haploid bacteria 2946809 2946809 37.5 0 15 15 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 04/07/23 2983 2802 53 25657276 +GCF_000947895.1 PRJNA224116 SAMEA2794682 CDHH00000000.1 na 1765 1773 Mycobacterium tuberculosis variant bovis strain=MB3 na latest Scaffold Major Full 2015/03/03 Assembly of the genome MB3 ERA7 GCA_000947895.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/947/895/GCF_000947895.1_Assembly_of_the_genome_MB3 na na na haploid bacteria 4259788 4250235 65.5 0 94 94 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 11/13/22 4135 3795 50 na +GCF_001261215.1 PRJNA224116 SAMEA954728 CXAQ00000000.1 na 624 624 Shigella sonnei strain=Sh74369_401064 na latest Scaffold Major Full 2015/07/25 5008_7#11 SC GCA_001261215.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/261/215/GCF_001261215.1_5008_7_11 na na na haploid bacteria 4718224 4717602 50.5 0 348 348 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/25/23 4824 4251 89 na +GCF_001667425.1 PRJNA224116 SAMN04691946 LZII00000000.1 na 1834104 1834104 Mycobacterium sp. 
852002-51613_SCH5001154 strain=852002-51613_SCH5001154 na latest Contig Major Full 2016/06/17 ASM166742v1 JCVI GCA_001667425.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/667/425/GCF_001667425.1_ASM166742v1 na na na haploid bacteria 5397251 5397251 67.5 0 86 86 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 07/27/22 5128 4964 53 na +GCF_002245175.1 PRJNA224116 SAMN03262752 LEMZ00000000.1 na 562 562 Escherichia coli strain=272-3565 na latest Scaffold Major Full 2017/08/07 ASM224517v1 Broad Institute GCA_002245175.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/245/175/GCF_002245175.1_ASM224517v1 na na na haploid bacteria 5169399 5163125 50.0 0 11 31 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/25/23 5020 4715 101 na +GCF_002273625.1 PRJNA224116 SAMN03893265 LKWZ00000000.1 na 1280 1280 Staphylococcus aureus strain=ISU 930 na latest Scaffold Major Full 2017/08/28 ISU-930_v1.0 USDA-ARS GCA_002273625.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/273/625/GCF_002273625.1_ISU-930_v1.0 na na na haploid bacteria 2786299 2786299 32.5 6 91 91 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 04/07/23 2851 2658 74 na +GCF_003062865.1 PRJNA224116 SAMN08644156 PZMT00000000.1 na 573 573 Klebsiella pneumoniae strain=ITU3908 na latest Scaffold Major Full 2018/04/23 ASM306286v1 Robert Koch Institute GCA_003062865.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/062/865/GCF_003062865.1_ASM306286v1 na na na haploid bacteria 5676071 5675076 56.5 0 91 91 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 11/06/22 5635 5362 111 na +GCF_003252965.1 PRJNA224116 SAMN09011133 QEPV00000000.1 na 732 732 Aggregatibacter aphrophilus strain=C2008001229 na latest Contig Major Full 2018/06/19 ASM325296v1 Centers for Disease Control and Prevention GCA_003252965.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/252/965/GCF_003252965.1_ASM325296v1 na na na haploid bacteria 
2288398 2288398 42.0 0 33 33 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 10/25/22 2197 2085 67 na +GCF_004354475.1 PRJNA224116 SAMN08555025 PUFE00000000.1 na 214326 1599 Latilactobacillus sakei subsp. sakei strain=ATCC 15521 na latest Contig Major Full 2019/03/18 ASM435447v1 Carlsberg Research Laboratory GCA_004354475.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/354/475/GCF_004354475.1_ASM435447v1 na assembly from type material na haploid bacteria 1936187 1936187 41.0 0 27 27 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 04/04/23 1962 1850 68 30830251 +GCF_010120835.1 PRJNA485481 na na na 2696432 2956306 Escherichia phage nieznany na na latest Complete Genome Major Full 2021/02/07 ASM1012083v1 NCBI GCA_010120835.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/120/835/GCF_010120835.1_ASM1012083v1 na ICTV species exemplar na haploid viral 144998 144998 39.0 1 1 1 na NCBI RefSeq 02/07/21 266 256 10 na +GCF_010667555.1 PRJNA224116 SAMN12785273 VYSE00000000.1 na 1689 1689 Bifidobacterium dentium strain=BRDF 23 na latest Contig Major Full 2020/02/14 ASM1066755v1 University of Bologna GCA_010667555.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/667/555/GCF_010667555.1_ASM1066755v1 na na na haploid bacteria 2557428 2557428 58.5 0 15 15 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/14/23 2120 2009 62 na +GCF_013369175.2 PRJNA224116 SAMN14503210 JABWOC000000000.2 na 2723303 2723303 Escherichia sp. 
8.2195 strain=8.2195 na latest Contig Major Full 2020/08/13 ASM1336917v2 FDA GCA_013369175.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/013/369/175/GCF_013369175.2_ASM1336917v2 na na na haploid bacteria 4811454 4811454 50.0 0 309 309 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 02/24/23 4793 4359 63 32972949 +GCF_017960365.1 PRJNA224116 SAMN15098422 JABVYN000000000.1 na 380021 380021 Pseudomonas protegens strain=PPRAR03 na latest Contig Major Full 2021/04/14 ASM1796036v1 ETH Zurich GCA_017960365.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/017/960/365/GCF_017960365.1_ASM1796036v1 na na na haploid bacteria 7434488 7434488 63.0 0 699 699 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/15/23 7362 6658 94 na +GCF_019703895.1 PRJNA224116 SAMD00254949 na na 2779671 2779671 Streptomyces sp. EAS-AB2608 strain=NBRC 114648 na latest Complete Genome Major Full 2021/05/11 ASM1970389v1 Global Health Research Section, hhc Data Creation Center, Eisai Co., Ltd. GCA_019703895.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/019/703/895/GCF_019703895.1_ASM1970389v1 na na na haploid bacteria 9320785 9320785 72.5 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 09/15/22 8192 7833 90 33944982 +GCF_021369635.1 PRJNA224116 SAMN23428406 JAJNBZ000000000.1 representative genome 1173085 1173085 Paenibacillus profundus strain=YoMME na latest Scaffold Major Full 2022/01/05 ASM2136963v1 Faculty of Biology at Sofia University "St. 
Kliment Ohridski" GCA_021369635.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/021/369/635/GCF_021369635.1_ASM2136963v1 na na na haploid bacteria 6918865 6917004 48.5 0 103 103 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/08/22 6289 5882 66 na +GCF_900172265.1 PRJNA224116 SAMEA102345418 FWFK00000000.1 representative genome 1529041 1529041 Roseivivax jejudonensis strain=CECT 8625 na latest Contig Major Full 2017/04/29 R.jejudonensis_CECT8625_Spades_Prokka UVEG GCA_900172265.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/172/265/GCF_900172265.1_R.jejudonensis_CECT8625_Spades_Prokka na assembly from type material na haploid bacteria 4312868 4312868 67.5 0 21 21 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/13/22 4185 4110 50 na +GCF_904810345.1 PRJNA224116 SAMEA7336317 na na 1806 1773 Mycobacterium tuberculosis variant microti strain=Maus III human latest Complete Genome Major Full 2021/01/27 MmicMaus3 IP GCA_904810345.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/904/810/345/GCF_904810345.1_MmicMaus3 na na na haploid bacteria 4382575 4382575 65.5 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 11/12/22 4105 3831 51 na diff --git a/tests/files/genomes/refseq/assembly_summary_refseq_historical.txt b/tests/files/genomes/refseq/assembly_summary_refseq_historical.txt index efc88d0..e624c80 100644 --- a/tests/files/genomes/refseq/assembly_summary_refseq_historical.txt +++ b/tests/files/genomes/refseq/assembly_summary_refseq_historical.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
-# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCF_001261215.1 PRJNA224116 SAMEA954728 CXAQ00000000.1 na 624 624 Shigella sonnei strain=Sh74369_401064 latest Scaffold Major Full 2015/07/25 5008_7#11 SC GCA_001261215.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/261/215/GCF_001261215.1_5008_7_11 na -GCF_002273625.1 PRJNA224116 SAMN03893265 LKWZ00000000.1 na 1280 1280 Staphylococcus aureus strain=ISU 930 latest Scaffold Major Full 2017/08/28 ISU-930_v1.0 USDA-ARS GCA_002273625.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/273/625/GCF_002273625.1_ISU-930_v1.0 na -GCF_000765925.2 PRJNA224116 SAMN03075569 JRPJ00000000.2 na 37372 37372 Helicobacter bilis strain=ATCC 49320 latest Contig Major Full 2019/05/22 ASM76592v2 Massachusetts Institute of Technology GCA_000765925.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/765/925/GCF_000765925.2_ASM76592v2 na -GCF_000560105.1 PRJNA224116 SAMN02383660 JDIQ00000000.1 na 1410740 1280 Staphylococcus aureus T66282 strain=T66282 latest Scaffold Major Full 2014/02/06 Stap_aure_T66282_V1 Broad Institute GCA_000560105.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/560/105/GCF_000560105.1_Stap_aure_T66282_V1 na -GCF_000806405.1 PRJNA224116 SAMN03222688 JUKG00000000.1 na 1639 1639 Listeria monocytogenes strain=BHU3 latest Contig Major Full 2014/12/22 ASM80640v1 Banaras Hindu University GCA_000806405.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/806/405/GCF_000806405.1_ASM80640v1 na -GCF_010667555.1 PRJNA224116 SAMN12785273 VYSE00000000.1 na 1689 1689 Bifidobacterium dentium strain=BRDF 23 latest Contig Major Full 2020/02/14 ASM1066755v1 University of Bologna GCA_010667555.1 identical 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/667/555/GCF_010667555.1_ASM1066755v1 na -GCF_004354475.1 PRJNA224116 SAMN08555025 PUFE00000000.1 na 214326 1599 Latilactobacillus sakei subsp. sakei strain=ATCC 15521 latest Contig Major Full 2019/03/18 ASM435447v1 Carlsberg Research Laboratory GCA_004354475.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/354/475/GCF_004354475.1_ASM435447v1 assembly from type material na -GCF_010120835.1 PRJNA485481 na 2696432 2696432 Escherichia phage nieznany latest Complete Genome Major Full 2021/02/07 ASM1012083v1 GCA_010120835.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/120/835/GCF_010120835.1_ASM1012083v1 na -GCF_904810345.1 PRJNA224116 SAMEA7336317 na 1806 1773 Mycobacterium tuberculosis variant microti strain=Maus III human latest Complete Genome Major Full 2021/01/27 MmicMaus3 IP GCA_904810345.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/904/810/345/GCF_904810345.1_MmicMaus3 na -GCF_002245175.1 PRJNA224116 SAMN03262752 LEMZ00000000.1 na 562 562 Escherichia coli strain=272-3565 latest Scaffold Major Full 2017/08/07 ASM224517v1 Broad Institute GCA_002245175.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/245/175/GCF_002245175.1_ASM224517v1 na -GCF_019703895.1 PRJNA224116 SAMD00254949 na 2779671 2779671 Streptomyces sp. EAS-AB2608 strain=NBRC 114648 latest Complete Genome Major Full 2021/05/11 ASM1970389v1 Global Health Research Section, hhc Data Creation Center, Eisai Co., Ltd. 
GCA_019703895.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/019/703/895/GCF_019703895.1_ASM1970389v1 na -GCF_003062865.1 PRJNA224116 SAMN08644156 PZMT00000000.1 na 573 573 Klebsiella pneumoniae strain=ITU3908 latest Scaffold Major Full 2018/04/23 ASM306286v1 Robert Koch Institute GCA_003062865.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/062/865/GCF_003062865.1_ASM306286v1 na -GCF_000401975.1 PRJNA224116 SAMN02403947 ASHT00000000.1 na 1329363 630 Yersinia enterocolitica subsp. palearctica YE-P1 strain=YE-P1 latest Contig Major Full 2013/05/28 YE-P1_1.0 Max von Pettenkofer-Institut GCA_000401975.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/401/975/GCF_000401975.1_YE-P1_1.0 na -GCF_001667425.1 PRJNA224116 SAMN04691946 LZII00000000.1 na 1834104 1834104 Mycobacterium sp. 852002-51613_SCH5001154 strain=852002-51613_SCH5001154 latest Contig Major Full 2016/06/17 ASM166742v1 JCVI GCA_001667425.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/667/425/GCF_001667425.1_ASM166742v1 na -GCF_013369175.2 PRJNA224116 SAMN14503210 JABWOC000000000.2 na 2723303 2723303 Escherichia sp. 
8.2195 strain=8.2195 latest Contig Major Full 2020/08/13 ASM1336917v2 FDA GCA_013369175.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/013/369/175/GCF_013369175.2_ASM1336917v2 na -GCF_003252965.1 PRJNA224116 SAMN09011133 QEPV00000000.1 na 732 732 Aggregatibacter aphrophilus strain=C2008001229 latest Contig Major Full 2018/06/19 ASM325296v1 Centers for Disease Control and Prevention GCA_003252965.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/252/965/GCF_003252965.1_ASM325296v1 na -GCF_017960365.1 PRJNA224116 SAMN15098422 JABVYN000000000.1 na 380021 380021 Pseudomonas protegens strain=PPRAR03 latest Contig Major Full 2021/04/14 ASM1796036v1 ETH Zurich GCA_017960365.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/017/960/365/GCF_017960365.1_ASM1796036v1 na -GCF_900172265.1 PRJNA224116 SAMEA102345418 FWFK00000000.1 representative genome 1529041 1529041 Roseivivax jejudonensis strain=CECT 8625 latest Contig Major Full 2017/04/29 R.jejudonensis_CECT8625_Spades_Prokka UVEG GCA_900172265.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/172/265/GCF_900172265.1_R.jejudonensis_CECT8625_Spades_Prokka assembly from type material na -GCF_000947895.1 PRJNA224116 SAMEA2794682 CDHH00000000.1 na 1765 1773 Mycobacterium tuberculosis variant bovis strain=MB3 latest Scaffold Major Full 2015/03/03 Assembly of the genome MB3 ERA7 GCA_000947895.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/947/895/GCF_000947895.1_Assembly_of_the_genome_MB3 na -GCF_021369635.1 PRJNA224116 SAMN23428406 JAJNBZ000000000.1 representative genome 1173085 1173085 Paenibacillus profundus strain=YoMME latest Scaffold Major Full 2022/01/05 ASM2136963v1 Faculty of Biology at Sofia University "St. Kliment Ohridski" GCA_021369635.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/021/369/635/GCF_021369635.1_ASM2136963v1 na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
+#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCF_000401975.1 PRJNA224116 SAMN02403947 ASHT00000000.1 na 1329363 630 Yersinia enterocolitica subsp. palearctica YE-P1 strain=YE-P1 na latest Contig Major Full 2013/05/28 YE-P1_1.0 Max von Pettenkofer-Institut GCA_000401975.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/401/975/GCF_000401975.1_YE-P1_1.0 na na na haploid bacteria 4550830 4550830 46.5 0 86 86 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 02/23/23 4232 3889 103 23846271 +GCF_000560105.1 PRJNA224116 SAMN02383660 JDIQ00000000.1 na 1410740 1280 Staphylococcus aureus T66282 strain=T66282 na latest Scaffold Major Full 2014/02/06 Stap_aure_T66282_V1 Broad Institute GCA_000560105.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/560/105/GCF_000560105.1_Stap_aure_T66282_V1 na na na haploid bacteria 2870235 2870135 32.5 0 21 22 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 10/11/22 2905 2746 77 na +GCF_000765925.2 PRJNA224116 SAMN03075569 JRPJ00000000.2 na 37372 37372 Helicobacter bilis strain=ATCC 49320 na latest Contig Major Full 2019/05/22 ASM76592v2 Massachusetts Institute of Technology GCA_000765925.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/765/925/GCF_000765925.2_ASM76592v2 na na na haploid bacteria 2454342 2454342 34.5 0 149 149 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/09/23 2407 2283 42 25428971 +GCF_000806405.1 PRJNA224116 SAMN03222688 
JUKG00000000.1 na 1639 1639 Listeria monocytogenes strain=BHU3 na latest Contig Major Full 2014/12/22 ASM80640v1 Banaras Hindu University GCA_000806405.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/806/405/GCF_000806405.1_ASM80640v1 na na na haploid bacteria 2946809 2946809 37.5 0 15 15 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 04/07/23 2983 2802 53 25657276 +GCF_000947895.1 PRJNA224116 SAMEA2794682 CDHH00000000.1 na 1765 1773 Mycobacterium tuberculosis variant bovis strain=MB3 na latest Scaffold Major Full 2015/03/03 Assembly of the genome MB3 ERA7 GCA_000947895.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/947/895/GCF_000947895.1_Assembly_of_the_genome_MB3 na na na haploid bacteria 4259788 4250235 65.5 0 94 94 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 11/13/22 4135 3795 50 na +GCF_001261215.1 PRJNA224116 SAMEA954728 CXAQ00000000.1 na 624 624 Shigella sonnei strain=Sh74369_401064 na latest Scaffold Major Full 2015/07/25 5008_7#11 SC GCA_001261215.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/261/215/GCF_001261215.1_5008_7_11 na na na haploid bacteria 4718224 4717602 50.5 0 348 348 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/25/23 4824 4251 89 na +GCF_001667425.1 PRJNA224116 SAMN04691946 LZII00000000.1 na 1834104 1834104 Mycobacterium sp. 
852002-51613_SCH5001154 strain=852002-51613_SCH5001154 na latest Contig Major Full 2016/06/17 ASM166742v1 JCVI GCA_001667425.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/667/425/GCF_001667425.1_ASM166742v1 na na na haploid bacteria 5397251 5397251 67.5 0 86 86 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 07/27/22 5128 4964 53 na +GCF_002245175.1 PRJNA224116 SAMN03262752 LEMZ00000000.1 na 562 562 Escherichia coli strain=272-3565 na latest Scaffold Major Full 2017/08/07 ASM224517v1 Broad Institute GCA_002245175.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/245/175/GCF_002245175.1_ASM224517v1 na na na haploid bacteria 5169399 5163125 50.0 0 11 31 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/25/23 5020 4715 101 na +GCF_002273625.1 PRJNA224116 SAMN03893265 LKWZ00000000.1 na 1280 1280 Staphylococcus aureus strain=ISU 930 na latest Scaffold Major Full 2017/08/28 ISU-930_v1.0 USDA-ARS GCA_002273625.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/273/625/GCF_002273625.1_ISU-930_v1.0 na na na haploid bacteria 2786299 2786299 32.5 6 91 91 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 04/07/23 2851 2658 74 na +GCF_003062865.1 PRJNA224116 SAMN08644156 PZMT00000000.1 na 573 573 Klebsiella pneumoniae strain=ITU3908 na latest Scaffold Major Full 2018/04/23 ASM306286v1 Robert Koch Institute GCA_003062865.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/062/865/GCF_003062865.1_ASM306286v1 na na na haploid bacteria 5676071 5675076 56.5 0 91 91 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 11/06/22 5635 5362 111 na +GCF_003252965.1 PRJNA224116 SAMN09011133 QEPV00000000.1 na 732 732 Aggregatibacter aphrophilus strain=C2008001229 na latest Contig Major Full 2018/06/19 ASM325296v1 Centers for Disease Control and Prevention GCA_003252965.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/252/965/GCF_003252965.1_ASM325296v1 na na na haploid bacteria 
2288398 2288398 42.0 0 33 33 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 10/25/22 2197 2085 67 na +GCF_004354475.1 PRJNA224116 SAMN08555025 PUFE00000000.1 na 214326 1599 Latilactobacillus sakei subsp. sakei strain=ATCC 15521 na latest Contig Major Full 2019/03/18 ASM435447v1 Carlsberg Research Laboratory GCA_004354475.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/354/475/GCF_004354475.1_ASM435447v1 na assembly from type material na haploid bacteria 1936187 1936187 41.0 0 27 27 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 04/04/23 1962 1850 68 30830251 +GCF_010120835.1 PRJNA485481 na na na 2696432 2956306 Escherichia phage nieznany na na latest Complete Genome Major Full 2021/02/07 ASM1012083v1 NCBI GCA_010120835.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/120/835/GCF_010120835.1_ASM1012083v1 na ICTV species exemplar na haploid viral 144998 144998 39.0 1 1 1 na NCBI RefSeq 02/07/21 266 256 10 na +GCF_010667555.1 PRJNA224116 SAMN12785273 VYSE00000000.1 na 1689 1689 Bifidobacterium dentium strain=BRDF 23 na latest Contig Major Full 2020/02/14 ASM1066755v1 University of Bologna GCA_010667555.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/667/555/GCF_010667555.1_ASM1066755v1 na na na haploid bacteria 2557428 2557428 58.5 0 15 15 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 03/14/23 2120 2009 62 na +GCF_013369175.2 PRJNA224116 SAMN14503210 JABWOC000000000.2 na 2723303 2723303 Escherichia sp. 
8.2195 strain=8.2195 na latest Contig Major Full 2020/08/13 ASM1336917v2 FDA GCA_013369175.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/013/369/175/GCF_013369175.2_ASM1336917v2 na na na haploid bacteria 4811454 4811454 50.0 0 309 309 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 02/24/23 4793 4359 63 32972949 +GCF_017960365.1 PRJNA224116 SAMN15098422 JABVYN000000000.1 na 380021 380021 Pseudomonas protegens strain=PPRAR03 na latest Contig Major Full 2021/04/14 ASM1796036v1 ETH Zurich GCA_017960365.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/017/960/365/GCF_017960365.1_ASM1796036v1 na na na haploid bacteria 7434488 7434488 63.0 0 699 699 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 05/15/23 7362 6658 94 na +GCF_019703895.1 PRJNA224116 SAMD00254949 na na 2779671 2779671 Streptomyces sp. EAS-AB2608 strain=NBRC 114648 na latest Complete Genome Major Full 2021/05/11 ASM1970389v1 Global Health Research Section, hhc Data Creation Center, Eisai Co., Ltd. GCA_019703895.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/019/703/895/GCF_019703895.1_ASM1970389v1 na na na haploid bacteria 9320785 9320785 72.5 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 09/15/22 8192 7833 90 33944982 +GCF_021369635.1 PRJNA224116 SAMN23428406 JAJNBZ000000000.1 representative genome 1173085 1173085 Paenibacillus profundus strain=YoMME na latest Scaffold Major Full 2022/01/05 ASM2136963v1 Faculty of Biology at Sofia University "St. 
Kliment Ohridski" GCA_021369635.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/021/369/635/GCF_021369635.1_ASM2136963v1 na na na haploid bacteria 6918865 6917004 48.5 0 103 103 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/08/22 6289 5882 66 na +GCF_900172265.1 PRJNA224116 SAMEA102345418 FWFK00000000.1 representative genome 1529041 1529041 Roseivivax jejudonensis strain=CECT 8625 na latest Contig Major Full 2017/04/29 R.jejudonensis_CECT8625_Spades_Prokka UVEG GCA_900172265.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/172/265/GCF_900172265.1_R.jejudonensis_CECT8625_Spades_Prokka na assembly from type material na haploid bacteria 4312868 4312868 67.5 0 21 21 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 12/13/22 4185 4110 50 na +GCF_904810345.1 PRJNA224116 SAMEA7336317 na na 1806 1773 Mycobacterium tuberculosis variant microti strain=Maus III human latest Complete Genome Major Full 2021/01/27 MmicMaus3 IP GCA_904810345.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/904/810/345/GCF_904810345.1_MmicMaus3 na na na haploid bacteria 4382575 4382575 65.5 1 1 1 NCBI RefSeq NCBI Prokaryotic Genome Annotation Pipeline (PGAP) 11/12/22 4105 3831 51 na diff --git a/tests/files/genomes/refseq/fungi/assembly_summary.txt b/tests/files/genomes/refseq/fungi/assembly_summary.txt index e49e2a2..56fef6a 100644 --- a/tests/files/genomes/refseq/fungi/assembly_summary.txt +++ b/tests/files/genomes/refseq/fungi/assembly_summary.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
-# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCF_000171015.1 PRJNA264112 SAMN02744066 ABDG00000000.2 representative genome 452589 63577 Trichoderma atroviride IMI 206040 strain=IMI 206040 latest Contig Major Full 2011/11/29 TRIAT v2.0 DOE Joint Genome Institute GCA_000171015.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/171/015/GCF_000171015.1_TRIAT_v2.0 na -GCF_003184765.1 PRJNA479915 SAMN05660730 PSTE00000000.1 representative genome 1448322 487661 Aspergillus aculeatinus CBS 121060 strain=CBS 121060 latest Scaffold Major Full 2018/06/04 Aspacu1 DOE Joint Genome Institute GCA_003184765.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/184/765/GCF_003184765.1_Aspacu1 assembly from type material na -GCF_000182805.2 PRJNA51569 SAMEA3138314 CABT00000000.2 representative genome 771870 5147 Sordaria macrospora k-hell strain=k-hell latest Scaffold Major Full 2012/03/13 ASM18280v2 Ruhr University Bochum, Department of General and Molecular Botany GCA_000182805.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/182/805/GCF_000182805.2_ASM18280v2 na -GCF_001792695.1 PRJNA395481 SAMN04942831 LYCR00000000.1 representative genome 109264 109264 Aspergillus bombycis strain=NRRL 26010 latest Contig Major Full 2016/10/19 ASM179269v1 USDA-ARS-SRRC GCA_001792695.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/792/695/GCF_001792695.1_ASM179269v1 assembly from type material na -GCF_000223465.1 PRJNA225504 SAMN00715317 AEIM00000000.1 representative genome 590646 2315449 Yamadazyma tenuis ATCC 10573 strain=ATCC 10573 latest Scaffold Major Full 2011/08/25 Candida tenuis v1.0 DOE Joint Genome Institute GCA_000223465.1 identical 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/223/465/GCF_000223465.1_Candida_tenuis_v1.0 assembly from type material na -GCF_011947395.1 PRJNA691333 SAMN14421089 JAATWM000000000.2 representative genome 1095194 1095194 Colletotrichum karsti strain=CkLH20 latest Scaffold Major Full 2020/12/08 ASM1194739v2 Central South University of Forestry and Technology GCA_011947395.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/011/947/395/GCF_011947395.1_ASM1194739v2 na -GCF_000585515.1 PRJNA245128 SAMN00974102 AMGW00000000.1 representative genome 1182544 470704 Cladophialophora yegresii CBS 114405 strain=CBS 114405 latest Scaffold Major Full 2014/03/05 Clad_yegr_CBS_114405_V1 Broad Institute GCA_000585515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/585/515/GCF_000585515.1_Clad_yegr_CBS_114405_V1 assembly from type material na -GCF_001500285.1 PRJNA342682 SAMN04009710 LKNI00000000.1 representative genome 149040 149040 Mollisia scopiformis strain=CBS 120377 latest Scaffold Major Full 2016/01/07 Phisc1 DOE Joint Genome Institute GCA_001500285.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/500/285/GCF_001500285.1_Phisc1 na -GCF_000988165.1 PRJNA445857 SAMN02213592 JPQZ00000000.1 representative genome 40302 40302 Nosema ceranae strain=PA08 1199 latest Contig Major Full 2015/05/05 ASM98816v1 University of Ottawa GCA_000988165.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/988/165/GCF_000988165.1_ASM98816v1 na -GCF_012971845.1 PRJNA645153 SAMN07172427 QCYV00000000.1 representative genome 45133 45133 Lasiodiplodia theobromae strain=AM2As latest Contig Major Full 2020/05/04 ASM1297184v1 Beltsville Agricultural Research Center GCA_012971845.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/012/971/845/GCF_012971845.1_ASM1297184v1 na -GCF_000403515.1 PRJNA264001 SAMD00002584 BAOW00000000.1 representative genome 1305764 327079 Pseudozyma hubeiensis SY62 strain=SY62 latest Scaffold Major Full 2013/05/16 ASM40351v1 Kitami 
Institute of Technology GCA_000403515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/403/515/GCF_000403515.1_ASM40351v1 na -GCF_000002655.1 PRJNA14003 SAMN00115746 AAHF00000000.1 representative genome 330879 746128 Aspergillus fumigatus Af293 strain=Af293 latest Chromosome Major Full 2005/06/10 ASM265v1 J. Craig Venter Institute GCA_000002655.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/002/655/GCF_000002655.1_ASM265v1 na -GCF_000149245.1 PRJNA177334 SAMN03081441 na 235443 5207 Cryptococcus neoformans var. grubii H99 strain=H99 latest Chromosome Major Full 2014/02/07 CNA3 Broad Institute GCA_000149245.3 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/149/245/GCF_000149245.1_CNA3 na -GCF_000300595.1 PRJNA242544 SAMN02981278 AEHB00000000.1 representative genome 650164 231932 Phanerochaete carnosa HHB-10118-sp strain=HHB-10118-sp latest Scaffold Major Full 2012/10/16 Phanerochaete carnosa HHB-10118-Sp v1.0 DOE Joint Genome Institute GCA_000300595.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/300/595/GCF_000300595.1_Phanerochaete_carnosa_HHB-10118-Sp_v1.0 na -GCF_000226545.1 PRJNA29799 representative genome 515849 2587412 Podospora anserina S mat+ latest Contig Major Full 2008/05/14 ASM22654v1 Genoscope - Centre National de Séquençage GCA_000226545.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/226/545/GCF_000226545.1_ASM22654v1 na -GCF_000961545.1 PRJNA319337 SAMN03199974 AXCR00000000.1 representative genome 1397361 29908 Sporothrix schenckii 1099-18 strain=1099-18 latest Contig Major Full 2015/03/24 S_schenckii_v1 LNCC GCA_000961545.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/961/545/GCF_000961545.1_S_schenckii_v1 na -GCF_010093535.1 PRJNA625772 SAMN05446602 JAAEJD000000000.1 representative genome 673940 673940 Lindgomyces ingoldianus strain=ATCC 200398 latest Scaffold Major Full 2020/01/31 Linin1 DOE Joint Genome Institute GCA_010093535.1 identical 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/093/535/GCF_010093535.1_Linin1 assembly from type material na -GCF_001890905.1 PRJNA374040 SAMN00788628 MRCK00000000.1 representative genome 690307 5053 Aspergillus aculeatus ATCC 16872 strain=ATCC 16872 latest Scaffold Major Full 2016/12/08 Aspac1 JGI GCA_001890905.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/890/905/GCF_001890905.1_Aspac1 assembly from type material na -GCF_900519145.1 PRJNA727466 SAMEA4827382 ULHA00000000.1 representative genome 120017 120017 Ustilago hordei strain=Uho2 latest Contig Major Full 2021/02/23 Uho2_v1 Technische Universitat Munchen - WZW GCA_900519145.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/519/145/GCF_900519145.1_Uho2_v1 na -GCF_008704595.1 PRJNA629604 SAMN11490865 SWFT00000000.1 representative genome 5481 5481 Diutina rugosa strain=CBS 613 latest Scaffold Major Full 2019/09/26 ASM870459v1 Centre for Genomic Regulation GCA_008704595.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/008/704/595/GCF_008704595.1_ASM870459v1 assembly from type material na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. +#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCF_000002655.1 PRJNA14003 SAMN00115746 AAHF00000000.1 representative genome 330879 746128 Aspergillus fumigatus Af293 strain=Af293 na latest Chromosome Major Full 2005/06/10 ASM265v1 J. 
Craig Venter Institute GCA_000002655.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/002/655/GCF_000002655.1_ASM265v1 na na na haploid fungi 29384958 28809958 49.5 8 8 19 J. Craig Venter Institute Annotation submitted by J. Craig Venter Institute 04/18/22 9915 9630 228 16372009 +GCF_000149245.1 PRJNA177334 SAMN03081441 na na 235443 5207 Cryptococcus neoformans var. grubii H99 strain=H99 na latest Chromosome Major Full 2014/02/07 CNA3 Broad Institute GCA_000149245.3 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/149/245/GCF_000149245.1_CNA3 na assembly from type material na haploid fungi 18891193 18891093 48.0 14 14 14 Broad Institute Annotation submitted by Broad Institute 04/04/18 8338 6975 1359 24743168 +GCF_000171015.1 PRJNA264112 SAMN02744066 ABDG00000000.2 representative genome 452589 63577 Trichoderma atroviride IMI 206040 strain=IMI 206040 na latest Contig Major Full 2011/11/29 TRIAT v2.0 DOE Joint Genome Institute GCA_000171015.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/171/015/GCF_000171015.1_TRIAT_v2.0 na na na haploid fungi 36143664 36143664 49.5 0 29 29 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 09/14/15 11810 11809 0 21501500 +GCF_000223465.1 PRJNA225504 SAMN00715317 AEIM00000000.1 representative genome 590646 2315449 Yamadazyma tenuis ATCC 10573 strain=ATCC 10573 na latest Scaffold Major Full 2011/08/25 Candida tenuis v1.0 DOE Joint Genome Institute GCA_000223465.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/223/465/GCF_000223465.1_Candida_tenuis_v1.0 na assembly from type material na haploid fungi 10747050 10582270 42.5 0 25 72 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 11/27/19 5547 5547 0 21788494 +GCF_000226545.1 PRJNA29799 na na representative genome 515849 2587412 Podospora anserina S mat+ strain=S mat+ na latest Contig Major Full 2008/05/14 ASM22654v1 Genoscope - Centre National de Séquençage GCA_000226545.1 identical 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/226/545/GCF_000226545.1_ASM22654v1 na na na haploid fungi 34718894 34250768 52.0 7 33 33 Genoscope - Centre National de Séquençage Annotation submitted by Genoscope - Centre National de Séquençage 04/11/18 10527 10518 0 na +GCF_000300595.1 PRJNA242544 SAMN02981278 AEHB00000000.1 representative genome 650164 231932 Phanerochaete carnosa HHB-10118-sp strain=HHB-10118-sp na latest Scaffold Major Full 2012/10/16 Phanerochaete carnosa HHB-10118-Sp v1.0 DOE Joint Genome Institute GCA_000300595.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/300/595/GCF_000300595.1_Phanerochaete_carnosa_HHB-10118-Sp_v1.0 na na na haploid fungi 46293325 43133388 53.0 0 1137 2598 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 02/05/20 14080 13918 138 22937793 +GCF_000403515.1 PRJNA264001 SAMD00002584 BAOW00000000.1 representative genome 1305764 327079 Pseudozyma hubeiensis SY62 strain=SY62 na latest Scaffold Major Full 2013/05/16 ASM40351v1 Kitami Institute of Technology GCA_000403515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/403/515/GCF_000403515.1_ASM40351v1 na na na haploid fungi 18442938 18435583 56.5 0 74 160 Kitami Institute of Technology Annotation submitted by Kitami Institute of Technology 04/20/15 7619 7472 147 23814110 +GCF_000585515.1 PRJNA245128 SAMN00974102 AMGW00000000.1 representative genome 1182544 470704 Cladophialophora yegresii CBS 114405 strain=CBS 114405 na latest Scaffold Major Full 2014/03/05 Clad_yegr_CBS_114405_V1 Broad Institute GCA_000585515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/585/515/GCF_000585515.1_Clad_yegr_CBS_114405_V1 na assembly from type material na haploid fungi 27898422 27887733 53.5 0 8 8 Broad Institute Annotation submitted by Broad Institute 12/22/14 10118 10118 0 na +GCF_000961545.1 PRJNA319337 SAMN03199974 AXCR00000000.1 representative genome 1397361 29908 Sporothrix schenckii 1099-18 strain=1099-18 na latest 
Contig Major Full 2015/03/24 S_schenckii_v1 LNCC GCA_000961545.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/961/545/GCF_000961545.1_S_schenckii_v1 na na na haploid fungi 32375103 32375103 54.5 0 16 16 LNCC Annotation submitted by LNCC 04/19/22 10434 10293 141 25480940;25351875 +GCF_000988165.1 PRJNA445857 SAMN02213592 JPQZ00000000.1 representative genome 40302 40302 Nosema ceranae strain=PA08 1199 na latest Contig Major Full 2015/05/05 ASM98816v1 University of Ottawa GCA_000988165.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/988/165/GCF_000988165.1_ASM98816v1 na na na haploid fungi 5690748 5690748 25.0 0 536 536 University of Ottawa Annotation submitted by University of Ottawa 03/31/18 3265 3209 37 25914091 +GCF_001500285.1 PRJNA342682 SAMN04009710 LKNI00000000.1 representative genome 149040 149040 Mollisia scopiformis strain=CBS 120377 na latest Scaffold Major Full 2016/01/07 Phisc1 DOE Joint Genome Institute GCA_001500285.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/500/285/GCF_001500285.1_Phisc1 na na na haploid fungi 48876257 48619125 47.5 0 71 345 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 12/09/21 18648 18556 82 26950333 +GCF_001792695.1 PRJNA395481 SAMN04942831 LYCR00000000.1 representative genome 109264 109264 Aspergillus bombycis strain=NRRL26010 na latest Contig Major Full 2016/10/19 ASM179269v1 USDA-ARS-SRRC GCA_001792695.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/792/695/GCF_001792695.1_ASM179269v1 na assembly from type material na haploid fungi 37474605 37474605 48.5 0 450 450 USDA-ARS-SRRC Annotation submitted by USDA-ARS-SRRC 09/22/17 12263 12263 0 27664179 +GCF_001890905.1 PRJNA374040 SAMN00788628 MRCK00000000.1 representative genome 690307 5053 Aspergillus aculeatus ATCC 16872 strain=ATCC 16872 na latest Scaffold Major Full 2016/12/08 Aspac1 JGI GCA_001890905.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/890/905/GCF_001890905.1_Aspac1 
na assembly from type material na haploid fungi 35424414 35185997 50.5 0 660 852 JGI Annotation submitted by JGI 02/03/20 11152 10830 321 na +GCF_003184765.1 PRJNA479915 SAMN05660730 PSTE00000000.1 representative genome 1448322 487661 Aspergillus aculeatinus CBS 121060 strain=CBS 121060 na latest Scaffold Major Full 2018/06/04 Aspacu1 DOE Joint Genome Institute GCA_003184765.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/184/765/GCF_003184765.1_Aspacu1 na assembly from type material na haploid fungi 36471649 36442703 50.0 0 121 325 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 07/06/18 12354 12028 323 na +GCF_008704595.1 PRJNA629604 SAMN11490865 SWFT00000000.1 representative genome 5481 5481 Diutina rugosa strain=CBS 613 na latest Scaffold Major Full 2019/09/26 ASM870459v1 Centre for Genomic Regulation GCA_008704595.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/008/704/595/GCF_008704595.1_ASM870459v1 na assembly from type material na haploid fungi 13445959 13433795 49.5 0 169 169 Centre for Genomic Regulation Annotation submitted by Centre for Genomic Regulation 05/06/20 5819 5815 0 31575637 +GCF_010093535.1 PRJNA625772 SAMN05446602 JAAEJD000000000.1 representative genome 673940 673940 Lindgomyces ingoldianus strain=ATCC 200398 na latest Scaffold Major Full 2020/01/31 Linin1 DOE Joint Genome Institute GCA_010093535.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/093/535/GCF_010093535.1_Linin1 na assembly from type material na haploid fungi 69030646 68103745 41.5 0 200 1522 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 04/21/20 16000 15946 45 na +GCF_011947395.1 PRJNA691333 SAMN14421089 JAATWM000000000.2 representative genome 1095194 1095194 Colletotrichum karsti strain=CkLH20 na latest Scaffold Major Full 2020/12/08 ASM1194739v2 Central South University of Forestry and Technology GCA_011947395.2 identical 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/011/947/395/GCF_011947395.1_ASM1194739v2 na na na haploid fungi 51850041 51849281 52.5 0 127 127 Central South University of Forestry and Technology Annotation submitted by Central South University of Forestry and Technology 02/25/22 13328 13328 0 na +GCF_012971845.1 PRJNA645153 SAMN07172427 QCYV00000000.1 representative genome 45133 45133 Lasiodiplodia theobromae strain=AM2As na latest Contig Major Full 2020/05/04 ASM1297184v1 Beltsville Agricultural Research Center GCA_012971845.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/012/971/845/GCF_012971845.1_ASM1297184v1 na na na haploid fungi 43694574 43694574 54.5 0 296 296 Beltsville Agricultural Research Center Annotation submitted by Beltsville Agricultural Research Center 08/04/20 13054 13054 0 31580730 +GCF_900519145.1 PRJNA727466 SAMEA4827382 ULHA00000000.1 representative genome 120017 120017 Ustilago hordei strain=Uho2 na latest Contig Major Full 2021/02/23 Uho2_v1 Technische Universitat Munchen - WZW GCA_900519145.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/519/145/GCF_900519145.1_Uho2_v1 na na na haploid fungi 26637005 26637005 51.0 0 45 45 Technische Universitat Munchen - WZW Annotation submitted by Technische Universitat Munchen - WZW 09/15/21 7703 7449 254 na +GCF_000182805.2 PRJNA51569 SAMEA3138314 CABT00000000.2 na 771870 5147 Sordaria macrospora k-hell strain=k-hell na replaced Scaffold Major Full 2012/03/13 ASM18280v2 Ruhr University Bochum, Department of General and Molecular Botany GCA_000182805.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/182/805/GCF_000182805.2_ASM18280v2 na na 2023/03/22 haploid fungi 38391383 38253259 52.0 0 1583 1583 Ruhr University Bochum, Department of General and Molecular Botany Annotation submitted by Ruhr University Bochum, Department of General and Molecular Botany na 10551 9896 462 20386741 diff --git a/tests/files/genomes/refseq/fungi/assembly_summary_historical.txt 
b/tests/files/genomes/refseq/fungi/assembly_summary_historical.txt index e49e2a2..56fef6a 100644 --- a/tests/files/genomes/refseq/fungi/assembly_summary_historical.txt +++ b/tests/files/genomes/refseq/fungi/assembly_summary_historical.txt @@ -1,22 +1,22 @@ -# See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. -# assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date -GCF_000171015.1 PRJNA264112 SAMN02744066 ABDG00000000.2 representative genome 452589 63577 Trichoderma atroviride IMI 206040 strain=IMI 206040 latest Contig Major Full 2011/11/29 TRIAT v2.0 DOE Joint Genome Institute GCA_000171015.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/171/015/GCF_000171015.1_TRIAT_v2.0 na -GCF_003184765.1 PRJNA479915 SAMN05660730 PSTE00000000.1 representative genome 1448322 487661 Aspergillus aculeatinus CBS 121060 strain=CBS 121060 latest Scaffold Major Full 2018/06/04 Aspacu1 DOE Joint Genome Institute GCA_003184765.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/184/765/GCF_003184765.1_Aspacu1 assembly from type material na -GCF_000182805.2 PRJNA51569 SAMEA3138314 CABT00000000.2 representative genome 771870 5147 Sordaria macrospora k-hell strain=k-hell latest Scaffold Major Full 2012/03/13 ASM18280v2 Ruhr University Bochum, Department of General and Molecular Botany GCA_000182805.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/182/805/GCF_000182805.2_ASM18280v2 na -GCF_001792695.1 PRJNA395481 SAMN04942831 LYCR00000000.1 representative genome 109264 109264 Aspergillus bombycis strain=NRRL 26010 latest Contig Major Full 2016/10/19 ASM179269v1 USDA-ARS-SRRC GCA_001792695.1 identical 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/792/695/GCF_001792695.1_ASM179269v1 assembly from type material na -GCF_000223465.1 PRJNA225504 SAMN00715317 AEIM00000000.1 representative genome 590646 2315449 Yamadazyma tenuis ATCC 10573 strain=ATCC 10573 latest Scaffold Major Full 2011/08/25 Candida tenuis v1.0 DOE Joint Genome Institute GCA_000223465.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/223/465/GCF_000223465.1_Candida_tenuis_v1.0 assembly from type material na -GCF_011947395.1 PRJNA691333 SAMN14421089 JAATWM000000000.2 representative genome 1095194 1095194 Colletotrichum karsti strain=CkLH20 latest Scaffold Major Full 2020/12/08 ASM1194739v2 Central South University of Forestry and Technology GCA_011947395.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/011/947/395/GCF_011947395.1_ASM1194739v2 na -GCF_000585515.1 PRJNA245128 SAMN00974102 AMGW00000000.1 representative genome 1182544 470704 Cladophialophora yegresii CBS 114405 strain=CBS 114405 latest Scaffold Major Full 2014/03/05 Clad_yegr_CBS_114405_V1 Broad Institute GCA_000585515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/585/515/GCF_000585515.1_Clad_yegr_CBS_114405_V1 assembly from type material na -GCF_001500285.1 PRJNA342682 SAMN04009710 LKNI00000000.1 representative genome 149040 149040 Mollisia scopiformis strain=CBS 120377 latest Scaffold Major Full 2016/01/07 Phisc1 DOE Joint Genome Institute GCA_001500285.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/500/285/GCF_001500285.1_Phisc1 na -GCF_000988165.1 PRJNA445857 SAMN02213592 JPQZ00000000.1 representative genome 40302 40302 Nosema ceranae strain=PA08 1199 latest Contig Major Full 2015/05/05 ASM98816v1 University of Ottawa GCA_000988165.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/988/165/GCF_000988165.1_ASM98816v1 na -GCF_012971845.1 PRJNA645153 SAMN07172427 QCYV00000000.1 representative genome 45133 45133 Lasiodiplodia theobromae strain=AM2As latest Contig Major Full 
2020/05/04 ASM1297184v1 Beltsville Agricultural Research Center GCA_012971845.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/012/971/845/GCF_012971845.1_ASM1297184v1 na -GCF_000403515.1 PRJNA264001 SAMD00002584 BAOW00000000.1 representative genome 1305764 327079 Pseudozyma hubeiensis SY62 strain=SY62 latest Scaffold Major Full 2013/05/16 ASM40351v1 Kitami Institute of Technology GCA_000403515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/403/515/GCF_000403515.1_ASM40351v1 na -GCF_000002655.1 PRJNA14003 SAMN00115746 AAHF00000000.1 representative genome 330879 746128 Aspergillus fumigatus Af293 strain=Af293 latest Chromosome Major Full 2005/06/10 ASM265v1 J. Craig Venter Institute GCA_000002655.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/002/655/GCF_000002655.1_ASM265v1 na -GCF_000149245.1 PRJNA177334 SAMN03081441 na 235443 5207 Cryptococcus neoformans var. grubii H99 strain=H99 latest Chromosome Major Full 2014/02/07 CNA3 Broad Institute GCA_000149245.3 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/149/245/GCF_000149245.1_CNA3 na -GCF_000300595.1 PRJNA242544 SAMN02981278 AEHB00000000.1 representative genome 650164 231932 Phanerochaete carnosa HHB-10118-sp strain=HHB-10118-sp latest Scaffold Major Full 2012/10/16 Phanerochaete carnosa HHB-10118-Sp v1.0 DOE Joint Genome Institute GCA_000300595.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/300/595/GCF_000300595.1_Phanerochaete_carnosa_HHB-10118-Sp_v1.0 na -GCF_000226545.1 PRJNA29799 representative genome 515849 2587412 Podospora anserina S mat+ latest Contig Major Full 2008/05/14 ASM22654v1 Genoscope - Centre National de Séquençage GCA_000226545.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/226/545/GCF_000226545.1_ASM22654v1 na -GCF_000961545.1 PRJNA319337 SAMN03199974 AXCR00000000.1 representative genome 1397361 29908 Sporothrix schenckii 1099-18 strain=1099-18 latest Contig Major Full 2015/03/24 S_schenckii_v1 LNCC GCA_000961545.1 
identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/961/545/GCF_000961545.1_S_schenckii_v1 na -GCF_010093535.1 PRJNA625772 SAMN05446602 JAAEJD000000000.1 representative genome 673940 673940 Lindgomyces ingoldianus strain=ATCC 200398 latest Scaffold Major Full 2020/01/31 Linin1 DOE Joint Genome Institute GCA_010093535.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/093/535/GCF_010093535.1_Linin1 assembly from type material na -GCF_001890905.1 PRJNA374040 SAMN00788628 MRCK00000000.1 representative genome 690307 5053 Aspergillus aculeatus ATCC 16872 strain=ATCC 16872 latest Scaffold Major Full 2016/12/08 Aspac1 JGI GCA_001890905.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/890/905/GCF_001890905.1_Aspac1 assembly from type material na -GCF_900519145.1 PRJNA727466 SAMEA4827382 ULHA00000000.1 representative genome 120017 120017 Ustilago hordei strain=Uho2 latest Contig Major Full 2021/02/23 Uho2_v1 Technische Universitat Munchen - WZW GCA_900519145.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/519/145/GCF_900519145.1_Uho2_v1 na -GCF_008704595.1 PRJNA629604 SAMN11490865 SWFT00000000.1 representative genome 5481 5481 Diutina rugosa strain=CBS 613 latest Scaffold Major Full 2019/09/26 ASM870459v1 Centre for Genomic Regulation GCA_008704595.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/008/704/595/GCF_008704595.1_ASM870459v1 assembly from type material na +## See ftp://ftp.ncbi.nlm.nih.gov/genomes/README_assembly_summary.txt for a description of the columns in this file. 
+#assembly_accession bioproject biosample wgs_master refseq_category taxid species_taxid organism_name infraspecific_name isolate version_status assembly_level release_type genome_rep seq_rel_date asm_name asm_submitter gbrs_paired_asm paired_asm_comp ftp_path excluded_from_refseq relation_to_type_material asm_not_live_date assembly_type group genome_size genome_size_ungapped gc_percent replicon_count scaffold_count contig_count annotation_provider annotation_name annotation_date total_gene_count protein_coding_gene_count non_coding_gene_count pubmed_id +GCF_000002655.1 PRJNA14003 SAMN00115746 AAHF00000000.1 representative genome 330879 746128 Aspergillus fumigatus Af293 strain=Af293 na latest Chromosome Major Full 2005/06/10 ASM265v1 J. Craig Venter Institute GCA_000002655.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/002/655/GCF_000002655.1_ASM265v1 na na na haploid fungi 29384958 28809958 49.5 8 8 19 J. Craig Venter Institute Annotation submitted by J. Craig Venter Institute 04/18/22 9915 9630 228 16372009 +GCF_000149245.1 PRJNA177334 SAMN03081441 na na 235443 5207 Cryptococcus neoformans var. 
grubii H99 strain=H99 na latest Chromosome Major Full 2014/02/07 CNA3 Broad Institute GCA_000149245.3 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/149/245/GCF_000149245.1_CNA3 na assembly from type material na haploid fungi 18891193 18891093 48.0 14 14 14 Broad Institute Annotation submitted by Broad Institute 04/04/18 8338 6975 1359 24743168 +GCF_000171015.1 PRJNA264112 SAMN02744066 ABDG00000000.2 representative genome 452589 63577 Trichoderma atroviride IMI 206040 strain=IMI 206040 na latest Contig Major Full 2011/11/29 TRIAT v2.0 DOE Joint Genome Institute GCA_000171015.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/171/015/GCF_000171015.1_TRIAT_v2.0 na na na haploid fungi 36143664 36143664 49.5 0 29 29 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 09/14/15 11810 11809 0 21501500 +GCF_000223465.1 PRJNA225504 SAMN00715317 AEIM00000000.1 representative genome 590646 2315449 Yamadazyma tenuis ATCC 10573 strain=ATCC 10573 na latest Scaffold Major Full 2011/08/25 Candida tenuis v1.0 DOE Joint Genome Institute GCA_000223465.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/223/465/GCF_000223465.1_Candida_tenuis_v1.0 na assembly from type material na haploid fungi 10747050 10582270 42.5 0 25 72 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 11/27/19 5547 5547 0 21788494 +GCF_000226545.1 PRJNA29799 na na representative genome 515849 2587412 Podospora anserina S mat+ strain=S mat+ na latest Contig Major Full 2008/05/14 ASM22654v1 Genoscope - Centre National de Séquençage GCA_000226545.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/226/545/GCF_000226545.1_ASM22654v1 na na na haploid fungi 34718894 34250768 52.0 7 33 33 Genoscope - Centre National de Séquençage Annotation submitted by Genoscope - Centre National de Séquençage 04/11/18 10527 10518 0 na +GCF_000300595.1 PRJNA242544 SAMN02981278 AEHB00000000.1 representative genome 650164 231932 Phanerochaete 
carnosa HHB-10118-sp strain=HHB-10118-sp na latest Scaffold Major Full 2012/10/16 Phanerochaete carnosa HHB-10118-Sp v1.0 DOE Joint Genome Institute GCA_000300595.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/300/595/GCF_000300595.1_Phanerochaete_carnosa_HHB-10118-Sp_v1.0 na na na haploid fungi 46293325 43133388 53.0 0 1137 2598 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 02/05/20 14080 13918 138 22937793 +GCF_000403515.1 PRJNA264001 SAMD00002584 BAOW00000000.1 representative genome 1305764 327079 Pseudozyma hubeiensis SY62 strain=SY62 na latest Scaffold Major Full 2013/05/16 ASM40351v1 Kitami Institute of Technology GCA_000403515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/403/515/GCF_000403515.1_ASM40351v1 na na na haploid fungi 18442938 18435583 56.5 0 74 160 Kitami Institute of Technology Annotation submitted by Kitami Institute of Technology 04/20/15 7619 7472 147 23814110 +GCF_000585515.1 PRJNA245128 SAMN00974102 AMGW00000000.1 representative genome 1182544 470704 Cladophialophora yegresii CBS 114405 strain=CBS 114405 na latest Scaffold Major Full 2014/03/05 Clad_yegr_CBS_114405_V1 Broad Institute GCA_000585515.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/585/515/GCF_000585515.1_Clad_yegr_CBS_114405_V1 na assembly from type material na haploid fungi 27898422 27887733 53.5 0 8 8 Broad Institute Annotation submitted by Broad Institute 12/22/14 10118 10118 0 na +GCF_000961545.1 PRJNA319337 SAMN03199974 AXCR00000000.1 representative genome 1397361 29908 Sporothrix schenckii 1099-18 strain=1099-18 na latest Contig Major Full 2015/03/24 S_schenckii_v1 LNCC GCA_000961545.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/961/545/GCF_000961545.1_S_schenckii_v1 na na na haploid fungi 32375103 32375103 54.5 0 16 16 LNCC Annotation submitted by LNCC 04/19/22 10434 10293 141 25480940;25351875 +GCF_000988165.1 PRJNA445857 SAMN02213592 JPQZ00000000.1 representative genome 40302 
40302 Nosema ceranae strain=PA08 1199 na latest Contig Major Full 2015/05/05 ASM98816v1 University of Ottawa GCA_000988165.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/988/165/GCF_000988165.1_ASM98816v1 na na na haploid fungi 5690748 5690748 25.0 0 536 536 University of Ottawa Annotation submitted by University of Ottawa 03/31/18 3265 3209 37 25914091 +GCF_001500285.1 PRJNA342682 SAMN04009710 LKNI00000000.1 representative genome 149040 149040 Mollisia scopiformis strain=CBS 120377 na latest Scaffold Major Full 2016/01/07 Phisc1 DOE Joint Genome Institute GCA_001500285.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/500/285/GCF_001500285.1_Phisc1 na na na haploid fungi 48876257 48619125 47.5 0 71 345 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 12/09/21 18648 18556 82 26950333 +GCF_001792695.1 PRJNA395481 SAMN04942831 LYCR00000000.1 representative genome 109264 109264 Aspergillus bombycis strain=NRRL26010 na latest Contig Major Full 2016/10/19 ASM179269v1 USDA-ARS-SRRC GCA_001792695.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/792/695/GCF_001792695.1_ASM179269v1 na assembly from type material na haploid fungi 37474605 37474605 48.5 0 450 450 USDA-ARS-SRRC Annotation submitted by USDA-ARS-SRRC 09/22/17 12263 12263 0 27664179 +GCF_001890905.1 PRJNA374040 SAMN00788628 MRCK00000000.1 representative genome 690307 5053 Aspergillus aculeatus ATCC 16872 strain=ATCC 16872 na latest Scaffold Major Full 2016/12/08 Aspac1 JGI GCA_001890905.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/890/905/GCF_001890905.1_Aspac1 na assembly from type material na haploid fungi 35424414 35185997 50.5 0 660 852 JGI Annotation submitted by JGI 02/03/20 11152 10830 321 na +GCF_003184765.1 PRJNA479915 SAMN05660730 PSTE00000000.1 representative genome 1448322 487661 Aspergillus aculeatinus CBS 121060 strain=CBS 121060 na latest Scaffold Major Full 2018/06/04 Aspacu1 DOE Joint Genome Institute 
GCA_003184765.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/184/765/GCF_003184765.1_Aspacu1 na assembly from type material na haploid fungi 36471649 36442703 50.0 0 121 325 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 07/06/18 12354 12028 323 na +GCF_008704595.1 PRJNA629604 SAMN11490865 SWFT00000000.1 representative genome 5481 5481 Diutina rugosa strain=CBS 613 na latest Scaffold Major Full 2019/09/26 ASM870459v1 Centre for Genomic Regulation GCA_008704595.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/008/704/595/GCF_008704595.1_ASM870459v1 na assembly from type material na haploid fungi 13445959 13433795 49.5 0 169 169 Centre for Genomic Regulation Annotation submitted by Centre for Genomic Regulation 05/06/20 5819 5815 0 31575637 +GCF_010093535.1 PRJNA625772 SAMN05446602 JAAEJD000000000.1 representative genome 673940 673940 Lindgomyces ingoldianus strain=ATCC 200398 na latest Scaffold Major Full 2020/01/31 Linin1 DOE Joint Genome Institute GCA_010093535.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/010/093/535/GCF_010093535.1_Linin1 na assembly from type material na haploid fungi 69030646 68103745 41.5 0 200 1522 DOE Joint Genome Institute Annotation submitted by DOE Joint Genome Institute 04/21/20 16000 15946 45 na +GCF_011947395.1 PRJNA691333 SAMN14421089 JAATWM000000000.2 representative genome 1095194 1095194 Colletotrichum karsti strain=CkLH20 na latest Scaffold Major Full 2020/12/08 ASM1194739v2 Central South University of Forestry and Technology GCA_011947395.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/011/947/395/GCF_011947395.1_ASM1194739v2 na na na haploid fungi 51850041 51849281 52.5 0 127 127 Central South University of Forestry and Technology Annotation submitted by Central South University of Forestry and Technology 02/25/22 13328 13328 0 na +GCF_012971845.1 PRJNA645153 SAMN07172427 QCYV00000000.1 representative genome 45133 45133 Lasiodiplodia theobromae strain=AM2As 
na latest Contig Major Full 2020/05/04 ASM1297184v1 Beltsville Agricultural Research Center GCA_012971845.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/012/971/845/GCF_012971845.1_ASM1297184v1 na na na haploid fungi 43694574 43694574 54.5 0 296 296 Beltsville Agricultural Research Center Annotation submitted by Beltsville Agricultural Research Center 08/04/20 13054 13054 0 31580730 +GCF_900519145.1 PRJNA727466 SAMEA4827382 ULHA00000000.1 representative genome 120017 120017 Ustilago hordei strain=Uho2 na latest Contig Major Full 2021/02/23 Uho2_v1 Technische Universitat Munchen - WZW GCA_900519145.1 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/900/519/145/GCF_900519145.1_Uho2_v1 na na na haploid fungi 26637005 26637005 51.0 0 45 45 Technische Universitat Munchen - WZW Annotation submitted by Technische Universitat Munchen - WZW 09/15/21 7703 7449 254 na +GCF_000182805.2 PRJNA51569 SAMEA3138314 CABT00000000.2 na 771870 5147 Sordaria macrospora k-hell strain=k-hell na replaced Scaffold Major Full 2012/03/13 ASM18280v2 Ruhr University Bochum, Department of General and Molecular Botany GCA_000182805.2 identical https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/182/805/GCF_000182805.2_ASM18280v2 na na 2023/03/22 haploid fungi 38391383 38253259 52.0 0 1583 1583 Ruhr University Bochum, Department of General and Molecular Botany Annotation submitted by Ruhr University Bochum, Department of General and Molecular Botany na 10551 9896 462 20386741 diff --git a/tests/integration_offline.bats b/tests/integration_offline.bats index 96df12d..00ca1de 100644 --- a/tests/integration_offline.bats +++ b/tests/integration_offline.bats @@ -46,7 +46,7 @@ setup_file() { run ./genome_updater.sh -d refseq -b ${label} -o ${outdir} sanity_check ${outdir} ${label} assert [ $(count_files ${outdir} ${label}) -gt 0 ] # contains files - for file in $(ls_files ${outdir} ${label}); do + for file in $(find_files ${outdir} ${label}); do [[ "$(basename $file)" = GCF* ]] # filename 
starts with GCF_ done } @@ -57,7 +57,7 @@ setup_file() { run ./genome_updater.sh -d genbank -b ${label} -o ${outdir} sanity_check ${outdir} ${label} assert [ $(count_files ${outdir} ${label}) -gt 0 ] # contains files - for file in $(ls_files ${outdir} ${label}); do + for file in $(find_files ${outdir} ${label}); do [[ "$(basename $file)" = GCA* ]] # filename starts with GCA_ done } @@ -70,7 +70,7 @@ setup_file() { sanity_check ${outdir} ${label} files_refseq=$(count_files ${outdir} ${label}) assert [ ${files_refseq} -gt 0 ] # contains files - for file in $(ls_files ${outdir} ${label}); do + for file in $(find_files ${outdir} ${label}); do [[ "$(basename $file)" = GCF* ]] # filename starts with GCF_ done @@ -79,7 +79,7 @@ setup_file() { sanity_check ${outdir} ${label} files_genbank=$(count_files ${outdir} ${label}) assert [ ${files_genbank} -gt 0 ] # contains files - for file in $(ls_files ${outdir} ${label}); do + for file in $(find_files ${outdir} ${label}); do [[ "$(basename $file)" = GCA* ]] # filename starts with GCA_ done diff --git a/tests/integration_online.bats b/tests/integration_online.bats index f0a7663..a20aaf2 100644 --- a/tests/integration_online.bats +++ b/tests/integration_online.bats @@ -33,7 +33,7 @@ setup_file() { assert [ $(count_files ${outdir} ${label}) -gt 0 ] # Check filenames - for file in $(ls_files ${outdir} ${label}); do + for file in $(find_files ${outdir} ${label}); do [[ "$(basename $file)" = GCF* ]] # filename starts with GCF_ done }