diff --git a/README.md b/README.md
index 624d036..d1445fa 100644
--- a/README.md
+++ b/README.md
@@ -8,10 +8,9 @@
     <img src="https://img.shields.io/pypi/v/pyensembl.svg?maxAge=1000" alt="PyPI" />
 </a>
 
+# PyEnsembl
 
-PyEnsembl
-=======
-PyEnsembl is a Python interface to [Ensembl](http://www.ensembl.org) reference genome metadata such as exons and transcripts. PyEnsembl downloads [GTF](https://en.wikipedia.org/wiki/Gene_transfer_format) and [FASTA](https://en.wikipedia.org/wiki/FASTA_format) files from the [Ensembl FTP server](ftp://ftp.ensembl.org) and loads them into a local database. PyEnsembl can also work with custom reference data specified using user-supplied GTF and FASTA files. 
+PyEnsembl is a Python interface to [Ensembl](http://www.ensembl.org) reference genome metadata such as exons and transcripts. PyEnsembl downloads [GTF](https://en.wikipedia.org/wiki/Gene_transfer_format) and [FASTA](https://en.wikipedia.org/wiki/FASTA_format) files from the [Ensembl FTP server](ftp://ftp.ensembl.org) and loads them into a local database. PyEnsembl can also work with custom reference data specified using user-supplied GTF and FASTA files.
 
 # Example Usage
 
@@ -25,7 +24,7 @@ data = EnsemblRelease(77)
 gene_names = data.gene_names_at_locus(contig=6, position=29945884)
 
 # get all exons associated with HLA-A
-exon_ids  = data.exon_ids_of_gene_name('HLA-A')
+exon_ids = data.exon_ids_of_gene_name("HLA-A")
 ```
 
 # Installation
@@ -52,6 +51,7 @@ Alternatively, you can create the `EnsemblRelease` object from inside a Python
 process and call `ensembl_object.download()` followed by `ensembl_object.index()`.
 
 ## Cache Location
+
 By default, PyEnsembl uses the platform-specific `Cache` folder
 and caches the files into the `pyensembl` sub-directory.
 You can override this default by setting the environment key `PYENSEMBL_CACHE_DIR`
@@ -66,11 +66,11 @@ or
 ```python
 import os
 
-os.environ['PYENSEMBL_CACHE_DIR'] = '/custom/cache/dir'
+os.environ["PYENSEMBL_CACHE_DIR"] = "/custom/cache/dir"
 # ... PyEnsembl API usage
 ```
 
-# Usage tips 
+# Usage tips
 
 ## List installed genomes
 
@@ -80,6 +80,7 @@ pyensembl list
 
 ```python
 from pyensembl.shell import collect_all_installed_ensembl_releases
+
 collect_all_installed_ensembl_releases()
 ```
 
@@ -87,10 +88,11 @@ collect_all_installed_ensembl_releases()
 
 ```python
 from pyensembl import EnsemblRelease
+
 data = EnsemblRelease(
     release=100,
-    species=find_species_by_name('drosophila_melanogaster'),
-    )
+    species=find_species_by_name("drosophila_melanogaster"),
+)
 ```
 
 ## Data structure
@@ -98,13 +100,13 @@ data = EnsemblRelease(
 ### Gene object
 
 ```python
-gene=data.gene_by_id(gene_id='FBgn0011747')
+gene = data.gene_by_id(gene_id="FBgn0011747")
 ```
 
 ### Transcript object
 
 ```python
-transcript=gene.transcripts[0]
+transcript = gene.transcripts[0]
 ```
 
 ### Protein information
@@ -125,11 +127,12 @@ For example:
 
 ```python
 from pyensembl import Genome
+
 data = Genome(
-    reference_name='GRCh38',
-    annotation_name='my_genome_features',
+    reference_name="GRCh38",
+    annotation_name="my_genome_features",
     # annotation_version=None,
-    gtf_path_or_url='/My/local/gtf/path_to_my_genome_features.gtf', # Path or URL of GTF file
+    gtf_path_or_url="/My/local/gtf/path_to_my_genome_features.gtf",  # Path or URL of GTF file
     # transcript_fasta_paths_or_urls=None, # List of paths or URLs of FASTA files containing transcript sequences
     # protein_fasta_paths_or_urls=None, # List of paths or URLs of FASTA files containing protein sequences
     # cache_directory_path=None, # Where to place downloaded and cached files for this genome
@@ -142,8 +145,8 @@ gene_names = data.gene_names_at_locus(contig=6, position=29945884)
 # API
 
 The `EnsemblRelease` object has methods to let you access all possible
-combinations of the annotation features *gene\_name*, *gene\_id*,
-*transcript\_name*, *transcript\_id*, *exon\_id* as well as the location of
+combinations of the annotation features _gene_name_, _gene_id_,
+_transcript_name_, _transcript_id_, _exon_id_ as well as the location of
 these genomic elements (contig, start position, end position, strand).
 
 ## Genes
diff --git a/docs/conf.py b/docs/conf.py
index bbc0aaf..aefddaa 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -18,47 +18,47 @@
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.insert(0, os.path.abspath('..'))
+sys.path.insert(0, os.path.abspath(".."))
 
 # -- General configuration ------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
-#needs_sphinx = '1.0'
+# needs_sphinx = '1.0'
 
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'sphinx.ext.autodoc',
+    "sphinx.ext.autodoc",
 ]
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 # source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+source_suffix = ".rst"
 
 # The encoding of source files.
-#source_encoding = 'utf-8-sig'
+# source_encoding = 'utf-8-sig'
 
 # The master toctree document.
-master_doc = 'index'
+master_doc = "index"
 
 # General information about the project.
-project = u'pyensembl'
-copyright = u'2016, Hammer Lab'
-author = u'Hammer Lab'
+project = "pyensembl"
+copyright = "2016, Hammer Lab"
+author = "Hammer Lab"
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-version = u'0.8.10'
+version = "0.8.10"
 # The full version, including alpha/beta/rc tags.
-release = u'0.8.10'
+release = "0.8.10"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -69,37 +69,37 @@
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
-#today = ''
+# today = ''
 # Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['_build']
+exclude_patterns = ["_build"]
 
 # The reST default role (used for this markup: `text`) to use for all
 # documents.
-#default_role = None
+# default_role = None
 
 # If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
 
 # If true, the current module name will be prepended to all description
 # unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
 
 # If true, sectionauthor and moduleauthor directives will be shown in the
 # output. They are ignored by default.
-#show_authors = False
+# show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
 
 # A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
 
 # If true, keep warnings as "system message" paragraphs in the built documents.
-#keep_warnings = False
+# keep_warnings = False
 
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = False
@@ -109,156 +109,155 @@
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-html_theme = 'alabaster'
+html_theme = "alabaster"
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
-#html_theme_options = {}
+# html_theme_options = {}
 
 # Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
+# html_theme_path = []
 
 # The name for this set of Sphinx documents.  If None, it defaults to
 # "<project> v<release> documentation".
-#html_title = None
+# html_title = None
 
 # A shorter title for the navigation bar.  Default is the same as html_title.
-#html_short_title = None
+# html_short_title = None
 
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-#html_logo = None
+# html_logo = None
 
 # The name of an image file (relative to this directory) to use as a favicon of
 # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-#html_favicon = None
+# html_favicon = None
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
 
 # Add any extra paths that contain custom files (such as robots.txt or
 # .htaccess) here, relative to this directory. These files are copied
 # directly to the root of the documentation.
-#html_extra_path = []
+# html_extra_path = []
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+# html_last_updated_fmt = '%b %d, %Y'
 
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
-#html_use_smartypants = True
+# html_use_smartypants = True
 
 # Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
-#html_additional_pages = {}
+# html_additional_pages = {}
 
 # If false, no module index is generated.
-#html_domain_indices = True
+# html_domain_indices = True
 
 # If false, no index is generated.
-#html_use_index = True
+# html_use_index = True
 
 # If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
 
 # If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
+# html_show_sphinx = True
 
 # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
+# html_show_copyright = True
 
 # If true, an OpenSearch description file will be output, and all pages will
 # contain a <link> tag referring to it.  The value of this option must be the
 # base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
 
 # This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
+# html_file_suffix = None
 
 # Language to be used for generating the HTML full-text search index.
 # Sphinx supports the following languages:
 #   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
 #   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
-#html_search_language = 'en'
+# html_search_language = 'en'
 
 # A dictionary with options for the search language support, empty by default.
 # Now only 'ja' uses this config value
-#html_search_options = {'type': 'default'}
+# html_search_options = {'type': 'default'}
 
 # The name of a javascript file (relative to the configuration directory) that
 # implements a search results scorer. If empty, the default will be used.
-#html_search_scorer = 'scorer.js'
+# html_search_scorer = 'scorer.js'
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'pyensembldoc'
+htmlhelp_basename = "pyensembldoc"
 
 # -- Options for LaTeX output ---------------------------------------------
 
 latex_elements = {
-# The paper size ('letterpaper' or 'a4paper').
-#'papersize': 'letterpaper',
-
-# The font size ('10pt', '11pt' or '12pt').
-#'pointsize': '10pt',
-
-# Additional stuff for the LaTeX preamble.
-#'preamble': '',
-
-# Latex figure (float) alignment
-#'figure_align': 'htbp',
+    # The paper size ('letterpaper' or 'a4paper').
+    #'papersize': 'letterpaper',
+    # The font size ('10pt', '11pt' or '12pt').
+    #'pointsize': '10pt',
+    # Additional stuff for the LaTeX preamble.
+    #'preamble': '',
+    # Latex figure (float) alignment
+    #'figure_align': 'htbp',
 }
 
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'pyensembl.tex', u'pyensembl Documentation',
-     u'Hammer Lab', 'manual'),
+    (
+        master_doc,
+        "pyensembl.tex",
+        "pyensembl Documentation",
+        "Hammer Lab",
+        "manual",
+    ),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
 # the title page.
-#latex_logo = None
+# latex_logo = None
 
 # For "manual" documents, if this is true, then toplevel headings are parts,
 # not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
 
 # If true, show page references after internal links.
-#latex_show_pagerefs = False
+# latex_show_pagerefs = False
 
 # If true, show URL addresses after external links.
-#latex_show_urls = False
+# latex_show_urls = False
 
 # Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
 
 # If false, no module index is generated.
-#latex_domain_indices = True
+# latex_domain_indices = True
 
 
 # -- Options for manual page output ---------------------------------------
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'pyensembl', u'pyensembl Documentation',
-     [author], 1)
-]
+man_pages = [(master_doc, "pyensembl", "pyensembl Documentation", [author], 1)]
 
 # If true, show URL addresses after external links.
-#man_show_urls = False
+# man_show_urls = False
 
 
 # -- Options for Texinfo output -------------------------------------------
@@ -267,19 +266,25 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (master_doc, 'pyensembl', u'pyensembl Documentation',
-     author, 'pyensembl', 'One line description of project.',
-     'Miscellaneous'),
+    (
+        master_doc,
+        "pyensembl",
+        "pyensembl Documentation",
+        author,
+        "pyensembl",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
 ]
 
 # Documents to append as an appendix to all manuals.
-#texinfo_appendices = []
+# texinfo_appendices = []
 
 # If false, no module index is generated.
-#texinfo_domain_indices = True
+# texinfo_domain_indices = True
 
 # How to display URL addresses: 'footnote', 'no', or 'inline'.
-#texinfo_show_urls = 'footnote'
+# texinfo_show_urls = 'footnote'
 
 # If true, do not generate a @detailmenu in the "Top" node's menu.
-#texinfo_no_detailmenu = False
+# texinfo_no_detailmenu = False
diff --git a/pyensembl/__init__.py b/pyensembl/__init__.py
index eeb28fb..991af8c 100644
--- a/pyensembl/__init__.py
+++ b/pyensembl/__init__.py
@@ -10,27 +10,27 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from .config import MAX_ENSEMBL_RELEASE, MAX_ENSEMBLGENOME_RELEASE
 from .database import Database
 from .download_cache import DownloadCache
 from .ensembl_release import EnsemblRelease, cached_release
-from .ensembl_release_versions import MAX_ENSEMBL_RELEASE
 from .exon import Exon
-from .genome import Genome
 from .gene import Gene
+from .genome import Genome
 from .locus import Locus
-from .reference_name import (
-    ensembl_grch36,
-    ensembl_grch37,
-    ensembl_grch38,
-    normalize_reference_name,
+from .reference_name import (  # ensembl_grch36,; ensembl_grch37,; ensembl_grch38,
     find_species_by_reference,
-    which_reference,
     genome_for_reference_name,
+    normalize_reference_name,
+    which_reference,
 )
-
 from .search import find_nearest_locus
 from .sequence_data import SequenceData
-from .species import find_species_by_name, check_species_object, normalize_species_name
+from .species import (
+    check_species_object,
+    find_species_by_name,
+    normalize_species_name,
+)
 from .transcript import Transcript
 from .version import __version__
 
@@ -41,6 +41,7 @@
     "EnsemblRelease",
     "cached_release",
     "MAX_ENSEMBL_RELEASE",
+    "MAX_ENSEMBLGENOME_RELEASE",
     "Gene",
     "Transcript",
     "Exon",
@@ -56,7 +57,7 @@
     "Genome",
     "Locus",
     "Exon",
-    "ensembl_grch36",
-    "ensembl_grch37",
-    "ensembl_grch38",
+    # "ensembl_grch36",
+    # "ensembl_grch37",
+    # "ensembl_grch38",
 ]
diff --git a/pyensembl/common.py b/pyensembl/common.py
index ccc5eb1..a9a3964 100644
--- a/pyensembl/common.py
+++ b/pyensembl/common.py
@@ -11,7 +11,6 @@
 # limitations under the License.
 
 import pickle
-
 from functools import wraps
 
 
@@ -28,10 +27,11 @@ def load_pickle(filepath):
 
 
 def _memoize_cache_key(args, kwargs):
-    """Turn args tuple and kwargs dictionary into a hashable key.
+    """
+    Turn args tuple and kwargs dictionary into a hashable key.
 
-    Expects that all arguments to a memoized function are either hashable
-    or can be uniquely identified from type(arg) and repr(arg).
+    Expects that all arguments to a memoized function are either
+    hashable or can be uniquely identified from type(arg) and repr(arg).
     """
     cache_key_list = []
 
@@ -51,9 +51,9 @@ def _memoize_cache_key(args, kwargs):
 
 
 def memoize(fn):
-    """Simple reset-able memoization decorator for functions and methods,
-    assumes that all arguments to the function can be hashed and
-    compared.
+    """
+    Simple reset-able memoization decorator for functions and methods. Assumes
+    that all arguments to the function can be hashed and compared.
     """
     cache = {}
 
diff --git a/pyensembl/config.py b/pyensembl/config.py
new file mode 100644
index 0000000..faaa3a5
--- /dev/null
+++ b/pyensembl/config.py
@@ -0,0 +1,181 @@
+# TODO: save the config in a YAML file, or a TOML file?
+
+MIN_ENSEMBL_RELEASE = 54
+MAX_ENSEMBL_RELEASE = 110
+MIN_ENSEMBLGENOME_RELEASE = 50
+MAX_ENSEMBLGENOME_RELEASE = 57
+
+
+SPECIES_DATA = [
+    {
+        "latin_name": "homo_sapiens",
+        "synonyms": ["human"],
+        "reference_assemblies": {
+            "NCBI36": (54, 54),
+            "GRCh37": (55, 75),
+            "GRCh38": (76, MAX_ENSEMBL_RELEASE),
+        },
+    },
+    {
+        "latin_name": "mus_musculus",
+        "synonyms": ["mouse", "house mouse"],
+        "reference_assemblies": {
+            "NCBIM37": (54, 67),
+            "GRCm38": (68, 102),
+            "GRCm39": (103, MAX_ENSEMBL_RELEASE),
+        },
+    },
+    {
+        "latin_name": "canis_familiaris",
+        "synonyms": ["dog"],
+        "reference_assemblies": {"CanFam3.1": (75, MAX_ENSEMBL_RELEASE)},
+    },
+    {
+        "latin_name": "felis_catus",
+        "synonyms": ["cat"],
+        "reference_assemblies": {
+            "Felis_catus_6.2": (75, 90),
+            "Felis_catus_8.0": (91, 92),
+            "Felis_catus_9.0": (93, MAX_ENSEMBL_RELEASE),
+        },
+    },
+    {
+        "latin_name": "gallus_gallus",
+        "synonyms": ["chicken"],
+        "reference_assemblies": {
+            "Galgal4": (75, 85),
+            "Gallus_gallus-5.0": (86, MAX_ENSEMBL_RELEASE),
+        },
+    },
+    {
+        "latin_name": "rattus_norvegicus",
+        "synonyms": ["rat", "brown_rat", "lab_rat"],
+        "reference_assemblies": {
+            "Rnor_5.0": (75, 79),
+            "Rnor_6.0": (80, 104),
+            "mRatBN7.2": (105, MAX_ENSEMBL_RELEASE),
+        },
+    },
+    {
+        "latin_name": "macaca_fascicularis",
+        "synonyms": ["macaque", "Crab-eating_macaque"],
+        "reference_assemblies": {
+            "Macaca_fascicularis_6.0": (103, MAX_ENSEMBL_RELEASE)
+        },
+    },
+    {
+        "latin_name": "chlorocebus_sabaeus",
+        "synonyms": ["green_monkey", "african_green_monkey"],
+        "reference_assemblies": {"ChlSab1.1": (86, MAX_ENSEMBL_RELEASE)},
+    },
+    {
+        "latin_name": "macaca_mulatta",
+        "synonyms": ["rhesus"],
+        "reference_assemblies": {"Mmul_10": (75, MAX_ENSEMBL_RELEASE)},
+    },
+    {
+        "latin_name": "oryctolagus_cuniculus",
+        "synonyms": ["rabbit"],
+        "reference_assemblies": {"OryCun2.0": (75, MAX_ENSEMBL_RELEASE)},
+    },
+    {
+        "latin_name": "meriones_unguiculatus",
+        "synonyms": ["gerbil"],
+        "reference_assemblies": {"MunDraft-v1.0": (75, MAX_ENSEMBL_RELEASE)},
+    },
+    {
+        "latin_name": "mesocricetus_auratus",
+        "synonyms": ["syrian_hamster"],
+        "reference_assemblies": {"MesAur1.0": (75, MAX_ENSEMBL_RELEASE)},
+    },
+    {
+        "latin_name": "cricetulus_griseus_chok1gshd",
+        "synonyms": ["chinese_hamster"],
+        "reference_assemblies": {"CHOK1GS_HDv1": (75, MAX_ENSEMBL_RELEASE)},
+    },
+    {
+        "latin_name": "heterocephalus_glaber_female",
+        "synonyms": ["naked_mole_rat"],
+        "reference_assemblies": {
+            "HetGla_female_1.0": (75, MAX_ENSEMBL_RELEASE)
+        },
+    },
+    {
+        "latin_name": "cavia_porcellus",
+        "synonyms": ["guinea_pig"],
+        "reference_assemblies": {"Cavpor3.0": (75, MAX_ENSEMBL_RELEASE)},
+    },
+    {
+        "latin_name": "sus_scrofa",
+        "synonyms": ["pig"],
+        "reference_assemblies": {"Sscrofa11.1": (75, MAX_ENSEMBL_RELEASE)},
+    },
+    {
+        "latin_name": "danio_rerio",
+        "synonyms": ["zebrafish"],
+        "reference_assemblies": {
+            "Zv8": (54, 59),
+            "Zv9": (60, 79),
+            "GRCz10": (80, 91),
+            "GRCz11": (92, MAX_ENSEMBL_RELEASE),
+        },
+    },
+    {
+        "latin_name": "drosophila_melanogaster",
+        "synonyms": ["drosophila", "fruit fly", "fly"],
+        "reference_assemblies": {
+            "BDGP5": (75, 78),
+            "BDGP6": (79, 95),
+            "BDGP6.22": (96, 98),
+            "BDGP6.28": (99, 102),
+            "BDGP6.32": (103, MAX_ENSEMBL_RELEASE),
+        },
+    },
+    {
+        "latin_name": "caenorhabditis_elegans",
+        "synonyms": ["nematode", "C_elegans"],
+        "reference_assemblies": {
+            "WS200": (55, 57),
+            "WS210": (58, 60),
+            "WS220": (61, 66),
+            "WBcel235": (67, MAX_ENSEMBL_RELEASE),
+        },
+    },
+    {
+        "latin_name": "saccharomyces_cerevisiae",
+        "synonyms": ["yeast", "budding_yeast"],
+        "reference_assemblies": {"R64-1-1": (75, MAX_ENSEMBL_RELEASE)},
+    },
+    {
+        "latin_name": "arabidopsis_thaliana",
+        "synonyms": ["cress", "thale_cress", "hehe"],
+        "reference_assemblies": {
+            "TAIR10": (55, MAX_ENSEMBLGENOME_RELEASE),
+        },
+        "database": "plants",
+    },
+    {
+        "latin_name": "oryza_sativa",
+        "synonyms": ["rice"],
+        "reference_assemblies": {
+            "IRGSP-1.0": (55, MAX_ENSEMBLGENOME_RELEASE),
+        },
+        "database": "plants",
+    },
+    {
+        "latin_name": "zea_mays",
+        "synonyms": ["maize"],
+        "reference_assemblies": {
+            "Zm-B73-REFERENCE-NAM-5.0": (55, MAX_ENSEMBLGENOME_RELEASE),
+        },
+        "database": "plants",
+    },
+    {
+        "latin_name": "glycine_max",
+        "synonyms": ["soybean"],
+        "reference_assemblies": {
+            "Glycine_max_v2.1": (55, MAX_ENSEMBLGENOME_RELEASE),
+        },
+        "database": "plants",
+    },
+]
diff --git a/pyensembl/database.py b/pyensembl/database.py
index 4286908..b5fcd99 100644
--- a/pyensembl/database.py
+++ b/pyensembl/database.py
@@ -11,16 +11,16 @@
 # limitations under the License.
 
 import logging
-from os.path import split, join, exists, splitext
 import sqlite3
+from os.path import exists, join, split, splitext
 
 import datacache
+from gtfparse import create_missing_features, read_gtf
 from typechecks import require_integer, require_string
-from gtfparse import read_gtf, create_missing_features
 
 from .common import memoize
-from .normalization import normalize_chromosome, normalize_strand
 from .locus import Locus
+from .normalization import normalize_chromosome, normalize_strand
 
 # any time we update the database schema, increment this version number
 DATABASE_SCHEMA_VERSION = 3
@@ -31,9 +31,9 @@
 
 class Database(object):
     """
-    Wrapper around sqlite3 database so that the rest of the
-    library doesn't have to worry about constructing the .db file or
-    writing SQL queries directly.
+    Wrapper around sqlite3 database so that the rest of the library doesn't
+    have to worry about constructing the .db file or writing SQL queries
+    directly.
     """
 
     def __init__(
@@ -104,8 +104,8 @@ def local_db_path(self):
 
     def _all_possible_indices(self, column_names):
         """
-        Create list of tuples containing all possible index groups
-        we might want to create over tables in this database.
+        Create list of tuples containing all possible index groups we might
+        want to create over tables in this database.
 
         If a set of genome annotations is missing some column we want
         to index on, we have to drop any indices which use that column.
@@ -136,7 +136,8 @@ def _all_possible_indices(self, column_names):
                 # other GTFs)
                 if column_name not in column_set:
                     logger.info(
-                        "Skipping database index for {%s}", ", ".join(column_group)
+                        "Skipping database index for {%s}",
+                        ", ".join(column_group),
                     )
                     skip = True
             if skip:
@@ -149,7 +150,8 @@ def _all_possible_indices(self, column_names):
     PRIMARY_KEY_COLUMNS = {"gene": "gene_id", "transcript": "transcript_id"}
 
     def _get_primary_key(self, feature_name, feature_df):
-        """Name of primary key for a feature table (e.g. "gene" -> "gene_id")
+        """
+        Name of primary key for a feature table (e.g. "gene" -> "gene_id")
 
         Since we're potentially going to run this code over unseen data,
         make sure that the primary is unique and never null.
@@ -163,18 +165,21 @@ def _get_primary_key(self, feature_name, feature_df):
         if primary_key_values.isnull().any():
             raise ValueError(
                 "Column '%s' can't be primary key of table '%s'"
-                " because it contains nulls values" % (primary_key, feature_name)
+                " because it contains nulls values"
+                % (primary_key, feature_name)
             )
         elif len(primary_key_values.unique()) < len(primary_key_values):
             raise ValueError(
                 "Column '%s' can't be primary key of table '%s'"
-                " because it contains repeated values" % (primary_key, feature_name)
+                " because it contains repeated values"
+                % (primary_key, feature_name)
             )
         else:
             return primary_key
 
     def _feature_indices(self, all_index_groups, primary_key, feature_df):
-        """Choose subset of index group tuples from `all_index_groups` which are
+        """
+        Choose subset of index group tuples from `all_index_groups` which are
         applicable to a particular feature (not same as its primary key, have
         non-null values).
         """
@@ -194,9 +199,8 @@ def _feature_indices(self, all_index_groups, primary_key, feature_df):
 
     def create(self, overwrite=False):
         """
-        Create the local database (including indexing) if it's not
-        already set up. If `overwrite` is True, always re-create
-        the database from scratch.
+        Create the local database (including indexing) if it's not already set
+        up. If `overwrite` is True, always re-create the database from scratch.
 
         Returns a connection to the database.
         """
@@ -204,8 +208,19 @@ def create(self, overwrite=False):
         datacache.ensure_dir(self.cache_directory_path)
 
         df = self._load_gtf_as_dataframe(
-            usecols=self.restrict_gtf_columns, features=self.restrict_gtf_features
+            usecols=self.restrict_gtf_columns,
+            features=self.restrict_gtf_features,
         )
+        # Some species, such as maize, do not have gene_name and transcript_name
+        # but do have gene_id and transcript_id; use the IDs as aliases for the names.
+        if "gene_id" in df.columns and "gene_name" not in df.columns:
+            df["gene_name"] = df["gene_id"]
+        if (
+            "transcript_id" in df.columns
+            and "transcript_name" not in df.columns
+        ):
+            df["transcript_name"] = df["transcript_id"]
+
         all_index_groups = self._all_possible_indices(df.columns)
 
         if self.restrict_gtf_features:
@@ -261,7 +276,7 @@ def _get_connection(self):
     @property
     def connection(self):
         """
-        Get a connection to the database or raise an exception
+        Get a connection to the database or raise an exception.
         """
         connection = self._get_connection()
         if connection:
@@ -275,6 +290,7 @@ def connection(self):
     def connect_or_create(self, overwrite=False):
         """
         Return a connection to the database if it exists, otherwise create it.
+
         Overwrite the existing database if `overwrite` is True.
         """
         connection = self._get_connection()
@@ -306,8 +322,8 @@ def column_values_at_locus(
         sorted=False,
     ):
         """
-        Get the non-null values of a column from the database
-        at a particular range of loci
+        Get the non-null values of a column from the database at a particular
+        range of loci.
         """
 
         # TODO: combine with the query method, since they overlap
@@ -408,8 +424,8 @@ def distinct_column_values_at_locus(
 
     def run_sql_query(self, sql, required=False, query_params=[]):
         """
-        Given an arbitrary SQL query, run it against the database
-        and return the results.
+        Given an arbitrary SQL query, run it against the database and return
+        the results.
 
         Parameters
         ----------
@@ -454,8 +470,8 @@ def query(
         required=False,
     ):
         """
-        Construct a SQL query and run against the sqlite3 database,
-        filtered both by the feature type and a user-provided column/value.
+        Construct a SQL query and run against the sqlite3 database, filtered
+        both by the feature type and a user-provided column/value.
         """
         sql = """
             SELECT %s%s
@@ -468,7 +484,9 @@ def query(
             filter_column,
         )
         query_params = [filter_value]
-        return self.run_sql_query(sql, required=required, query_params=query_params)
+        return self.run_sql_query(
+            sql, required=required, query_params=query_params
+        )
 
     def query_one(
         self,
@@ -490,7 +508,9 @@ def query_one(
 
         if len(results) == 0:
             if required:
-                raise ValueError("%s not found: %s" % (filter_column, filter_value))
+                raise ValueError(
+                    "%s not found: %s" % (filter_column, filter_value)
+                )
             else:
                 return None
         elif len(results) > 1:
@@ -505,8 +525,8 @@ def query_feature_values(
         self, column, feature, distinct=True, contig=None, strand=None
     ):
         """
-        Run a SQL query against the sqlite3 database, filtered
-        only on the feature type.
+        Run a SQL query against the sqlite3 database, filtered only on the
+        feature type.
         """
         query = """
             SELECT %s%s
@@ -541,7 +561,6 @@ def query_loci(self, filter_column, filter_value, feature):
         """
         Query for loci satisfying a given filter and feature type.
 
-
         Parameters
         ----------
         filter_column : str
@@ -571,8 +590,8 @@ def query_loci(self, filter_column, filter_value, feature):
 
     def query_locus(self, filter_column, filter_value, feature):
         """
-        Query for unique locus, raises error if missing or more than
-        one locus in the database.
+        Query for unique locus, raises error if missing or more than one locus
+        in the database.
 
         Parameters
         ----------
@@ -588,7 +607,9 @@ def query_locus(self, filter_column, filter_value, feature):
         Returns single Locus object.
         """
         loci = self.query_loci(
-            filter_column=filter_column, filter_value=filter_value, feature=feature
+            filter_column=filter_column,
+            filter_value=filter_value,
+            feature=feature,
         )
 
         if len(loci) == 0:
@@ -605,7 +626,7 @@ def query_locus(self, filter_column, filter_value, feature):
 
     def _load_gtf_as_dataframe(self, usecols=None, features=None):
         """
-        Parse this genome source's GTF file and load it as a Pandas DataFrame
+        Parse this genome source's GTF file and load it as a Pandas DataFrame.
         """
         logger.info("Reading GTF from %s", self.gtf_path)
         df = read_gtf(
@@ -621,7 +642,9 @@ def _load_gtf_as_dataframe(self, usecols=None, features=None):
 
         column_names = set(df.keys())
         expect_gene_feature = features is None or "gene" in features
-        expect_transcript_feature = features is None or "transcript" in features
+        expect_transcript_feature = (
+            features is None or "transcript" in features
+        )
         observed_features = set(df["feature"])
 
         # older Ensembl releases don't have "gene" or "transcript"
@@ -635,7 +658,9 @@ def _load_gtf_as_dataframe(self, usecols=None, features=None):
                 dataframe=df,
                 unique_keys={"gene": "gene_id"},
                 extra_columns={
-                    "gene": {"gene_name", "gene_biotype"}.intersection(column_names),
+                    "gene": {"gene_name", "gene_biotype"}.intersection(
+                        column_names
+                    ),
                 },
                 missing_value="",
             )
diff --git a/pyensembl/download_cache.py b/pyensembl/download_cache.py
index c33d6fe..48ebd00 100644
--- a/pyensembl/download_cache.py
+++ b/pyensembl/download_cache.py
@@ -11,14 +11,13 @@
 # limitations under the License.
 
 
+import logging
 from os import listdir, remove
-from os.path import join, exists, split, abspath, isdir
+from os.path import abspath, exists, isdir, join, split
 from shutil import copy2, rmtree
-import logging
 
 import datacache
 
-
 logger = logging.getLogger(__name__)
 
 CACHE_BASE_SUBDIR = "pyensembl"
@@ -29,9 +28,11 @@ def cache_subdirectory(
     reference_name=None, annotation_name=None, annotation_version=None
 ):
     """
-    Which cache subdirectory to use for a given annotation database
-    over a particular reference. All arguments can be omitted to just get
-    the base subdirectory for all pyensembl cached datasets.
+    Which cache subdirectory to use for a given annotation database over a
+    particular reference.
+
+    All arguments can be omitted to just get the base subdirectory for
+    all pyensembl cached datasets.
     """
     if reference_name is None:
         reference_name = ""
@@ -135,7 +136,7 @@ def cache_directory_path(self):
 
     def _fields(self):
         """
-        Fields used for hashing, string representation, equality comparison
+        Fields used for hashing, string representation, equality comparison.
         """
         return (
             (
@@ -150,7 +151,10 @@ def _fields(self):
         )
 
     def __eq__(self, other):
-        return other.__class__ is DownloadCache and self._fields() == other._fields()
+        return (
+            other.__class__ is DownloadCache
+            and self._fields() == other._fields()
+        )
 
     def __hash__(self):
         return hash(self._fields())
@@ -202,7 +206,9 @@ def cached_path(self, path_or_url):
             # for stripping decompression extensions for both local
             # and remote files
             local_filename = datacache.build_local_filename(
-                download_url=path_or_url, filename=remote_filename, decompress=False
+                download_url=path_or_url,
+                filename=remote_filename,
+                decompress=False,
             )
         else:
             local_filename = remote_filename
@@ -210,10 +216,14 @@ def cached_path(self, path_or_url):
         # if we expect the download function to decompress this file then
         # we should use its name without the compression extension
         if self.decompress_on_download:
-            local_filename = self._remove_compression_suffix_if_present(local_filename)
+            local_filename = self._remove_compression_suffix_if_present(
+                local_filename
+            )
 
         if len(local_filename) == 0:
-            raise ValueError("Can't determine local filename for %s" % (path_or_url,))
+            raise ValueError(
+                "Can't determine local filename for %s" % (path_or_url,)
+            )
 
         return join(self.cache_directory_path, local_filename)
 
@@ -254,8 +264,8 @@ def download_or_copy_if_necessary(
         self, path_or_url, download_if_missing=False, overwrite=False
     ):
         """
-        Download a remote file or copy
-        Get the local path to a possibly remote file.
+        Get the local path to a possibly remote file, downloading or copying
+        it if necessary.
 
         Download if file is missing from the cache directory and
         `download_if_missing` is True. Download even if local file exists if
@@ -295,7 +305,11 @@ def _raise_missing_file_error(self, missing_urls_dict):
         raise ValueError(error_message)
 
     def local_path_or_install_error(
-        self, field_name, path_or_url, download_if_missing=False, overwrite=False
+        self,
+        field_name,
+        path_or_url,
+        download_if_missing=False,
+        overwrite=False,
     ):
         try:
             return self.download_or_copy_if_necessary(
@@ -308,13 +322,13 @@ def local_path_or_install_error(
 
     def delete_cached_files(self, prefixes=[], suffixes=[]):
         """
-        Deletes any cached files matching the prefixes or suffixes given
+        Deletes any cached files matching the prefixes or suffixes given.
         """
         if isdir(self.cache_directory_path):
             for filename in listdir():
-                delete = any([filename.endswith(ext) for ext in suffixes]) or any(
-                    [filename.startswith(pre) for pre in prefixes]
-                )
+                delete = any(
+                    [filename.endswith(ext) for ext in suffixes]
+                ) or any([filename.startswith(pre) for pre in prefixes])
                 if delete:
                     path = join(self.cache_directory_path, filename)
                     logger.info("Deleting %s", path)
diff --git a/pyensembl/ensembl_release.py b/pyensembl/ensembl_release.py
index 8ad47ab..497b503 100644
--- a/pyensembl/ensembl_release.py
+++ b/pyensembl/ensembl_release.py
@@ -11,34 +11,23 @@
 # limitations under the License.
 
 """
-Contains the EnsemblRelease class, which extends the Genome class
-to be specific to (a particular release of) Ensembl.
+Contains the EnsemblRelease class, which extends the Genome class to be
+specific to (a particular release of) Ensembl.
 """
 from weakref import WeakValueDictionary
 
+from .ensembl_release_versions import check_release_number
+from .ensembl_url_templates import make_fasta_url, make_gtf_url
 from .genome import Genome
-from .ensembl_release_versions import check_release_number, MAX_ENSEMBL_RELEASE
 from .species import check_species_object, human
 
-from .ensembl_url_templates import ENSEMBL_FTP_SERVER, make_gtf_url, make_fasta_url
-
 
 class EnsemblRelease(Genome):
     """
-    Bundles together the genomic annotation and sequence data associated with
-    a particular release of the Ensembl database.
+    Bundles together the genomic annotation and sequence data associated with a
+    particular release of the Ensembl database.
     """
 
-    @classmethod
-    def normalize_init_values(cls, release, species, server):
-        """
-        Normalizes the arguments which uniquely specify an EnsemblRelease
-        genome.
-        """
-        release = check_release_number(release)
-        species = check_species_object(species)
-        return (release, species, server)
-
     # Using a WeakValueDictionary instead of an ordinary dict to prevent a
     # memory leak in cases where we test many different releases in sequence.
     # When all the references to a particular EnsemblRelease die then that
@@ -47,13 +36,21 @@ def normalize_init_values(cls, release, species, server):
 
     @classmethod
     def cached(
-        cls, release=MAX_ENSEMBL_RELEASE, species=human, server=ENSEMBL_FTP_SERVER
+        cls,
+        release=None,
+        species=human,
+        database=None,
+        server=None,
+        # server=ENSEMBL_FTP_SERVER,
     ):
         """
         Construct EnsemblRelease if it's never been made before, otherwise
         return an old instance.
         """
-        init_args_tuple = cls.normalize_init_values(release, species, server)
+        species = check_species_object(species)
+        release = check_release_number(release, species.database)
+        init_args_tuple = (release, species, database, server)
+
         if init_args_tuple in cls._genome_cache:
             genome = cls._genome_cache[init_args_tuple]
         else:
@@ -61,14 +58,23 @@ def cached(
         return genome
 
     def __init__(
-        self, release=MAX_ENSEMBL_RELEASE, species=human, server=ENSEMBL_FTP_SERVER
+        self,
+        release=None,
+        species=human,
+        database=None,
+        server=None,
+        # server=ENSEMBL_FTP_SERVER,
     ):
-        self.release, self.species, self.server = self.normalize_init_values(
-            release=release, species=species, server=server
-        )
+        self.species = check_species_object(species)
+        self.release = check_release_number(release, self.species.database)
+        self.database = database
+        self.server = server
 
         self.gtf_url = make_gtf_url(
-            ensembl_release=self.release, species=self.species, server=self.server
+            ensembl_release=self.release,
+            species=self.species.latin_name,
+            server=self.server,
+            database=self.species.database,
         )
 
         self.transcript_fasta_urls = [
@@ -77,12 +83,14 @@ def __init__(
                 species=self.species.latin_name,
                 sequence_type="cdna",
                 server=server,
+                database=self.species.database,
             ),
             make_fasta_url(
                 ensembl_release=self.release,
                 species=self.species.latin_name,
                 sequence_type="ncrna",
                 server=server,
+                database=self.species.database,
             ),
         ]
 
@@ -92,6 +100,7 @@ def __init__(
                 species=self.species.latin_name,
                 sequence_type="pep",
                 server=self.server,
+                database=self.species.database,
             )
         ]
 
@@ -130,7 +139,11 @@ def __hash__(self):
         return hash((self.release, self.species))
 
     def to_dict(self):
-        return {"release": self.release, "species": self.species, "server": self.server}
+        return {
+            "release": self.release,
+            "species": self.species,
+            "server": self.server,
+        }
 
     @classmethod
     def from_dict(cls, state_dict):
@@ -144,7 +157,9 @@ def cached_release(release, species="human"):
     """
     Create an EnsemblRelease instance only if it's hasn't already been made,
     otherwise returns the old instance.
-    Keeping this function for backwards compatibility but this functionality
-    has been moving into the cached method of EnsemblRelease.
+
+    Keeping this function for backwards compatibility, but this
+    functionality has been moved into the `cached` method of
+    EnsemblRelease.
     """
     return EnsemblRelease.cached(release=release, species=species)
diff --git a/pyensembl/ensembl_release_versions.py b/pyensembl/ensembl_release_versions.py
index 79649bd..246a380 100644
--- a/pyensembl/ensembl_release_versions.py
+++ b/pyensembl/ensembl_release_versions.py
@@ -10,23 +10,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-MIN_ENSEMBL_RELEASE = 54
-MAX_ENSEMBL_RELEASE = 110
+from .config import (
+    MAX_ENSEMBL_RELEASE,
+    MAX_ENSEMBLGENOME_RELEASE,
+    MIN_ENSEMBL_RELEASE,
+    MIN_ENSEMBLGENOME_RELEASE,
+)
 
 
-def check_release_number(release):
+def check_release_number(release, database=None):
     """
-    Check to make sure a release is in the valid range of
-    Ensembl releases.
+    Check to make sure a release is in the valid range of Ensembl releases.
     """
+    if release is None:
+        return (
+            MAX_ENSEMBL_RELEASE
+            if database is None
+            else MAX_ENSEMBLGENOME_RELEASE
+        )
     try:
         release = int(release)
-    except:
+    except ValueError:
         raise ValueError("Invalid Ensembl release: %s" % release)
-
-    if release < MIN_ENSEMBL_RELEASE:
+    if database is None:
+        min_release = MIN_ENSEMBL_RELEASE
+    else:
+        min_release = MIN_ENSEMBLGENOME_RELEASE
+    if release < min_release:
         raise ValueError(
             "Invalid Ensembl releases %d, must be greater than %d"
-            % (release, MIN_ENSEMBL_RELEASE)
+            % (release, min_release)
         )
     return release
diff --git a/pyensembl/ensembl_url_templates.py b/pyensembl/ensembl_url_templates.py
index ded3570..e00968b 100644
--- a/pyensembl/ensembl_url_templates.py
+++ b/pyensembl/ensembl_url_templates.py
@@ -11,19 +11,23 @@
 # limitations under the License.
 
 """
-Templates for URLs and paths to specific relase, species, and file type
-on the Ensembl ftp server.
+Templates for URLs and paths to specific release, species, and file type on the
+Ensembl ftp server.
 
 For example, the human chromosomal DNA sequences for release 78 are in:
 
     https://ftp.ensembl.org/pub/release-78/fasta/homo_sapiens/dna/
 
+For plant, fungi and metazoa species, the URL is as follows:
+
+    https://ftp.ensemblgenomes.ebi.ac.uk/pub/release-57/plants/fasta/glycine_max/cdna/
 """
 
-from .species import Species, find_species_by_name
 from .ensembl_release_versions import check_release_number
+from .species import Species, find_species_by_name
 
 ENSEMBL_FTP_SERVER = "https://ftp.ensembl.org"
+ENSEMBLGENOME_FTP_SERVER = "https://ftp.ensemblgenomes.ebi.ac.uk"
 
 # Example directories
 # FASTA files: /pub/release-78/fasta/homo_sapiens/
@@ -31,27 +35,58 @@
 FASTA_SUBDIR_TEMPLATE = "/pub/release-%(release)d/fasta/%(species)s/%(type)s/"
 GTF_SUBDIR_TEMPLATE = "/pub/release-%(release)d/gtf/%(species)s/"
 
+DATABASE_FASTA_SUBDIR_TEMPLATE = (
+    "/pub/release-%(release)d/%(database)s/fasta/%(species)s/%(type)s/"
+)
+DATABASE_GTF_SUBDIR_TEMPLATE = (
+    "/pub/release-%(release)d/%(database)s/gtf/%(species)s/"
+)
+
+# GTF annotation file example: Homo_sapiens.GRCh38.78.gtf.gz
+GTF_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(release)d.gtf.gz"
+
+# cDNA & protein FASTA file for releases before (and including) Ensembl 75
+# example: Homo_sapiens.NCBI36.54.cdna.all.fa.gz
+OLD_FASTA_FILENAME_TEMPLATE = (
+    "%(Species)s.%(reference)s.%(release)d.%(sequence_type)s.all.fa.gz"
+)
+
+# ncRNA FASTA file for releases before (and including) Ensembl 75
+# example: Homo_sapiens.NCBI36.54.ncrna.fa.gz
+
+OLD_FASTA_FILENAME_TEMPLATE_NCRNA = (
+    "%(Species)s.%(reference)s.%(release)d.ncrna.fa.gz"
+)
+
+# cDNA & protein FASTA file for releases after Ensembl 75
+# example: Homo_sapiens.GRCh37.cdna.all.fa.gz
+NEW_FASTA_FILENAME_TEMPLATE = (
+    "%(Species)s.%(reference)s.%(sequence_type)s.all.fa.gz"
+)
+
+# ncRNA FASTA file for releases after Ensembl 75
+# example: Homo_sapiens.GRCh37.ncrna.fa.gz
+NEW_FASTA_FILENAME_TEMPLATE_NCRNA = "%(Species)s.%(reference)s.ncrna.fa.gz"
+
 
 def normalize_release_properties(ensembl_release, species):
     """
     Make sure a given release is valid, normalize it to be an integer,
     normalize the species name, and get its associated reference.
     """
-    ensembl_release = check_release_number(ensembl_release)
     if not isinstance(species, Species):
         species = find_species_by_name(species)
+    ensembl_release = check_release_number(
+        ensembl_release, database=species.database
+    )
     reference_name = species.which_reference(ensembl_release)
     return ensembl_release, species.latin_name, reference_name
 
 
-# GTF annotation file example: Homo_sapiens.GTCh38.gtf.gz
-GTF_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(release)d.gtf.gz"
-
-
 def make_gtf_filename(ensembl_release, species):
     """
     Return GTF filename expect on Ensembl FTP server for a specific
-    species/release combination
+    species/release combination.
     """
     ensembl_release, species, reference_name = normalize_release_properties(
         ensembl_release, species
@@ -63,41 +98,45 @@ def make_gtf_filename(ensembl_release, species):
     }
 
 
-def make_gtf_url(ensembl_release, species, server=ENSEMBL_FTP_SERVER):
+def make_gtf_url(ensembl_release, species, server=None, database=None):
     """
     Returns a URL and a filename, which can be joined together.
     """
-    ensembl_release, species, _ = normalize_release_properties(ensembl_release, species)
-    subdir = GTF_SUBDIR_TEMPLATE % {"release": ensembl_release, "species": species}
-    filename = make_gtf_filename(ensembl_release=ensembl_release, species=species)
+    if server is None:
+        if database is None:
+            server = ENSEMBL_FTP_SERVER
+        else:
+            server = ENSEMBLGENOME_FTP_SERVER
+    ensembl_release, species, _ = normalize_release_properties(
+        ensembl_release, species
+    )
+    if database is None:
+        subdir = GTF_SUBDIR_TEMPLATE % {
+            "release": ensembl_release,
+            "species": species,
+        }
+    else:
+        subdir = DATABASE_GTF_SUBDIR_TEMPLATE % {
+            "release": ensembl_release,
+            "database": database,
+            "species": species,
+        }
+    filename = make_gtf_filename(
+        ensembl_release=ensembl_release, species=species
+    )
     return server + subdir + filename
 
 
-# cDNA & protein FASTA file for releases before (and including) Ensembl 75
-# example: Homo_sapiens.NCBI36.54.cdna.all.fa.gz
-OLD_FASTA_FILENAME_TEMPLATE = (
-    "%(Species)s.%(reference)s.%(release)d.%(sequence_type)s.all.fa.gz"
-)
-
-# ncRNA FASTA file for releases before (and including) Ensembl 75
-# example: Homo_sapiens.NCBI36.54.ncrna.fa.gz
-
-OLD_FASTA_FILENAME_TEMPLATE_NCRNA = "%(Species)s.%(reference)s.%(release)d.ncrna.fa.gz"
-
-# cDNA & protein FASTA file for releases after Ensembl 75
-# example: Homo_sapiens.GRCh37.cdna.all.fa.gz
-NEW_FASTA_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(sequence_type)s.all.fa.gz"
-
-# ncRNA FASTA file for releases after Ensembl 75
-# example: Homo_sapiens.GRCh37.ncrna.fa.gz
-NEW_FASTA_FILENAME_TEMPLATE_NCRNA = "%(Species)s.%(reference)s.ncrna.fa.gz"
-
-
-def make_fasta_filename(ensembl_release, species, sequence_type):
+def make_fasta_filename(ensembl_release, species, database, sequence_type):
     ensembl_release, species, reference_name = normalize_release_properties(
         ensembl_release, species
     )
-    if ensembl_release <= 75:
+    # When a database is given (e.g. plants), releases up to and including 31
+    # use the old filename style (release number in the name); releases from
+    # 32 onward use the new style. Release 31 itself provides both styles.
+    if (ensembl_release <= 75 and database is None) or (
+        ensembl_release <= 31 and database is not None
+    ):
         if sequence_type == "ncrna":
             return OLD_FASTA_FILENAME_TEMPLATE_NCRNA % {
                 "Species": species.capitalize(),
@@ -125,23 +164,47 @@ def make_fasta_filename(ensembl_release, species, sequence_type):
             }
 
 
-def make_fasta_url(ensembl_release, species, sequence_type, server=ENSEMBL_FTP_SERVER):
-    """Construct URL to FASTA file with cDNA transcript or protein sequences
+def make_fasta_url(
+    ensembl_release,
+    species,
+    sequence_type,
+    server=None,
+    database=None,
+):
+    """
+    Construct URL to FASTA file with cDNA transcript or protein sequences.
 
     Parameter examples:
         ensembl_release = 75
         species = "Homo_sapiens"
         sequence_type = "cdna" (other option: "pep")
     """
-    ensembl_release, species, reference_name = normalize_release_properties(
+    if server is None:
+        if database is None:
+            server = ENSEMBL_FTP_SERVER
+        else:
+            server = ENSEMBLGENOME_FTP_SERVER
+    ensembl_release, species, _ = normalize_release_properties(
         ensembl_release, species
     )
-    subdir = FASTA_SUBDIR_TEMPLATE % {
-        "release": ensembl_release,
-        "species": species,
-        "type": sequence_type,
-    }
+    if database is None:
+        subdir = FASTA_SUBDIR_TEMPLATE % {
+            "release": ensembl_release,
+            "species": species,
+            "type": sequence_type,
+        }
+    else:
+        subdir = DATABASE_FASTA_SUBDIR_TEMPLATE % {
+            "release": ensembl_release,
+            "database": database,
+            "species": species,
+            "type": sequence_type,
+        }
+
     filename = make_fasta_filename(
-        ensembl_release=ensembl_release, species=species, sequence_type=sequence_type
+        ensembl_release=ensembl_release,
+        species=species,
+        database=database,
+        sequence_type=sequence_type,
     )
     return server + subdir + filename
diff --git a/pyensembl/exon.py b/pyensembl/exon.py
index a520290..a84b75f 100644
--- a/pyensembl/exon.py
+++ b/pyensembl/exon.py
@@ -15,7 +15,9 @@
 
 
 class Exon(Locus):
-    def __init__(self, exon_id, contig, start, end, strand, gene_name, gene_id):
+    def __init__(
+        self, exon_id, contig, start, end, strand, gene_name, gene_id
+    ):
         Locus.__init__(self, contig, start, end, strand)
         self.exon_id = exon_id
         self.gene_name = gene_name
diff --git a/pyensembl/fasta.py b/pyensembl/fasta.py
index e339a8a..b55750b 100644
--- a/pyensembl/fasta.py
+++ b/pyensembl/fasta.py
@@ -19,9 +19,8 @@
 """
 
 
-from gzip import GzipFile
 import logging
-
+from gzip import GzipFile
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +32,8 @@ def _parse_header_id(line):
     """
     if type(line) is not bytes:
         raise TypeError(
-            "Expected header line to be of type %s but got %s" % (bytes, type(line))
+            "Expected header line to be of type %s but got %s"
+            % (bytes, type(line))
         )
 
     if len(line) <= 1:
diff --git a/pyensembl/gene.py b/pyensembl/gene.py
index f26de48..b787c64 100644
--- a/pyensembl/gene.py
+++ b/pyensembl/gene.py
@@ -17,7 +17,9 @@
 
 
 class Gene(LocusWithGenome):
-    def __init__(self, gene_id, gene_name, contig, start, end, strand, biotype, genome):
+    def __init__(
+        self, gene_id, gene_name, contig, start, end, strand, biotype, genome
+    ):
         LocusWithGenome.__init__(
             self,
             contig=contig,
@@ -98,7 +100,8 @@ def transcripts(self):
         # its particular information, might be more efficient if we
         # just get all the columns here, but how do we keep that modular?
         return [
-            self.genome.transcript_by_id(result[0]) for result in transcript_id_results
+            self.genome.transcript_by_id(result[0])
+            for result in transcript_id_results
         ]
 
     @memoized_property
diff --git a/pyensembl/genome.py b/pyensembl/genome.py
index 05b6efc..a5e202d 100644
--- a/pyensembl/genome.py
+++ b/pyensembl/genome.py
@@ -11,8 +11,8 @@
 # limitations under the License.
 
 """
-Contains the Genome class, with its millions of accessors and wrappers
-around an arbitrary genomic database.
+Contains the Genome class, with its millions of accessors and wrappers around
+an arbitrary genomic database.
 """
 
 
@@ -21,8 +21,8 @@
 
 from serializable import Serializable
 
-from .download_cache import DownloadCache
 from .database import Database
+from .download_cache import DownloadCache
 from .exon import Exon
 from .gene import Gene
 from .sequence_data import SequenceData
@@ -31,8 +31,8 @@
 
 class Genome(Serializable):
     """
-    Bundles together the genomic annotation and sequence data associated with
-    a particular genomic database source (e.g. a single Ensembl release) and
+    Bundles together the genomic annotation and sequence data associated with a
+    particular genomic database source (e.g. a single Ensembl release) and
     provides a wide variety of helper methods for accessing this data.
     """
 
@@ -148,7 +148,7 @@ def to_dict(self):
 
     def _init_lazy_fields(self):
         """
-        Member data that gets loaded or constructed on demand
+        Member data that gets loaded or constructed on demand.
         """
         self.gtf_path = None
         self._protein_sequences = None
@@ -163,11 +163,15 @@ def _init_lazy_fields(self):
         self._exons = {}
 
     def _get_cached_path(
-        self, field_name, path_or_url, download_if_missing=False, overwrite=False
+        self,
+        field_name,
+        path_or_url,
+        download_if_missing=False,
+        overwrite=False,
     ):
         """
-        Get the local path for a possibly remote file, invoking either
-        a download or install error message if it's missing.
+        Get the local path for a possibly remote file, invoking either a
+        download or install error message if it's missing.
         """
         if len(field_name) == 0:
             raise ValueError("Expected non-empty field name")
@@ -188,7 +192,9 @@ def _get_gtf_path(self, download_if_missing=False, overwrite=False):
             overwrite=overwrite,
         )
 
-    def _get_transcript_fasta_paths(self, download_if_missing=False, overwrite=False):
+    def _get_transcript_fasta_paths(
+        self, download_if_missing=False, overwrite=False
+    ):
         if not self.requires_transcript_fasta:
             raise ValueError("No transcript FASTA source for %s" % self)
         return [
@@ -201,7 +207,9 @@ def _get_transcript_fasta_paths(self, download_if_missing=False, overwrite=False
             for path in self._transcript_fasta_paths_or_urls
         ]
 
-    def _get_protein_fasta_paths(self, download_if_missing=False, overwrite=False):
+    def _get_protein_fasta_paths(
+        self, download_if_missing=False, overwrite=False
+    ):
         # get the path for peptide FASTA files containing
         # this genome's protein sequences
         if not self.requires_protein_fasta:
@@ -233,7 +241,9 @@ def _set_local_paths(self, download_if_missing=True, overwrite=False):
     def required_local_files(self):
         paths = []
         if self._gtf_path_or_url:
-            paths.append(self.download_cache.cached_path(self._gtf_path_or_url))
+            paths.append(
+                self.download_cache.cached_path(self._gtf_path_or_url)
+            )
         if self._transcript_fasta_paths_or_urls:
             paths.extend(
                 [
@@ -273,8 +283,8 @@ def download(self, overwrite=False):
     def index(self, overwrite=False):
         """
         Assuming that all necessary data for this Genome has been downloaded,
-        generate the GTF database and save efficient representation of
-        FASTA sequence files.
+        generate the GTF database and save efficient representation of FASTA
+        sequence files.
         """
         if self.requires_gtf:
             self.db.connect_or_create(overwrite=overwrite)
@@ -291,10 +301,13 @@ def db(self):
             # make sure GTF file exists locally
             # and populate self.gtf_path
             self._set_local_paths(
-                download_if_missing=True, ## if set at False the files are not downloaded in interactive python, works anyways via command line though
-                overwrite=False)
+                download_if_missing=True,  ## if set at False the files are not downloaded in interactive python, works anyways via command line though
+                overwrite=False,
+            )
             if self.gtf_path is None:
-                raise ValueError("Property 'gtf_path' of %s cannot be None" % self)
+                raise ValueError(
+                    "Property 'gtf_path' of %s cannot be None" % self
+                )
 
             # Database object turns the GTF dataframes into sqlite3 tables
             # and wraps them with methods like `query_one`
@@ -347,7 +360,8 @@ def protein_sequences(self):
             self._set_local_paths(download_if_missing=False, overwrite=False)
             if self.protein_fasta_paths is None:
                 raise ValueError(
-                    "Property 'protein_fasta_paths' of %s cannot be None" % self
+                    "Property 'protein_fasta_paths' of %s cannot be None"
+                    % self
                 )
             self._protein_sequences = SequenceData(
                 fasta_paths=self.protein_fasta_paths,
@@ -359,13 +373,16 @@ def protein_sequences(self):
     def transcript_sequences(self):
         if self._transcript_sequences is None:
             if not self.requires_transcript_fasta:
-                raise ValueError("Missing transcript FASTA source for %s" % self)
+                raise ValueError(
+                    "Missing transcript FASTA source for %s" % self
+                )
             # make sure transcript FASTA file exists locally
             # and populate self.transcript_fasta_paths
             self._set_local_paths(download_if_missing=False, overwrite=False)
             if self.transcript_fasta_paths is None:
                 raise ValueError(
-                    "Property 'transcript_fasta_paths' of %s cannot be None" % (self,)
+                    "Property 'transcript_fasta_paths' of %s cannot be None"
+                    % (self,)
                 )
             self._transcript_sequences = SequenceData(
                 fasta_paths=self.transcript_fasta_paths,
@@ -375,8 +392,8 @@ def transcript_sequences(self):
 
     def install_string(self):
         """
-        Add every missing file to the install string shown to the user
-        in an error message.
+        Add every missing file to the install string shown to the user in an
+        error message.
         """
         args = [
             "--reference-name",
@@ -450,7 +467,7 @@ def __hash__(self):
 
     def clear_cache(self):
         """
-        Clear any in-memory cached values
+        Clear any in-memory cached values.
         """
         for maybe_fn in self.__dict__.values():
             # clear cache associated with all memoization decorators,
@@ -460,7 +477,7 @@ def clear_cache(self):
 
     def delete_index_files(self):
         """
-        Delete all data aside from source GTF and FASTA files
+        Delete all data aside from source GTF and FASTA files.
         """
         self.clear_cache()
         db_path = self.db.local_db_path()
@@ -471,9 +488,8 @@ def _all_feature_values(
         self, column, feature, distinct=True, contig=None, strand=None
     ):
         """
-        Cached lookup of all values for a particular feature property from
-        the database, caches repeated queries in memory and
-        stores them as a CSV.
+        Cached lookup of all values for a particular feature property from the
+        database, caches repeated queries in memory and stores them as a CSV.
 
         Parameters
         ----------
@@ -504,23 +520,31 @@ def _all_feature_values(
         )
 
     def transcript_sequence(self, transcript_id):
-        """Return cDNA nucleotide sequence of transcript, or None if
-        transcript doesn't have cDNA sequence.
+        """
+        Return cDNA nucleotide sequence of transcript, or None if transcript
+        doesn't have cDNA sequence.
         """
         if self.transcript_sequences is None:
-            raise ValueError("No transcript FASTA supplied to this Genome: %s" % self)
+            raise ValueError(
+                "No transcript FASTA supplied to this Genome: %s" % self
+            )
         return self.transcript_sequences.get(transcript_id)
 
     def protein_sequence(self, protein_id):
-        """Return cDNA nucleotide sequence of transcript, or None if
-        transcript doesn't have cDNA sequence.
+        """
+        Return the protein sequence for the given protein ID, or None if the
+        protein FASTA data has no sequence for that ID.
         """
         if self.protein_sequences is None:
-            raise ValueError("No protein FASTA supplied to this Genome: %s" % self)
+            raise ValueError(
+                "No protein FASTA supplied to this Genome: %s" % self
+            )
         return self.protein_sequences.get(protein_id)
 
     def genes_at_locus(self, contig, position, end=None, strand=None):
-        gene_ids = self.gene_ids_at_locus(contig, position, end=end, strand=strand)
+        gene_ids = self.gene_ids_at_locus(
+            contig, position, end=end, strand=strand
+        )
         return [self.gene_by_id(gene_id) for gene_id in gene_ids]
 
     def transcripts_at_locus(self, contig, position, end=None, strand=None):
@@ -528,11 +552,14 @@ def transcripts_at_locus(self, contig, position, end=None, strand=None):
             contig, position, end=end, strand=strand
         )
         return [
-            self.transcript_by_id(transcript_id) for transcript_id in transcript_ids
+            self.transcript_by_id(transcript_id)
+            for transcript_id in transcript_ids
         ]
 
     def exons_at_locus(self, contig, position, end=None, strand=None):
-        exon_ids = self.exon_ids_at_locus(contig, position, end=end, strand=strand)
+        exon_ids = self.exon_ids_at_locus(
+            contig, position, end=end, strand=strand
+        )
         return [self.exon_by_id(exon_id) for exon_id in exon_ids]
 
     def gene_ids_at_locus(self, contig, position, end=None, strand=None):
@@ -575,7 +602,9 @@ def transcript_ids_at_locus(self, contig, position, end=None, strand=None):
             strand=strand,
         )
 
-    def transcript_names_at_locus(self, contig, position, end=None, strand=None):
+    def transcript_names_at_locus(
+        self, contig, position, end=None, strand=None
+    ):
         return self.db.distinct_column_values_at_locus(
             column="transcript_name",
             feature="transcript",
@@ -605,7 +634,7 @@ def protein_ids_at_locus(self, contig, position, end=None, strand=None):
 
     def locus_of_gene_id(self, gene_id):
         """
-        Given a gene ID returns Locus with: chromosome, start, stop, strand
+        Given a gene ID returns Locus with: chromosome, start, stop, strand.
         """
         return self.db.query_locus(
             filter_column="gene_id", filter_value=gene_id, feature="gene"
@@ -614,9 +643,9 @@ def locus_of_gene_id(self, gene_id):
     def loci_of_gene_names(self, gene_name):
         """
         Given a gene name returns list of Locus objects with fields:
-            chromosome, start, stop, strand
-        You can get multiple results since a gene might have multiple copies
-        in the genome.
+
+        chromosome, start, stop, strand. You can get multiple results
+        since a gene might have multiple copies in the genome.
         """
         return self.db.query_loci("gene_name", gene_name, "gene")
 
@@ -629,7 +658,7 @@ def locus_of_transcript_id(self, transcript_id):
 
     def locus_of_exon_id(self, exon_id):
         """
-        Given an exon ID returns Locus
+        Given an exon ID returns Locus.
         """
         return self.db.query_locus("exon_id", exon_id, feature="exon")
 
@@ -641,8 +670,8 @@ def locus_of_exon_id(self, exon_id):
 
     def contigs(self):
         """
-        Returns all contig names for any gene in the genome
-        (field called "seqname" in Ensembl GTF files)
+        Returns all contig names for any gene in the genome (field called
+        "seqname" in Ensembl GTF files)
         """
         return self.db.query_feature_values("seqname", "gene")
 
@@ -703,7 +732,9 @@ def gene_by_id(self, gene_id):
 
             gene_name, gene_biotype = None, None
             if len(result) < 4 or len(result) > 6:
-                raise ValueError("Result is not the expected length: %d" % len(result))
+                raise ValueError(
+                    "Result is not the expected length: %d" % len(result)
+                )
             contig, start, end, strand = result[:4]
             if len(result) == 5:
                 if "gene_name" in field_names:
@@ -737,8 +768,8 @@ def genes_by_name(self, gene_name):
 
     def gene_by_protein_id(self, protein_id):
         """
-        Get the gene ID associated with the given protein ID,
-        return its Gene object
+        Get the gene ID associated with the given protein ID, return its Gene
+        object.
         """
         gene_id = self.gene_id_of_protein_id(protein_id)
         return self.gene_by_id(gene_id)
@@ -762,8 +793,8 @@ def _query_gene_name(self, property_name, property_value, feature_type):
 
     def gene_names(self, contig=None, strand=None):
         """
-        Return all genes in the database,
-        optionally restrict to a chromosome and/or strand.
+        Return all genes in the database, optionally restrict to a chromosome
+        and/or strand.
         """
         return self._all_feature_values(
             column="gene_name", feature="gene", contig=contig, strand=strand
@@ -773,10 +804,14 @@ def gene_name_of_gene_id(self, gene_id):
         return self._query_gene_name("gene_id", gene_id, "gene")
 
     def gene_name_of_transcript_id(self, transcript_id):
-        return self._query_gene_name("transcript_id", transcript_id, "transcript")
+        return self._query_gene_name(
+            "transcript_id", transcript_id, "transcript"
+        )
 
     def gene_name_of_transcript_name(self, transcript_name):
-        return self._query_gene_name("transcript_name", transcript_name, "transcript")
+        return self._query_gene_name(
+            "transcript_name", transcript_name, "transcript"
+        )
 
     def gene_name_of_exon_id(self, exon_id):
         return self._query_gene_name("exon_id", exon_id, "exon")
@@ -800,8 +835,8 @@ def _query_gene_ids(self, property_name, value, feature="gene"):
 
     def gene_ids(self, contig=None, strand=None):
         """
-        What are all the gene IDs
-        (optionally restrict to a given chromosome/contig and/or strand)
+        What are all the gene IDs (optionally restrict to a given
+        chromosome/contig and/or strand)
         """
         return self._all_feature_values(
             column="gene_id", feature="gene", contig=contig, strand=strand
@@ -810,6 +845,7 @@ def gene_ids(self, contig=None, strand=None):
     def gene_ids_of_gene_name(self, gene_name):
         """
         What are the gene IDs associated with a given gene name?
+
         (due to copy events, there might be multiple genes per name)
         """
         results = self._query_gene_ids("gene_name", gene_name)
@@ -842,17 +878,21 @@ def gene_id_of_protein_id(self, protein_id):
 
     def transcripts(self, contig=None, strand=None):
         """
-        Construct Transcript object for every transcript entry in
-        the database. Optionally restrict to a particular
-        chromosome using the `contig` argument.
+        Construct Transcript object for every transcript entry in the database.
+
+        Optionally restrict to a particular chromosome using the
+        `contig` argument.
         """
         transcript_ids = self.transcript_ids(contig=contig, strand=strand)
         return [
-            self.transcript_by_id(transcript_id) for transcript_id in transcript_ids
+            self.transcript_by_id(transcript_id)
+            for transcript_id in transcript_ids
         ]
 
     def transcript_by_id(self, transcript_id):
-        """Construct Transcript object with given transcript ID"""
+        """
+        Construct Transcript object with given transcript ID.
+        """
         if transcript_id not in self._transcripts:
             optional_field_names = [
                 "transcript_name",
@@ -885,8 +925,12 @@ def transcript_by_id(self, transcript_id):
                 raise ValueError("Transcript not found: %s" % (transcript_id,))
 
             transcript_name, transcript_biotype, tsl = None, None, None
-            if len(result) < 5 or len(result) > (5 + len(optional_field_names)):
-                raise ValueError("Result is not the expected length: %d" % len(result))
+            if len(result) < 5 or len(result) > (
+                5 + len(optional_field_names)
+            ):
+                raise ValueError(
+                    "Result is not the expected length: %d" % len(result)
+                )
             contig, start, end, strand, gene_id = result[:5]
             if len(result) > 5:
                 extra_field_names = [
@@ -895,8 +939,10 @@ def transcript_by_id(self, transcript_id):
                 extra_data = dict(zip(extra_field_names, result[5:]))
                 transcript_name = extra_data.get("transcript_name")
                 transcript_biotype = extra_data.get("transcript_biotype")
-                tsl = extra_data.get("transcript_support_level")
-                if not tsl or tsl == "NA":
+                tsl = extra_data.get("transcript_support_level", "NA")
+                if tsl:
+                    tsl = tsl.split(" ")[0]
+                if not tsl or tsl == "NA" or not tsl.isnumeric():
                     tsl = None
                 else:
                     tsl = int(tsl)
@@ -917,9 +963,12 @@ def transcript_by_id(self, transcript_id):
         return self._transcripts[transcript_id]
 
     def transcripts_by_name(self, transcript_name):
-        transcript_ids = self.transcript_ids_of_transcript_name(transcript_name)
+        transcript_ids = self.transcript_ids_of_transcript_name(
+            transcript_name
+        )
         return [
-            self.transcript_by_id(transcript_id) for transcript_id in transcript_ids
+            self.transcript_by_id(transcript_id)
+            for transcript_id in transcript_ids
         ]
 
     def transcript_by_protein_id(self, protein_id):
@@ -945,25 +994,31 @@ def _query_transcript_names(self, property_name, value):
 
     def transcript_names(self, contig=None, strand=None):
         """
-        What are all the transcript names in the database
-        (optionally, restrict to a given chromosome and/or strand)
+        What are all the transcript names in the database (optionally, restrict
+        to a given chromosome and/or strand)
         """
         return self._all_feature_values(
-            column="transcript_name", feature="transcript", contig=contig, strand=strand
+            column="transcript_name",
+            feature="transcript",
+            contig=contig,
+            strand=strand,
         )
 
     def transcript_names_of_gene_name(self, gene_name):
         return self._query_transcript_names("gene_name", gene_name)
 
     def transcript_name_of_transcript_id(self, transcript_id):
-        transcript_names = self._query_transcript_names("transcript_id", transcript_id)
+        transcript_names = self._query_transcript_names(
+            "transcript_id", transcript_id
+        )
         if len(transcript_names) == 0:
             raise ValueError(
                 "No transcript names for transcript ID = %s" % transcript_id
             )
         elif len(transcript_names) > 1:
             raise ValueError(
-                "Multiple transcript names for transcript ID = %s" % (transcript_id,)
+                "Multiple transcript names for transcript ID = %s"
+                % (transcript_id,)
             )
         return transcript_names[0]
 
@@ -973,7 +1028,9 @@ def transcript_name_of_transcript_id(self, transcript_id):
     #
     ###################################################
 
-    def _query_transcript_ids(self, property_name, value, feature="transcript"):
+    def _query_transcript_ids(
+        self, property_name, value, feature="transcript"
+    ):
         results = self.db.query(
             select_column_names=["transcript_id"],
             filter_column=property_name,
@@ -986,7 +1043,10 @@ def _query_transcript_ids(self, property_name, value, feature="transcript"):
 
     def transcript_ids(self, contig=None, strand=None):
         return self._all_feature_values(
-            column="transcript_id", feature="transcript", contig=contig, strand=strand
+            column="transcript_id",
+            feature="transcript",
+            contig=contig,
+            strand=strand,
         )
 
     def transcript_ids_of_gene_id(self, gene_id):
@@ -1005,7 +1065,9 @@ def transcript_id_of_protein_id(self, protein_id):
         """
         What is the transcript ID associated with a given protein ID?
         """
-        results = self._query_transcript_ids("protein_id", protein_id, feature="CDS")
+        results = self._query_transcript_ids(
+            "protein_id", protein_id, feature="CDS"
+        )
         if len(results) == 0:
             raise ValueError("Protein ID not found: %s" % protein_id)
         elif len(results) > 1:
@@ -1026,15 +1088,16 @@ def transcript_id_of_protein_id(self, protein_id):
 
     def exons(self, contig=None, strand=None):
         """
-        Create exon object for all exons in the database, optionally
-        restrict to a particular chromosome using the `contig` argument.
+        Create exon object for all exons in the database, optionally restrict
+        to a particular chromosome using the `contig` argument.
         """
         # DataFrame with single column called "exon_id"
         exon_ids = self.exon_ids(contig=contig, strand=strand)
         return [self.exon_by_id(exon_id) for exon_id in exon_ids]
 
     def exon_by_id(self, exon_id):
-        """Construct an Exon object from its ID by looking up the exon"s
+        """
+        Construct an Exon object from its ID by looking up the exon's
         properties in the given Database.
         """
         if exon_id not in self._exons:
@@ -1109,8 +1172,8 @@ def exon_ids_of_transcript_id(self, transcript_id):
 
     def protein_ids(self, contig=None, strand=None):
         """
-        What are all the protein IDs
-        (optionally restrict to a given chromosome and/or strand)
+        What are all the protein IDs (optionally restrict to a given chromosome
+        and/or strand)
         """
         protein_ids = self._all_feature_values(
             column="protein_id",
diff --git a/pyensembl/locus.py b/pyensembl/locus.py
index b88b4a3..c087183 100644
--- a/pyensembl/locus.py
+++ b/pyensembl/locus.py
@@ -49,7 +49,8 @@ def __init__(self, contig, start, end, strand):
 
         if end < start:
             raise ValueError(
-                "Expected start <= end, got start = %d, end = %d" % (start, end)
+                "Expected start <= end, got start = %d, end = %d"
+                % (start, end)
             )
         self.start = start
         self.end = end
@@ -149,7 +150,9 @@ def offset_range(self, start, end):
             )
 
         if start < self.start or end > self.end:
-            raise ValueError("Range (%d, %d) falls outside %s" % (start, end, self))
+            raise ValueError(
+                "Range (%d, %d) falls outside %s" % (start, end, self)
+            )
 
         if self.on_forward_strand:
             return (start - self.start, end - self.start)
@@ -183,7 +186,9 @@ def can_overlap(self, contig, strand=None):
         """
         Is this locus on the same contig and (optionally) on the same strand?
         """
-        return self.on_contig(contig) and (strand is None or self.on_strand(strand))
+        return self.on_contig(contig) and (
+            strand is None or self.on_strand(strand)
+        )
 
     def distance_to_interval(self, start, end):
         """
@@ -220,15 +225,23 @@ def overlaps(self, contig, start, end, strand=None):
 
     def overlaps_locus(self, other_locus):
         return self.overlaps(
-            other_locus.contig, other_locus.start, other_locus.end, other_locus.strand
+            other_locus.contig,
+            other_locus.start,
+            other_locus.end,
+            other_locus.strand,
         )
 
     def contains(self, contig, start, end, strand=None):
         return (
-            self.can_overlap(contig, strand) and start >= self.start and end <= self.end
+            self.can_overlap(contig, strand)
+            and start >= self.start
+            and end <= self.end
         )
 
     def contains_locus(self, other_locus):
         return self.contains(
-            other_locus.contig, other_locus.start, other_locus.end, other_locus.strand
+            other_locus.contig,
+            other_locus.start,
+            other_locus.end,
+            other_locus.strand,
         )
diff --git a/pyensembl/locus_with_genome.py b/pyensembl/locus_with_genome.py
index 33dd38d..338a222 100644
--- a/pyensembl/locus_with_genome.py
+++ b/pyensembl/locus_with_genome.py
@@ -16,8 +16,8 @@
 
 class LocusWithGenome(Locus):
     """
-    Common base class for Gene and Transcript to avoid copying
-    their shared logic.
+    Common base class for Gene and Transcript to avoid copying their shared
+    logic.
     """
 
     def __init__(self, contig, start, end, strand, biotype, genome):
@@ -39,16 +39,17 @@ def to_dict(self):
     @property
     def is_protein_coding(self):
         """
-        We're not counting immunoglobulin-like genes from the T-cell receptor or
-        or antibodies since they occur in fragments that must be recombined.
-        It might be worth consider counting non-sense mediated decay and
-        non-stop decay since variants in these could potentially make a
-        functional protein. To read more about the biotypes used in Ensembl:
-            http://vega.sanger.ac.uk/info/about/gene_and_transcript_types.html
-            http://www.gencodegenes.org/gencode_biotypes.html
-
-        For now let's stick with the simple category of 'protein_coding', which
-        means that there is an open reading frame in this gene/transcript
-        whose successful transcription has been observed.
+        We're not counting immunoglobulin-like genes from the T-cell receptor
+        or antibodies since they occur in fragments that must be recombined.
+        It might be worth counting nonsense-mediated decay and non-stop
+        decay since variants in these could potentially make a functional
+        protein. To read more about the biotypes used in Ensembl:
+        http://vega.sanger.ac.uk/info/about/gene_and_transcript_types.html
+        http://www.gencodegenes.org/gencode_biotypes.html
+
+        For now let's stick with the simple category of
+        'protein_coding', which means that there is an open reading
+        frame in this gene/transcript whose successful transcription has
+        been observed.
         """
-        return self.biotype == "protein_coding"
+        return self.biotype == "protein_coding"  # exact match, not substring
diff --git a/pyensembl/normalization.py b/pyensembl/normalization.py
index fb0cc33..81f65c5 100644
--- a/pyensembl/normalization.py
+++ b/pyensembl/normalization.py
@@ -11,7 +11,8 @@
 # limitations under the License.
 
 from sys import intern
-from typechecks import is_string, is_integer
+
+from typechecks import is_integer, is_string
 
 # Manually memoizing here, since our simple common.memoize function has
 # noticable overhead in this instance.
diff --git a/pyensembl/reference_name.py b/pyensembl/reference_name.py
index 1b7639d..5731d80 100644
--- a/pyensembl/reference_name.py
+++ b/pyensembl/reference_name.py
@@ -29,7 +29,9 @@ def normalize_reference_name(name):
 
 
 def find_species_by_reference(reference_name):
-    return Species._reference_names_to_species[normalize_reference_name(reference_name)]
+    return Species._reference_names_to_species[
+        normalize_reference_name(reference_name)
+    ]
 
 
 def which_reference(species_name, ensembl_release):
@@ -42,7 +44,9 @@ def max_ensembl_release(reference_name):
     return max_release
 
 
-def genome_for_reference_name(reference_name, allow_older_downloaded_release=True):
+def genome_for_reference_name(
+    reference_name, allow_older_downloaded_release=True
+):
     """
     Given a genome reference name, such as "GRCh38", returns the
     corresponding Ensembl Release object.
@@ -60,7 +64,9 @@ def genome_for_reference_name(reference_name, allow_older_downloaded_release=Tru
     ]
     if allow_older_downloaded_release:
         # go through candidate releases in descending order
-        for release in reversed(range(min_ensembl_release, max_ensembl_release + 1)):
+        for release in reversed(
+            range(min_ensembl_release, max_ensembl_release + 1)
+        ):
             # check if release has been locally downloaded
             candidate = EnsemblRelease.cached(release=release, species=species)
             if candidate.required_local_files_exist():
@@ -70,6 +76,6 @@ def genome_for_reference_name(reference_name, allow_older_downloaded_release=Tru
     return EnsemblRelease.cached(release=max_ensembl_release, species=species)
 
 
-ensembl_grch36 = genome_for_reference_name("ncbi36")
-ensembl_grch37 = genome_for_reference_name("grch37")
-ensembl_grch38 = genome_for_reference_name("grch38")
+# ensembl_grch36 = genome_for_reference_name("ncbi36")
+# ensembl_grch37 = genome_for_reference_name("grch37")
+# ensembl_grch38 = genome_for_reference_name("grch38")
diff --git a/pyensembl/sequence_data.py b/pyensembl/sequence_data.py
index 631c748..e18a9e8 100644
--- a/pyensembl/sequence_data.py
+++ b/pyensembl/sequence_data.py
@@ -10,14 +10,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from os import remove
-from os.path import exists, abspath, split, join
 import logging
-from collections import Counter
 import pickle
-from .common import load_pickle, dump_pickle
-from .fasta import parse_fasta_dictionary
+from collections import Counter
+from os import remove
+from os.path import abspath, exists, join, split
 
+from .common import dump_pickle, load_pickle
+from .fasta import parse_fasta_dictionary
 
 logger = logging.getLogger(__name__)
 
@@ -32,10 +32,14 @@ def __init__(self, fasta_paths, cache_directory_path=None):
             fasta_paths = [fasta_paths]
 
         self.fasta_paths = [abspath(path) for path in fasta_paths]
-        self.fasta_directory_paths = [split(path)[0] for path in self.fasta_paths]
+        self.fasta_directory_paths = [
+            split(path)[0] for path in self.fasta_paths
+        ]
         self.fasta_filenames = [split(path)[1] for path in self.fasta_paths]
         if cache_directory_path:
-            self.cache_directory_paths = [cache_directory_path] * len(self.fasta_paths)
+            self.cache_directory_paths = [cache_directory_path] * len(
+                self.fasta_paths
+            )
         else:
             self.cache_directory_paths = self.fasta_directory_paths
         for path in self.fasta_paths:
@@ -104,7 +108,9 @@ def _load_or_create_fasta_dictionary_pickle(self):
                 try:
                     fasta_dictionary_tmp = load_pickle(pickle_path)
                     self._add_to_fasta_dictionary(fasta_dictionary_tmp)
-                    logger.info("Loaded sequence dictionary from %s", pickle_path)
+                    logger.info(
+                        "Loaded sequence dictionary from %s", pickle_path
+                    )
                     continue
                 except (pickle.UnpicklingError, AttributeError):
                     # catch either an UnpicklingError or an AttributeError
diff --git a/pyensembl/shell.py b/pyensembl/shell.py
old mode 100755
new mode 100644
index cd7ab3c..546dfa9
--- a/pyensembl/shell.py
+++ b/pyensembl/shell.py
@@ -30,6 +30,9 @@
 To list all installed genomes:
     %(prog)s list
 
+To list all available genomes:
+    %(prog)s available
+
 To install a genome from source files:
     %(prog)s install \
  --reference-name "GRCh38" \
@@ -40,14 +43,18 @@
 
 import argparse
 import logging.config
-import pkg_resources
 import os
 
-from .ensembl_release import EnsemblRelease, MAX_ENSEMBL_RELEASE
+import pkg_resources
+
+from .config import MAX_ENSEMBL_RELEASE
+from .ensembl_release import EnsemblRelease
 from .genome import Genome
-from .species import Species
+from .species import Species, normalize_species_name
 
-logging.config.fileConfig(pkg_resources.resource_filename(__name__, "logging.conf"))
+logging.config.fileConfig(
+    pkg_resources.resource_filename(__name__, "logging.conf")
+)
 logger = logging.getLogger(__name__)
 
 
@@ -94,7 +101,9 @@
 )
 
 path_group.add_argument(
-    "--annotation-name", default=None, help="Name of annotation source (e.g. refseq)"
+    "--annotation-name",
+    default=None,
+    help="Name of annotation source (e.g. refseq)",
 )
 
 path_group.add_argument(
@@ -140,6 +149,7 @@
         "delete-all-files",
         "delete-index-files",
         "list",
+        "available",
     ),
     help=(
         '"install" will download and index any data that is  not '
@@ -151,6 +161,25 @@
 )
 
 
+def collect_all_available_ensembl_releases():
+    for species_name in Species.all_registered_latin_names():
+        species = Species._latin_names_to_species[species_name]
+        # print in tree format
+        print(
+            "* "
+            + species_name
+            + " ("
+            + ", ".join(species.synonyms)
+            + ")"
+            + ":"
+        )
+        for (
+            release_name,
+            release_range,
+        ) in species.reference_assemblies.items():
+            print("  * " + release_name + ":", release_range)
+
+
 def collect_all_installed_ensembl_releases():
     genomes = []
     for species, release in Species.all_species_release_pairs():
@@ -164,11 +193,26 @@ def all_combinations_of_ensembl_genomes(args):
     """
     Use all combinations of species and release versions specified by the
     commandline arguments to return a list of EnsemblRelease or Genome objects.
-    The results will typically be of type EnsemblRelease unless the
+    The results will typically be of type EnsemblRelease; Genome objects
+    are returned instead when the
     --custom-mirror argument was given.
     """
     species_list = args.species if args.species else ["human"]
-    release_list = args.release if args.release else [MAX_ENSEMBL_RELEASE]
+
+    release_list = (
+        args.release
+        if args.release
+        else [
+            max(
+                i
+                for _, i in Species._latin_names_to_species[
+                    normalize_species_name(species_name)
+                ].reference_assemblies.values()
+            )
+            for species_name in species_list
+        ]
+    )
+
     genomes = []
     for species in species_list:
         # Otherwise, use Ensembl release information
@@ -182,11 +226,13 @@ def all_combinations_of_ensembl_genomes(args):
                 # URL to be a directory with all the same filenames as
                 # would be provided by Ensembl
                 gtf_url = os.path.join(
-                    args.custom_mirror, os.path.basename(ensembl_release.gtf_url)
+                    args.custom_mirror,
+                    os.path.basename(ensembl_release.gtf_url),
                 )
                 transcript_fasta_urls = [
                     os.path.join(
-                        args.custom_mirror, os.path.basename(transcript_fasta_url)
+                        args.custom_mirror,
+                        os.path.basename(transcript_fasta_url),
                     )
                     for transcript_fasta_url in ensembl_release.transcript_fasta_urls
                 ]
@@ -244,7 +290,9 @@ def collect_selected_genomes(args):
 
 def run():
     args = parser.parse_args()
-    if args.action == "list":
+    if args.action == "available":
+        collect_all_available_ensembl_releases()
+    elif args.action == "list":
         # TODO: how do we also identify which non-Ensembl genomes are
         # installed?
         genomes = collect_all_installed_ensembl_releases()
diff --git a/pyensembl/species.py b/pyensembl/species.py
index a236bb1..cb78766 100644
--- a/pyensembl/species.py
+++ b/pyensembl/species.py
@@ -12,7 +12,7 @@
 
 from serializable import Serializable
 
-from .ensembl_release_versions import MAX_ENSEMBL_RELEASE
+from .config import SPECIES_DATA
 
 # TODO: replace Serializable with data class
 
@@ -30,15 +30,18 @@ class Species(Serializable):
     _reference_names_to_species = {}
 
     @classmethod
-    def register(cls, latin_name, synonyms, reference_assemblies):
+    def register(
+        cls, latin_name, synonyms, reference_assemblies, database=None
+    ):
         """
-        Create a Species object from the given arguments and enter into
-        all the dicts used to look the species up by its fields.
+        Create a Species object from the given arguments and enter into all the
+        dicts used to look the species up by its fields.
         """
         species = Species(
             latin_name=latin_name,
             synonyms=synonyms,
             reference_assemblies=reference_assemblies,
+            database=database,
         )
         cls._latin_names_to_species[species.latin_name] = species
         for synonym in synonyms:
@@ -71,8 +74,8 @@ def all_registered_latin_names(cls):
     @classmethod
     def all_species_release_pairs(cls):
         """
-        Generator which yields (species, release) pairs
-        for all possible combinations.
+        Generator which yields (species, release) pairs for all possible
+        combinations.
         """
         for species_name in cls.all_registered_latin_names():
             species = cls._latin_names_to_species[species_name]
@@ -80,7 +83,9 @@ def all_species_release_pairs(cls):
                 for release in range(release_range[0], release_range[1] + 1):
                     yield species_name, release
 
-    def __init__(self, latin_name, synonyms=[], reference_assemblies={}):
+    def __init__(
+        self, latin_name, synonyms=[], reference_assemblies={}, database=None
+    ):
         """
         Parameters
         ----------
@@ -95,6 +100,7 @@ def __init__(self, latin_name, synonyms=[], reference_assemblies={}):
         self.latin_name = latin_name.lower().replace(" ", "_")
         self.synonyms = synonyms
         self.reference_assemblies = reference_assemblies
+        self.database = database
         self._release_to_genome = {}
         for genome_name, (start, end) in self.reference_assemblies.items():
             for i in range(start, end + 1):
@@ -114,10 +120,14 @@ def which_reference(self, ensembl_release):
         return self._release_to_genome[ensembl_release]
 
     def __str__(self):
-        return "Species(latin_name='%s', synonyms=%s, reference_assemblies=%s)" % (
-            self.latin_name,
-            self.synonyms,
-            self.reference_assemblies,
+        return (
+            "Species(latin_name='%s', synonyms=%s, reference_assemblies=%s, database=%s)"
+            % (
+                self.latin_name,
+                self.synonyms,
+                self.reference_assemblies,
+                self.database,
+            )
         )
 
     def __eq__(self, other):
@@ -126,6 +136,7 @@ def __eq__(self, other):
             and self.latin_name == other.latin_name
             and self.synonyms == other.synonyms
             and self.reference_assemblies == other.reference_assemblies
+            and self.database == other.database
         )
 
     def to_dict(self):
@@ -141,15 +152,17 @@ def __hash__(self):
                 self.latin_name,
                 tuple(self.synonyms),
                 frozenset(self.reference_assemblies.items()),
+                self.database,
             )
         )
 
 
 def normalize_species_name(name):
     """
-    If species name was "Homo sapiens" then replace spaces with underscores
-    and return "homo_sapiens". Also replace common names like "human" with
-    "homo_sapiens".
+    If species name was "Homo sapiens" then replace spaces with underscores and
+    return "homo_sapiens".
+
+    Also replace common names like "human" with "homo_sapiens".
     """
     lower_name = name.lower().strip()
 
@@ -173,6 +186,8 @@ def find_species_by_name(species_name):
 def check_species_object(species_name_or_object):
     """
     Helper for validating user supplied species names or objects.
+
+    Returns the corresponding `Species` object.
     """
     if isinstance(species_name_or_object, Species):
         return species_name_or_object
@@ -185,168 +200,10 @@ def check_species_object(species_name_or_object):
         )
 
 
-human = Species.register(
-    latin_name="homo_sapiens",
-    synonyms=["human"],
-    reference_assemblies={
-        "GRCh38": (76, MAX_ENSEMBL_RELEASE),
-        "GRCh37": (55, 75),
-        "NCBI36": (54, 54),
-    },
-)
-
-mouse = Species.register(
-    latin_name="mus_musculus",
-    synonyms=["mouse", "house mouse"],
-    reference_assemblies={
-        "NCBIM37": (54, 67),
-        "GRCm38": (68, 102),
-        "GRCm39": (103, MAX_ENSEMBL_RELEASE),
-    },
-)
-
-dog = Species.register(
-    latin_name="canis_familiaris",
-    synonyms=["dog"],
-    reference_assemblies={"CanFam3.1": (75, MAX_ENSEMBL_RELEASE)},
-)
-
-cat = Species.register(
-    latin_name="felis_catus",
-    synonyms=["cat"],
-    reference_assemblies={
-        "Felis_catus_6.2": (75, 90),
-        "Felis_catus_8.0": (91, 92),
-        "Felis_catus_9.0": (93, MAX_ENSEMBL_RELEASE),
-    },
-)
-
-chicken = Species.register(
-    latin_name="gallus_gallus",
-    synonyms=["chicken"],
-    reference_assemblies={
-        "Galgal4": (75, 85),
-        "Gallus_gallus-5.0": (86, MAX_ENSEMBL_RELEASE),
-    },
-)
-
-# Does the black rat (Rattus Rattus) get used for research too?
-brown_rat = Species.register(
-    latin_name="rattus_norvegicus",
-    synonyms=["brown rat", "lab rat", "rat"],
-    reference_assemblies={
-        "Rnor_5.0": (75, 79),
-        "Rnor_6.0": (80, 104),
-        "mRatBN7.2": (105, MAX_ENSEMBL_RELEASE),
-    },
-)
-
-macaque = Species.register(
-    latin_name="macaca_fascicularis",
-    synonyms=["macaque", "Crab-eating macaque"],
-    reference_assemblies={
-        "Macaca_fascicularis_6.0": (103, MAX_ENSEMBL_RELEASE),
-    },
-)
-
-green_monkey = Species.register(
-    latin_name="chlorocebus_sabaeus",
-    synonyms=["green_monkey", "african_green_monkey"],
-    reference_assemblies={
-        "ChlSab1.1": (86, MAX_ENSEMBL_RELEASE),
-    },
-)
-
-rhesus = Species.register(
-    latin_name="macaca_mulatta",
-    synonyms=["rhesus"],
-    reference_assemblies={"Mmul_10": (75, MAX_ENSEMBL_RELEASE)},
-)
-
-rabbit = Species.register(
-    latin_name="oryctolagus_cuniculus",
-    synonyms=["rabbit"],
-    reference_assemblies={"OryCun2.0": (75, MAX_ENSEMBL_RELEASE)},
-)
-
-gerbil = Species.register(
-    latin_name="meriones_unguiculatus",
-    synonyms=["gerbil"],
-    reference_assemblies={"MunDraft-v1.0": (75, MAX_ENSEMBL_RELEASE)},
-)
-
-syrian_hamster = Species.register(
-    latin_name="mesocricetus_auratus",
-    synonyms=["syrian_hamster"],
-    reference_assemblies={"MesAur1.0": (75, MAX_ENSEMBL_RELEASE)},
-)
-
-chinese_hamster = Species.register(
-    latin_name="cricetulus_griseus_chok1gshd",
-    synonyms=["chinese_hamster"],
-    reference_assemblies={"CHOK1GS_HDv1": (75, MAX_ENSEMBL_RELEASE)},
-)
-
-naked_mole_rat = Species.register(
-    latin_name="heterocephalus_glaber_female",
-    synonyms=["naked_mole_rat"],
-    reference_assemblies={"HetGla_female_1.0": (75, MAX_ENSEMBL_RELEASE)},
-)
-
-guinea_pig = Species.register(
-    latin_name="cavia_porcellus",
-    synonyms=["guinea_pig"],
-    reference_assemblies={"Cavpor3.0": (75, MAX_ENSEMBL_RELEASE)},
-)
-
-pig = Species.register(
-    latin_name="sus_scrofa",
-    synonyms=["pig"],
-    reference_assemblies={"Sscrofa11.1": (75, MAX_ENSEMBL_RELEASE)},
-)
-
-zebrafish = Species.register(
-    latin_name="danio_rerio",
-    synonyms=["zebrafish"],
-    reference_assemblies={
-        "ZFISH7": (47, 53),
-        "Zv8": (54, 59),
-        "Zv9": (60, 79),
-        "GRCz10": (80, 91),
-        "GRCz11": (92, MAX_ENSEMBL_RELEASE),
-    },
-)
-
-fly = Species.register(
-    latin_name="drosophila_melanogaster",
-    synonyms=["drosophila", "fruit fly", "fly"],
-    reference_assemblies={
-        "BDGP5": (75, 78),
-        "BDGP6": (79, 95),
-        "BDGP6.22": (96, 98),
-        "BDGP6.28": (99, 102),
-        "BDGP6.32": (103, MAX_ENSEMBL_RELEASE),
-    },
-)
-
-nematode = Species.register(
-    latin_name="caenorhabditis_elegans",
-    synonyms=["nematode", "C_elegans"],
-    reference_assemblies={
-        "WS180": (47, 49),
-        "WS190": (50, 54),
-        "WS200": (55, 57),
-        "WS210": (58, 59),
-        "WS220": (61, 66),
-        "WBcel215": (67, 70),
-        "WBcel235": (71, MAX_ENSEMBL_RELEASE),
-    },
-)
-
-yeast = Species.register(
-    latin_name="saccharomyces_cerevisiae",
-    synonyms=["yeast", "budding_yeast"],
-    reference_assemblies={
-        "R64-1-1": (76, MAX_ENSEMBL_RELEASE),
-    },
-)
+for data in SPECIES_DATA:
+    globals()[data["synonyms"][0]] = Species.register(
+        latin_name=data["latin_name"],
+        synonyms=data["synonyms"],
+        reference_assemblies=data["reference_assemblies"],
+        database=data.get("database", None),
+    )
diff --git a/pyensembl/species.py.orig b/pyensembl/species.py.orig
new file mode 100644
index 0000000..cb78766
--- /dev/null
+++ b/pyensembl/species.py.orig
@@ -0,0 +1,209 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from serializable import Serializable
+
+from .config import SPECIES_DATA
+
+# TODO: replace Serializable with data class
+
+
+class Species(Serializable):
+    """
+    Container for combined information about a species name, its synonym names
+    and which reference to use for this species in each Ensembl release.
+    """
+
+    # as species instances get created, they get registered in these
+    # dictionaries
+    _latin_names_to_species = {}
+    _common_names_to_species = {}
+    _reference_names_to_species = {}
+
+    @classmethod
+    def register(
+        cls, latin_name, synonyms, reference_assemblies, database=None
+    ):
+        """
+        Create a Species object from the given arguments and enter into all the
+        dicts used to look the species up by its fields.
+        """
+        species = Species(
+            latin_name=latin_name,
+            synonyms=synonyms,
+            reference_assemblies=reference_assemblies,
+            database=database,
+        )
+        cls._latin_names_to_species[species.latin_name] = species
+        for synonym in synonyms:
+            if synonym in cls._common_names_to_species:
+                raise ValueError(
+                    "Can't use synonym '%s' for both %s and %s"
+                    % (synonym, species, cls._common_names_to_species[synonym])
+                )
+            cls._common_names_to_species[synonym] = species
+        for reference_name in reference_assemblies:
+            if reference_name in cls._reference_names_to_species:
+                raise ValueError(
+                    "Can't use reference '%s' for both %s and %s"
+                    % (
+                        reference_name,
+                        species,
+                        cls._reference_names_to_species[reference_name],
+                    )
+                )
+            cls._reference_names_to_species[reference_name] = species
+        return species
+
+    @classmethod
+    def all_registered_latin_names(cls):
+        """
+        Returns latin name of every registered species.
+        """
+        return list(cls._latin_names_to_species.keys())
+
+    @classmethod
+    def all_species_release_pairs(cls):
+        """
+        Generator which yields (species, release) pairs for all possible
+        combinations.
+        """
+        for species_name in cls.all_registered_latin_names():
+            species = cls._latin_names_to_species[species_name]
+            for _, release_range in species.reference_assemblies.items():
+                for release in range(release_range[0], release_range[1] + 1):
+                    yield species_name, release
+
+    def __init__(
+        self, latin_name, synonyms=[], reference_assemblies={}, database=None
+    ):
+        """
+        Parameters
+        ----------
+        latin_name : str
+
+        synonyms : list of strings
+
+        reference_assemblies : dict
+            Mapping of names of reference genomes onto inclusive ranges of
+            Ensembl releases. Example: {"GRCh37": (54, 75)}
+        """
+        self.latin_name = latin_name.lower().replace(" ", "_")
+        self.synonyms = synonyms
+        self.reference_assemblies = reference_assemblies
+        self.database = database
+        self._release_to_genome = {}
+        for genome_name, (start, end) in self.reference_assemblies.items():
+            for i in range(start, end + 1):
+                if i in self._release_to_genome:
+                    raise ValueError(
+                        "Ensembl release %d for %s already has an associated genome"
+                        % (i, latin_name)
+                    )
+                self._release_to_genome[i] = genome_name
+
+    def which_reference(self, ensembl_release):
+        if ensembl_release not in self._release_to_genome:
+            raise ValueError(
+                "No genome for %s in Ensembl release %d"
+                % (self.latin_name, ensembl_release)
+            )
+        return self._release_to_genome[ensembl_release]
+
+    def __str__(self):
+        return (
+            "Species(latin_name='%s', synonyms=%s, reference_assemblies=%s, database=%s)"
+            % (
+                self.latin_name,
+                self.synonyms,
+                self.reference_assemblies,
+                self.database,
+            )
+        )
+
+    def __eq__(self, other):
+        return (
+            other.__class__ is Species
+            and self.latin_name == other.latin_name
+            and self.synonyms == other.synonyms
+            and self.reference_assemblies == other.reference_assemblies
+            and self.database == other.database
+        )
+
+    def to_dict(self):
+        return {"latin_name": self.latin_name}
+
+    @classmethod
+    def from_dict(cls, state_dict):
+        return cls._latin_names_to_species[state_dict["latin_name"]]
+
+    def __hash__(self):
+        return hash(
+            (
+                self.latin_name,
+                tuple(self.synonyms),
+                frozenset(self.reference_assemblies.items()),
+                self.database,
+            )
+        )
+
+
+def normalize_species_name(name):
+    """
+    If species name was "Homo sapiens" then replace spaces with underscores and
+    return "homo_sapiens".
+
+    Also replace common names like "human" with "homo_sapiens".
+    """
+    lower_name = name.lower().strip()
+
+    # if given a common name such as "human", look up its latin equivalent
+    if lower_name in Species._common_names_to_species:
+        return Species._common_names_to_species[lower_name].latin_name
+
+    return lower_name.replace(" ", "_")
+
+
+def find_species_by_name(species_name):
+    latin_name = normalize_species_name(species_name)
+    if latin_name not in Species._latin_names_to_species:
+        raise ValueError(
+            "Species not found: %s, for non-Ensembl data see https://github.com/openvax/pyensembl#non-ensembl-data"
+            % (species_name,)
+        )
+    return Species._latin_names_to_species[latin_name]
+
+
+def check_species_object(species_name_or_object):
+    """
+    Helper for validating user supplied species names or objects.
+
+    Returns the corresponding `Species` object.
+    """
+    if isinstance(species_name_or_object, Species):
+        return species_name_or_object
+    elif isinstance(species_name_or_object, str):
+        return find_species_by_name(species_name_or_object)
+    else:
+        raise ValueError(
+            "Unexpected type for species: %s : %s"
+            % (species_name_or_object, type(species_name_or_object))
+        )
+
+
+for data in SPECIES_DATA:
+    globals()[data["synonyms"][0]] = Species.register(
+        latin_name=data["latin_name"],
+        synonyms=data["synonyms"],
+        reference_assemblies=data["reference_assemblies"],
+        database=data.get("database", None),
+    )
diff --git a/pyensembl/transcript.py b/pyensembl/transcript.py
index 9d30c5c..694e702 100644
--- a/pyensembl/transcript.py
+++ b/pyensembl/transcript.py
@@ -24,18 +24,20 @@ class Transcript(LocusWithGenome):
     and not using the sequence, avoid the memory/performance overhead
     of fetching and storing sequences from a FASTA file.
     """
+
     def __init__(
-            self,
-            transcript_id,
-            transcript_name,
-            contig,
-            start,
-            end,
-            strand,
-            biotype,
-            gene_id,
-            genome,
-            support_level=None):
+        self,
+        transcript_id,
+        transcript_name,
+        contig,
+        start,
+        end,
+        strand,
+        biotype,
+        gene_id,
+        genome,
+        support_level=None,
+    ):
         LocusWithGenome.__init__(
             self,
             contig=contig,
@@ -43,7 +45,8 @@ def __init__(
             end=end,
             strand=strand,
             biotype=biotype,
-            genome=genome)
+            genome=genome,
+        )
         self.transcript_id = transcript_id
         self.transcript_name = transcript_name
         self.gene_id = gene_id
@@ -71,16 +74,18 @@ def __str__(self):
             " biotype='%s',"
             " contig='%s',"
             " start=%d,"
-            " end=%d, strand='%s', genome='%s')") % (
-                self.transcript_id,
-                self.name,
-                self.gene_id,
-                self.biotype,
-                self.contig,
-                self.start,
-                self.end,
-                self.strand,
-                self.genome.reference_name)
+            " end=%d, strand='%s', genome='%s')"
+        ) % (
+            self.transcript_id,
+            self.name,
+            self.gene_id,
+            self.biotype,
+            self.contig,
+            self.start,
+            self.end,
+            self.strand,
+            self.genome.reference_name,
+        )
 
     def __len__(self):
         """
@@ -90,9 +95,10 @@ def __len__(self):
 
     def __eq__(self, other):
         return (
-            other.__class__ is Transcript and
-            self.id == other.id and
-            self.genome == other.genome)
+            other.__class__ is Transcript
+            and self.id == other.id
+            and self.genome == other.genome
+        )
 
     def __hash__(self):
         return hash(self.id)
@@ -123,7 +129,8 @@ def exons(self):
             columns,
             filter_column="transcript_id",
             filter_value=self.id,
-            feature="exon")
+            feature="exon",
+        )
 
         # fill this list in its correct order (by exon_number) by using
         # the exon_number as a 1-based list offset
@@ -133,15 +140,17 @@ def exons(self):
             exon = self.genome.exon_by_id(exon_id)
             if exon is None:
                 raise ValueError(
-                    "Missing exon %s for transcript %s" % (
-                        exon_number, self.id))
+                    "Missing exon %s for transcript %s"
+                    % (exon_number, self.id)
+                )
             exon_number = int(exon_number)
             if exon_number < 1:
                 raise ValueError("Invalid exon number: %s" % exon_number)
             elif exon_number > len(exons):
                 raise ValueError(
-                    "Invalid exon number: %s (max expected = %d)" % (
-                        exon_number, len(exons)))
+                    "Invalid exon number: %s (max expected = %d)"
+                    % (exon_number, len(exons))
+                )
 
             # exon_number is 1-based, convert to list index by subtracting 1
             exon_idx = exon_number - 1
@@ -164,12 +173,14 @@ def _transcript_feature_position_ranges(self, feature, required=True):
             select_column_names=["start", "end"],
             filter_column="transcript_id",
             filter_value=self.id,
-            feature=feature)
+            feature=feature,
+        )
 
         if required and len(results) == 0:
             raise ValueError(
-                "Transcript %s does not contain feature %s" % (
-                    self.id, feature))
+                "Transcript %s does not contain feature %s"
+                % (self.id, feature)
+            )
         return results
 
     @memoize
@@ -178,19 +189,21 @@ def _transcript_feature_positions(self, feature):
         Get unique positions for feature, raise an error if feature is absent.
         """
         ranges = self._transcript_feature_position_ranges(
-            feature, required=True)
+            feature, required=True
+        )
         results = []
         # a feature (such as a stop codon), maybe be split over multiple
         # contiguous ranges. Collect all the nucleotide positions into a
         # single list.
-        for (start, end) in ranges:
+        for start, end in ranges:
             # since ranges are [inclusive, inclusive] and
             # Python ranges are [inclusive, exclusive) we have to increment
             # the end position
             for position in range(start, end + 1):
                 if position in results:
                     raise ValueError(
-                        "Repeated position %d for %s" % (position, feature))
+                        "Repeated position %d for %s" % (position, feature)
+                    )
                 results.append(position)
         return results
 
@@ -207,10 +220,9 @@ def _codon_positions(self, feature):
         results = self._transcript_feature_positions(feature)
         if len(results) != 3:
             raise ValueError(
-                "Expected 3 positions for %s of %s but got %d" % (
-                    feature,
-                    self.id,
-                    len(results)))
+                "Expected 3 positions for %s of %s but got %d"
+                % (feature, self.id, len(results))
+            )
         return results
 
     @memoized_property
@@ -219,7 +231,8 @@ def contains_start_codon(self):
         Does this transcript have an annotated start_codon entry?
         """
         start_codons = self._transcript_feature_position_ranges(
-            "start_codon", required=False)
+            "start_codon", required=False
+        )
         return len(start_codons) > 0
 
     @memoized_property
@@ -228,9 +241,10 @@ def contains_stop_codon(self):
         Does this transcript have an annotated stop_codon entry?
         """
         stop_codons = self._transcript_feature_position_ranges(
-            "stop_codon", required=False)
+            "stop_codon", required=False
+        )
         return len(stop_codons) > 0
-    
+
     @memoized_property
     def start_codon_complete(self):
         """
@@ -266,9 +280,10 @@ def exon_intervals(self):
             select_column_names=["exon_number", "start", "end"],
             filter_column="transcript_id",
             filter_value=self.id,
-            feature="exon")
+            feature="exon",
+        )
         sorted_intervals = [None] * len(results)
-        for (exon_number, start, end) in results:
+        for exon_number, start, end in results:
             sorted_intervals[int(exon_number) - 1] = (start, end)
         return sorted_intervals
 
@@ -281,15 +296,15 @@ def spliced_offset(self, position):
         """
         if type(position) is not int:
             raise TypeError(
-                "Position argument must be an integer, got %s : %s" % (
-                    position, type(position)))
+                "Position argument must be an integer, got %s : %s"
+                % (position, type(position))
+            )
 
         if position < self.start or position > self.end:
             raise ValueError(
-                "Invalid position: %d (must be between %d and %d)" % (
-                    position,
-                    self.start,
-                    self.end))
+                "Invalid position: %d (must be between %d and %d)"
+                % (position, self.start, self.end)
+            )
 
         # offset from beginning of unspliced transcript (including introns)
         unspliced_offset = self.offset(position)
@@ -306,7 +321,8 @@ def spliced_offset(self, position):
         # Intron vs. Exon: ...iiiiiieeeeeeiiiiiiiiiiiiiiiieeeeeeiiiiiiiiiii...
         for exon in self.exons:
             exon_unspliced_start, exon_unspliced_end = self.offset_range(
-                exon.start, exon.end)
+                exon.start, exon.end
+            )
             # If the relative position is not within this exon, keep a running
             # total of the total exonic length-so-far.
             #
@@ -320,11 +336,13 @@ def spliced_offset(self, position):
                 exon_offset = unspliced_offset - exon_unspliced_start
                 return total_spliced_offset + exon_offset
             else:
-                exon_length = len(exon)  # exon_end_position - exon_start_position + 1
+                exon_length = len(
+                    exon
+                )  # exon_end_position - exon_start_position + 1
                 total_spliced_offset += exon_length
         raise ValueError(
-            "Couldn't find position %d on any exon of %s" % (
-                position, self.id))
+            "Couldn't find position %d on any exon of %s" % (position, self.id)
+        )
 
     @memoized_property
     def start_codon_unspliced_offsets(self):
@@ -333,9 +351,7 @@ def start_codon_unspliced_offsets(self):
         of nucleotides in start codon.
         """
         return [
-            self.offset(position)
-            for position
-            in self.start_codon_positions
+            self.offset(position) for position in self.start_codon_positions
         ]
 
     @memoized_property
@@ -345,9 +361,7 @@ def stop_codon_unspliced_offsets(self):
         of nucleotides in stop codon.
         """
         return [
-            self.offset(position)
-            for position
-            in self.stop_codon_positions
+            self.offset(position) for position in self.stop_codon_positions
         ]
 
     def _contiguous_offsets(self, offsets):
@@ -358,8 +372,7 @@ def _contiguous_offsets(self, offsets):
         offsets.sort()
         for i in range(len(offsets) - 1):
             if offsets[i] + 1 != offsets[i + 1]:
-                raise ValueError(
-                    "Offsets not contiguous: %s" % (offsets,))
+                raise ValueError("Offsets not contiguous: %s" % (offsets,))
         return offsets
 
     @memoized_property
@@ -370,8 +383,7 @@ def start_codon_spliced_offsets(self):
         """
         offsets = [
             self.spliced_offset(position)
-            for position
-            in self.start_codon_positions
+            for position in self.start_codon_positions
         ]
         return self._contiguous_offsets(offsets)
 
@@ -383,8 +395,7 @@ def stop_codon_spliced_offsets(self):
         """
         offsets = [
             self.spliced_offset(position)
-            for position
-            in self.stop_codon_positions
+            for position in self.stop_codon_positions
         ]
         return self._contiguous_offsets(offsets)
 
@@ -403,11 +414,11 @@ def complete(self):
         a coding sequence whose length is divisible by 3
         """
         return (
-            self.contains_start_codon and
-            self.start_codon_complete and
-            self.contains_stop_codon and
-            self.coding_sequence is not None and
-            len(self.coding_sequence) % 3 == 0
+            self.contains_start_codon
+            and self.start_codon_complete
+            and self.contains_stop_codon
+            and self.coding_sequence is not None
+            and len(self.coding_sequence) % 3 == 0
         )
 
     @memoized_property
@@ -459,7 +470,7 @@ def coding_sequence(self):
 
         # pylint: disable=invalid-slice-index
         # TODO(tavi) Figure out pylint is not happy with this slice
-        return self.sequence[start:end + 1]
+        return self.sequence[start : end + 1]
 
     @memoized_property
     def five_prime_utr_sequence(self):
@@ -469,7 +480,7 @@ def five_prime_utr_sequence(self):
         """
         # pylint: disable=invalid-slice-index
         # TODO(tavi) Figure out pylint is not happy with this slice
-        return self.sequence[:self.first_start_codon_spliced_offset]
+        return self.sequence[: self.first_start_codon_spliced_offset]
 
     @memoized_property
     def three_prime_utr_sequence(self):
@@ -477,7 +488,7 @@ def three_prime_utr_sequence(self):
         cDNA sequence of 3' UTR
         (untranslated region at the end of the transcript)
         """
-        return self.sequence[self.last_stop_codon_spliced_offset + 1:]
+        return self.sequence[self.last_stop_codon_spliced_offset + 1 :]
 
     @memoized_property
     def protein_id(self):
@@ -487,7 +498,8 @@ def protein_id(self):
             filter_value=self.id,
             feature="CDS",
             distinct=True,
-            required=False)
+            required=False,
+        )
         if result_tuple:
             return result_tuple[0]
         else:
diff --git a/setup.py b/setup.py
index 45dc0a4..65dee28 100644
--- a/setup.py
+++ b/setup.py
@@ -11,6 +11,7 @@
 # limitations under the License.
 
 from __future__ import print_function
+
 import os
 import re
 
diff --git a/tests/test_ucsc_gtf.py b/tests/test_ucsc_gtf.py
index 24e444f..b40c3ff 100644
--- a/tests/test_ucsc_gtf.py
+++ b/tests/test_ucsc_gtf.py
@@ -31,13 +31,11 @@ def test_ucsc_gencode_genome():
         genome.index()
         genes = genome.genes()
         for gene in genes:
-            assert gene.id, "Gene with missing ID in %s" % (genome.gtf.dataframe(),)
+            assert gene.id, "Gene with missing ID in %s" % (genome,)
         assert len(genes) == 7, "Expected 7 genes, got %d: %s" % (len(genes), genes)
         transcripts = genome.transcripts()
         for transcript in transcripts:
-            assert transcript.id, "Transcript with missing ID in %s" % (
-                genome.gtf.dataframe(),
-            )
+            assert transcript.id, "Transcript with missing ID in %s" % (genome,)
         assert len(transcripts) == 7, "Expected 7 transcripts, got %d: %s" % (
             len(transcripts),
             transcripts,